[clang] [OpenMP] Support capturing structured bindings in OpenMP regions. (PR #190832)

Zahira Ammarguellat via cfe-commits cfe-commits at lists.llvm.org
Thu Jun 25 09:59:26 PDT 2026


https://github.com/zahiraam updated https://github.com/llvm/llvm-project/pull/190832

>From ca43027b4156c15d34c7b6b75ed8420ccc0c1f2e Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Tue, 7 Apr 2026 12:10:39 -0700
Subject: [PATCH 01/45] [OpenMP] Support capturing structured bindings in
 OpenMP regions.

---
 clang/lib/CodeGen/CGExpr.cpp                  |  18 ++-
 clang/lib/Sema/SemaExpr.cpp                   |  16 +-
 clang/lib/Sema/SemaStmt.cpp                   |   5 +-
 .../OpenMP/structured-binding-capture.cpp     | 141 ++++++++++++++++++
 4 files changed, 169 insertions(+), 11 deletions(-)
 create mode 100644 clang/test/OpenMP/structured-binding-capture.cpp

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 23802cdeb4811..b2feb5d339a8e 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3737,8 +3737,22 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
   // an enclosing scope.
   if (const auto *BD = dyn_cast<BindingDecl>(ND)) {
     if (E->refersToEnclosingVariableOrCapture()) {
-      auto *FD = LambdaCaptureFields.lookup(BD);
-      return EmitCapturedFieldLValue(*this, FD, CXXABIThisValue);
+      if (auto *DD = dyn_cast<VarDecl>(BD->getDecomposedDecl())) {
+        auto I = LocalDeclMap.find(DD);
+        if (I != LocalDeclMap.end()) {
+          Address DDAddr = I->second;
+          llvm::Type *StructTy = CGM.getTypes().ConvertTypeForMem(
+              DD->getType().getCanonicalType());
+          if (DDAddr.getElementType() != StructTy)
+            DDAddr = DDAddr.withElementType(StructTy);
+          LValue BaseLV =
+              MakeAddrLValue(DDAddr, DD->getType().getCanonicalType());
+          return EmitLValueForField(
+              BaseLV, cast<FieldDecl>(
+                          cast<MemberExpr>(BD->getBinding()->IgnoreImplicit())
+                              ->getMemberDecl()));
+        }
+      }
     }
     // Suppress debug location updates when visiting the binding, since the
     // binding may emit instructions that would otherwise be associated with the
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index c9642ed298bf3..46a93b8e53d3e 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -19374,6 +19374,8 @@ static bool isVariableCapturable(CapturingScopeInfo *CSI, ValueDecl *Var,
   }
 
   if (isa<BindingDecl>(Var)) {
+      if (Var->getDeclName() && !Var->isImplicit())
+          return true;
     if (!IsLambda || !S.getLangOpts().CPlusPlus) {
       if (Diagnose)
         diagnoseUncapturableValueReferenceOrBinding(S, Loc, Var);
@@ -19514,6 +19516,12 @@ static bool captureInLambda(LambdaScopeInfo *LSI, ValueDecl *Var,
     ByRef = (LSI->ImpCaptureStyle == LambdaScopeInfo::ImpCap_LambdaByref);
   }
 
+  if (auto* BD = dyn_cast<BindingDecl>(Var)) {
+    // For structured bindings, capture the individual element type,
+    // not the full decomposed type.
+    CaptureType = BD->getType();
+    DeclRefType = BD->getType();
+  }
   if (BuildAndDiagnose && S.Context.getTargetInfo().getTriple().isWasm() &&
       CaptureType.getNonReferenceType().isWebAssemblyReferenceType()) {
     S.Diag(Loc, diag::err_wasm_ca_reference) << 0;
@@ -19880,14 +19888,6 @@ bool Sema::tryCaptureVariable(
         // just break here. Similarly, global variables that are captured in a
         // target region should not be captured outside the scope of the region.
         if (RSI->CapRegionKind == CR_OpenMP) {
-          // FIXME: We should support capturing structured bindings in OpenMP.
-          if (isa<BindingDecl>(Var)) {
-            if (BuildAndDiagnose) {
-              Diag(ExprLoc, diag::err_capture_binding_openmp) << Var;
-              Diag(Var->getLocation(), diag::note_entity_declared_at) << Var;
-            }
-            return true;
-          }
           OpenMPClauseKind IsOpenMPPrivateDecl = OpenMP().isOpenMPPrivateDecl(
               Var, RSI->OpenMPLevel, RSI->OpenMPCaptureLevel);
           // If the variable is private (i.e. not captured) and has variably
diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp
index 531147ef35b08..21c799b89a64a 100644
--- a/clang/lib/Sema/SemaStmt.cpp
+++ b/clang/lib/Sema/SemaStmt.cpp
@@ -4700,11 +4700,14 @@ buildCapturedStmtCaptureList(Sema &S, CapturedRegionScopeInfo *RSI,
         S.OpenMP().setOpenMPCaptureKind(Field, Cap.getVariable(),
                                         RSI->OpenMPLevel);
 
+      ValueDecl* CapVar = Cap.getVariable();
+      if (auto* BD = dyn_cast<BindingDecl>(CapVar))
+        CapVar = cast<VarDecl>(BD->getDecomposedDecl());
       Captures.push_back(CapturedStmt::Capture(
           Cap.getLocation(),
           Cap.isReferenceCapture() ? CapturedStmt::VCK_ByRef
                                    : CapturedStmt::VCK_ByCopy,
-          cast<VarDecl>(Cap.getVariable())));
+          cast<VarDecl>(CapVar)));
     }
     CaptureInits.push_back(Init.get());
   }
diff --git a/clang/test/OpenMP/structured-binding-capture.cpp b/clang/test/OpenMP/structured-binding-capture.cpp
new file mode 100644
index 0000000000000..5d3fae741958b
--- /dev/null
+++ b/clang/test/OpenMP/structured-binding-capture.cpp
@@ -0,0 +1,141 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 4
+// RUN: %clang_cc1 -verify -std=c++20 -triple x86_64-pc-linux-gnu -fopenmp \
+// RUN: -emit-llvm %s -o - | FileCheck %s
+
+// expected-no-diagnostics
+
+struct Point {
+  int first, second;
+};
+
+Point twoints() {
+  return {37, 24};
+}
+
+int main() {
+  auto [m, n] = twoints();
+#pragma omp parallel for collapse(2)
+  for (int i = 0; i < 10; i++)
+    for (int j = 0; j < 10; j++)
+      [m, n](int i, int j) -> void { return; }(i, j);
+  return 0;
+}
+
+// CHECK-LABEL: define dso_local i64 @_Z7twointsv(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4
+// CHECK-NEXT:    [[FIRST:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT:    store i32 37, ptr [[FIRST]], align 4
+// CHECK-NEXT:    [[SECOND:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[RETVAL]], i32 0, i32 1
+// CHECK-NEXT:    store i32 24, ptr [[SECOND]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[RETVAL]], align 4
+// CHECK-NEXT:    ret i64 [[TMP0]]
+//
+//
+// CHECK-LABEL: define dso_local noundef i32 @main(
+// CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4
+// CHECK-NEXT:    store i32 0, ptr [[RETVAL]], align 4
+// CHECK-NEXT:    [[CALL:%.*]] = call i64 @_Z7twointsv()
+// CHECK-NEXT:    store i64 [[CALL]], ptr [[TMP0]], align 4
+// CHECK-NEXT:    [[FIRST:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 0
+// CHECK-NEXT:    [[SECOND:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 1
+// CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 2, ptr @main.omp_outlined, ptr [[FIRST]], ptr [[SECOND]])
+// CHECK-NEXT:    ret i32 0
+//
+//
+// CHECK-LABEL: define internal void @main.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[DOTADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[DOTADDR1:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[_TMP2:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 4
+// CHECK-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK-NEXT:    store ptr [[TMP0]], ptr [[DOTADDR]], align 8
+// CHECK-NEXT:    store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2:![0-9]+]], !align [[META3:![0-9]+]]
+// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK-NEXT:    store i32 0, ptr [[DOTOMP_LB]], align 4
+// CHECK-NEXT:    store i32 99, ptr [[DOTOMP_UB]], align 4
+// CHECK-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99
+// CHECK-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK:       cond.true:
+// CHECK-NEXT:    br label [[COND_END:%.*]]
+// CHECK:       cond.false:
+// CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-NEXT:    br label [[COND_END]]
+// CHECK:       cond.end:
+// CHECK-NEXT:    [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ]
+// CHECK-NEXT:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
+// CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK-NEXT:    store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4
+// CHECK-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// CHECK:       omp.inner.for.cond:
+// CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-NEXT:    [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]]
+// CHECK-NEXT:    br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK:       omp.inner.for.body:
+// CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-NEXT:    [[DIV:%.*]] = sdiv i32 [[TMP11]], 10
+// CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
+// CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-NEXT:    [[DIV4:%.*]] = sdiv i32 [[TMP13]], 10
+// CHECK-NEXT:    [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10
+// CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP12]], [[MUL5]]
+// CHECK-NEXT:    [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1
+// CHECK-NEXT:    [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
+// CHECK-NEXT:    store i32 [[ADD7]], ptr [[J]], align 4
+// CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK-NEXT:    [[FIRST:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[FIRST]], align 4
+// CHECK-NEXT:    store i32 [[TMP15]], ptr [[TMP14]], align 4
+// CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1
+// CHECK-NEXT:    [[SECOND:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP2]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr [[SECOND]], align 4
+// CHECK-NEXT:    store i32 [[TMP17]], ptr [[TMP16]], align 4
+// CHECK-NEXT:    [[TMP18:%.*]] = load i32, ptr [[I]], align 4
+// CHECK-NEXT:    [[TMP19:%.*]] = load i32, ptr [[J]], align 4
+// CHECK-NEXT:    call void @"_ZZ4mainENK3$_0clEii"(ptr noundef nonnull align 4 dereferenceable(8) [[REF_TMP]], i32 noundef [[TMP18]], i32 noundef [[TMP19]])
+// CHECK-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
+// CHECK:       omp.body.continue:
+// CHECK-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// CHECK:       omp.inner.for.inc:
+// CHECK-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1
+// CHECK-NEXT:    store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4
+// CHECK-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// CHECK:       omp.inner.for.end:
+// CHECK-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+// CHECK:       omp.loop.exit:
+// CHECK-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]])
+// CHECK-NEXT:    ret void
+//
+//.
+// CHECK: [[META2]] = !{}
+// CHECK: [[META3]] = !{i64 4}
+//.

>From 5518969a479ceeb370ff6130545cd8b3d9b5ebd3 Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Fri, 17 Apr 2026 11:59:31 -0700
Subject: [PATCH 02/45] Addressed review comments

---
 clang/lib/CodeGen/CGExpr.cpp                | 33 ++++++++++++---------
 clang/lib/Sema/SemaExpr.cpp                 |  5 +++-
 clang/test/SemaCXX/decomposition-openmp.cpp |  5 ++--
 3 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index b2feb5d339a8e..4366853389ed8 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3737,22 +3737,27 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
   // an enclosing scope.
   if (const auto *BD = dyn_cast<BindingDecl>(ND)) {
     if (E->refersToEnclosingVariableOrCapture()) {
-      if (auto *DD = dyn_cast<VarDecl>(BD->getDecomposedDecl())) {
-        auto I = LocalDeclMap.find(DD);
-        if (I != LocalDeclMap.end()) {
-          Address DDAddr = I->second;
-          llvm::Type *StructTy = CGM.getTypes().ConvertTypeForMem(
-              DD->getType().getCanonicalType());
-          if (DDAddr.getElementType() != StructTy)
-            DDAddr = DDAddr.withElementType(StructTy);
-          LValue BaseLV =
-              MakeAddrLValue(DDAddr, DD->getType().getCanonicalType());
-          return EmitLValueForField(
-              BaseLV, cast<FieldDecl>(
-                          cast<MemberExpr>(BD->getBinding()->IgnoreImplicit())
-                              ->getMemberDecl()));
+      auto *FD = LambdaCaptureFields.lookup(BD);
+      if (!FD) {
+        // OpenMP case: binding was captured via its decomposed decl.
+        if (auto *DD = dyn_cast<VarDecl>(BD->getDecomposedDecl())) {
+          auto I = LocalDeclMap.find(DD);
+          if (I != LocalDeclMap.end()) {
+            Address DDAddr = I->second;
+            llvm::Type *StructTy = CGM.getTypes().ConvertTypeForMem(
+                DD->getType().getCanonicalType());
+            if (DDAddr.getElementType() != StructTy)
+              DDAddr = DDAddr.withElementType(StructTy);
+            LValue BaseLV =
+                MakeAddrLValue(DDAddr, DD->getType().getCanonicalType());
+            return EmitLValueForField(
+                BaseLV, cast<FieldDecl>(
+                            cast<MemberExpr>(BD->getBinding()->IgnoreImplicit())
+                                ->getMemberDecl()));
+          }
         }
       }
+      return EmitCapturedFieldLValue(*this, FD, CXXABIThisValue);
     }
     // Suppress debug location updates when visiting the binding, since the
     // binding may emit instructions that would otherwise be associated with the
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 46a93b8e53d3e..fe059f427b767 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -19374,8 +19374,11 @@ static bool isVariableCapturable(CapturingScopeInfo *CSI, ValueDecl *Var,
   }
 
   if (isa<BindingDecl>(Var)) {
-      if (Var->getDeclName() && !Var->isImplicit())
+    if (Var->getDeclName() && !Var->isImplicit()) {
+      if (auto *RSI = dyn_cast<CapturedRegionScopeInfo>(CSI))
+        if (RSI->CapRegionKind == CR_OpenMP)
           return true;
+    }
     if (!IsLambda || !S.getLangOpts().CPlusPlus) {
       if (Diagnose)
         diagnoseUncapturableValueReferenceOrBinding(S, Loc, Var);
diff --git a/clang/test/SemaCXX/decomposition-openmp.cpp b/clang/test/SemaCXX/decomposition-openmp.cpp
index 2185f3db83d4e..70f1d40a87661 100644
--- a/clang/test/SemaCXX/decomposition-openmp.cpp
+++ b/clang/test/SemaCXX/decomposition-openmp.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -verify -std=c++20 -fopenmp %s
 
+// expected-no-diagnostics
+
 // Okay, not an OpenMP capture.
 auto f() {
   int i[2] = {};
@@ -23,10 +25,9 @@ void g() {
 // FIXME: OpenMP should support capturing structured bindings
 void h() {
   int i[2] = {};
-  auto [a, b] = i; // expected-note 2{{declared here}}
+  auto [a, b] = i;
   #pragma omp parallel
   {
-    // expected-error@+1 2{{capturing a structured binding is not yet supported in OpenMP}}
     foo(a + b);
   }
 }

>From 4dc3499c7b11b267ecdfc4f20f1adef16e69b04f Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Fri, 17 Apr 2026 12:05:48 -0700
Subject: [PATCH 03/45] Fix format

---
 clang/lib/Sema/SemaExpr.cpp |  2 +-
 clang/lib/Sema/SemaStmt.cpp | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index fe059f427b767..300a20133f49a 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -19519,7 +19519,7 @@ static bool captureInLambda(LambdaScopeInfo *LSI, ValueDecl *Var,
     ByRef = (LSI->ImpCaptureStyle == LambdaScopeInfo::ImpCap_LambdaByref);
   }
 
-  if (auto* BD = dyn_cast<BindingDecl>(Var)) {
+  if (auto *BD = dyn_cast<BindingDecl>(Var)) {
     // For structured bindings, capture the individual element type,
     // not the full decomposed type.
     CaptureType = BD->getType();
diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp
index 21c799b89a64a..60d4214bb3c8a 100644
--- a/clang/lib/Sema/SemaStmt.cpp
+++ b/clang/lib/Sema/SemaStmt.cpp
@@ -4700,14 +4700,14 @@ buildCapturedStmtCaptureList(Sema &S, CapturedRegionScopeInfo *RSI,
         S.OpenMP().setOpenMPCaptureKind(Field, Cap.getVariable(),
                                         RSI->OpenMPLevel);
 
-      ValueDecl* CapVar = Cap.getVariable();
-      if (auto* BD = dyn_cast<BindingDecl>(CapVar))
+      ValueDecl *CapVar = Cap.getVariable();
+      if (auto *BD = dyn_cast<BindingDecl>(CapVar))
         CapVar = cast<VarDecl>(BD->getDecomposedDecl());
-      Captures.push_back(CapturedStmt::Capture(
-          Cap.getLocation(),
-          Cap.isReferenceCapture() ? CapturedStmt::VCK_ByRef
-                                   : CapturedStmt::VCK_ByCopy,
-          cast<VarDecl>(CapVar)));
+      Captures.push_back(CapturedStmt::Capture(Cap.getLocation(),
+                                               Cap.isReferenceCapture()
+                                                   ? CapturedStmt::VCK_ByRef
+                                                   : CapturedStmt::VCK_ByCopy,
+                                               cast<VarDecl>(CapVar)));
     }
     CaptureInits.push_back(Init.get());
   }

>From da55057a51bd0bdfb0f845b03d583600de7622ea Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Mon, 20 Apr 2026 05:40:43 -0700
Subject: [PATCH 04/45] Addressed review comments

---
 clang/lib/CodeGen/CGExpr.cpp | 35 +++++++++++++++++------------------
 1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 4366853389ed8..f1a67b8334702 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3737,26 +3737,25 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
   // an enclosing scope.
   if (const auto *BD = dyn_cast<BindingDecl>(ND)) {
     if (E->refersToEnclosingVariableOrCapture()) {
-      auto *FD = LambdaCaptureFields.lookup(BD);
-      if (!FD) {
-        // OpenMP case: binding was captured via its decomposed decl.
-        if (auto *DD = dyn_cast<VarDecl>(BD->getDecomposedDecl())) {
-          auto I = LocalDeclMap.find(DD);
-          if (I != LocalDeclMap.end()) {
-            Address DDAddr = I->second;
-            llvm::Type *StructTy = CGM.getTypes().ConvertTypeForMem(
-                DD->getType().getCanonicalType());
-            if (DDAddr.getElementType() != StructTy)
-              DDAddr = DDAddr.withElementType(StructTy);
-            LValue BaseLV =
-                MakeAddrLValue(DDAddr, DD->getType().getCanonicalType());
-            return EmitLValueForField(
-                BaseLV, cast<FieldDecl>(
-                            cast<MemberExpr>(BD->getBinding()->IgnoreImplicit())
-                                ->getMemberDecl()));
-          }
+      // OpenMP case: binding was captured via its decomposed decl.
+      if (auto *DD = dyn_cast<VarDecl>(BD->getDecomposedDecl())) {
+        auto I = LocalDeclMap.find(DD);
+        if (I != LocalDeclMap.end()) {
+          Address DDAddr = I->second;
+          llvm::Type *StructTy = CGM.getTypes().ConvertTypeForMem(
+              DD->getType().getCanonicalType());
+          if (DDAddr.getElementType() != StructTy)
+            DDAddr = DDAddr.withElementType(StructTy);
+          LValue BaseLV =
+              MakeAddrLValue(DDAddr, DD->getType().getCanonicalType());
+          return EmitLValueForField(
+              BaseLV, cast<FieldDecl>(
+                          cast<MemberExpr>(BD->getBinding()->IgnoreImplicit())
+                              ->getMemberDecl()));
         }
       }
+      // Non-OpenMP case: binding was captured as a lambda field directly.
+      auto *FD = LambdaCaptureFields.lookup(BD);
       return EmitCapturedFieldLValue(*this, FD, CXXABIThisValue);
     }
     // Suppress debug location updates when visiting the binding, since the

>From 5261f6f55d6fabdf83ba69d1bb47568140dbf345 Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Tue, 21 Apr 2026 14:47:59 -0700
Subject: [PATCH 05/45] Addressed review comments

---
 clang/docs/ReleaseNotes.rst                      | 3 +++
 clang/include/clang/Basic/DiagnosticSemaKinds.td | 2 --
 clang/lib/CodeGen/CGExpr.cpp                     | 4 ++++
 clang/lib/Sema/SemaStmt.cpp                      | 9 ++++-----
 4 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 688f0a2c2bb75..dafb83a3fa2af 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -615,6 +615,9 @@ OpenMP Support
 - Added support for ``transparent`` clause in task and taskloop directives.
 - Added support for ``use_device_ptr`` clause to accept an optional
   ``fallback`` modifier (``fb_nullify`` or ``fb_preserve``) with OpenMP >= 61.
+- Added support for capturing structured bindings. Variables introduced by
+  decomposition declarations are now handled correctly when captured inside
+  OpenMP constructs.
 
 Improvements
 ^^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index eddf9c50033e1..62b15bca04196 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10260,8 +10260,6 @@ def ext_ms_anonymous_record : ExtWarn<
 def err_reference_to_local_in_enclosing_context : Error<
   "reference to local %select{variable|binding}1 %0 declared in enclosing "
   "%select{%3|block literal|lambda expression|context}2">;
-def err_capture_binding_openmp : Error<
-  "capturing a structured binding is not yet supported in OpenMP">;
 def ext_capture_binding : ExtWarn<
   "captured structured bindings are a C++20 extension">, InGroup<CXX20>;
 def warn_cxx17_compat_capture_binding : Warning<
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index f1a67b8334702..d6573d94be470 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3739,6 +3739,10 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
     if (E->refersToEnclosingVariableOrCapture()) {
       // OpenMP case: binding was captured via its decomposed decl.
       if (auto *DD = dyn_cast<VarDecl>(BD->getDecomposedDecl())) {
+        assert(CapturedStmtInfo && "Expected to be in a captured statement");
+        assert(CapturedStmtInfo->getKind() == CapturedRegionKind::CR_OpenMP &&
+               "Expected OpenMP captured region");
+        assert(CGM.getLangOpts().OpenMP && "OpenMP not enabled");
         auto I = LocalDeclMap.find(DD);
         if (I != LocalDeclMap.end()) {
           Address DDAddr = I->second;
diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp
index 60d4214bb3c8a..b4e041841feee 100644
--- a/clang/lib/Sema/SemaStmt.cpp
+++ b/clang/lib/Sema/SemaStmt.cpp
@@ -4703,11 +4703,10 @@ buildCapturedStmtCaptureList(Sema &S, CapturedRegionScopeInfo *RSI,
       ValueDecl *CapVar = Cap.getVariable();
       if (auto *BD = dyn_cast<BindingDecl>(CapVar))
         CapVar = cast<VarDecl>(BD->getDecomposedDecl());
-      Captures.push_back(CapturedStmt::Capture(Cap.getLocation(),
-                                               Cap.isReferenceCapture()
-                                                   ? CapturedStmt::VCK_ByRef
-                                                   : CapturedStmt::VCK_ByCopy,
-                                               cast<VarDecl>(CapVar)));
+      Captures.emplace_back(Cap.getLocation(),
+                            Cap.isReferenceCapture() ? CapturedStmt::VCK_ByRef
+                                                     : CapturedStmt::VCK_ByCopy,
+                            cast<VarDecl>(CapVar));
     }
     CaptureInits.push_back(Init.get());
   }

>From 1c1da353d5ca3d3ae8a75b0970bf415adb10d8d0 Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Wed, 29 Apr 2026 07:51:04 -0700
Subject: [PATCH 06/45] Addressed review comments

---
 clang/docs/ReleaseNotes.rst                   |   7 +-
 clang/lib/CodeGen/CGExpr.cpp                  | 107 ++++++++--
 clang/lib/Sema/SemaExpr.cpp                   |   6 -
 clang/lib/Sema/SemaLambda.cpp                 |   7 +-
 clang/lib/Sema/SemaStmt.cpp                   |  24 ++-
 .../OpenMP/structured-binding-capture.cpp     | 141 -------------
 .../OpenMP/structured-bindings-codegen.cpp    | 195 ++++++++++++++++++
 clang/test/SemaCXX/decomposition-openmp.cpp   |   1 -
 8 files changed, 310 insertions(+), 178 deletions(-)
 delete mode 100644 clang/test/OpenMP/structured-binding-capture.cpp
 create mode 100644 clang/test/OpenMP/structured-bindings-codegen.cpp

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index dafb83a3fa2af..bc094aeaa33b1 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -615,9 +615,10 @@ OpenMP Support
 - Added support for ``transparent`` clause in task and taskloop directives.
 - Added support for ``use_device_ptr`` clause to accept an optional
   ``fallback`` modifier (``fb_nullify`` or ``fb_preserve``) with OpenMP >= 61.
-- Added support for capturing structured bindings. Variables introduced by
-  decomposition declarations are now handled correctly when captured inside
-  OpenMP constructs.
+- Added support for C++17 structured bindings in OpenMP regions. Structured
+  bindings from structs, classes, and arrays can now be used inside
+  OpenMP directives. Note: Tuple-like bindings (types using the tuple protocol
+  with ``get<N>()``) are not yet supported and will produce a compilation error.
 
 Improvements
 ^^^^^^^^^^^^
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index d6573d94be470..ed9b8580b63ea 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3739,26 +3739,97 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
     if (E->refersToEnclosingVariableOrCapture()) {
       // OpenMP case: binding was captured via its decomposed decl.
       if (auto *DD = dyn_cast<VarDecl>(BD->getDecomposedDecl())) {
-        assert(CapturedStmtInfo && "Expected to be in a captured statement");
-        assert(CapturedStmtInfo->getKind() == CapturedRegionKind::CR_OpenMP &&
-               "Expected OpenMP captured region");
-        assert(CGM.getLangOpts().OpenMP && "OpenMP not enabled");
-        auto I = LocalDeclMap.find(DD);
-        if (I != LocalDeclMap.end()) {
-          Address DDAddr = I->second;
-          llvm::Type *StructTy = CGM.getTypes().ConvertTypeForMem(
-              DD->getType().getCanonicalType());
-          if (DDAddr.getElementType() != StructTy)
-            DDAddr = DDAddr.withElementType(StructTy);
-          LValue BaseLV =
-              MakeAddrLValue(DDAddr, DD->getType().getCanonicalType());
-          return EmitLValueForField(
-              BaseLV, cast<FieldDecl>(
-                          cast<MemberExpr>(BD->getBinding()->IgnoreImplicit())
-                              ->getMemberDecl()));
+        if (CapturedStmtInfo &&
+            CapturedStmtInfo->getKind() == CapturedRegionKind::CR_OpenMP &&
+            CGM.getLangOpts().OpenMP) {
+          auto I = LocalDeclMap.find(DD);
+          if (I != LocalDeclMap.end()) {
+            Address DDAddr = I->second;
+            llvm::Type *ExpectedTy = CGM.getTypes().ConvertTypeForMem(
+                DD->getType().getCanonicalType());
+            if (DDAddr.getElementType() != ExpectedTy)
+              DDAddr = DDAddr.withElementType(ExpectedTy);
+            LValue CapLVal;
+            if (DD->getType()->isReferenceType())
+              CapLVal = EmitLoadOfReferenceLValue(DDAddr, DD->getType(),
+                                                  AlignmentSource::Decl);
+            else
+              CapLVal =
+                  MakeAddrLValue(DDAddr, DD->getType().getCanonicalType());
+            if (getLangOpts().OpenMP &&
+                CGM.getOpenMPRuntime().isNontemporalDecl(DD))
+              CapLVal.setNontemporal(/*Value=*/true);
+            // Extract the specific binding from the decomposed object.
+            Expr *BindingExpr = BD->getBinding()->IgnoreImplicit();
+            if (auto *ME = dyn_cast<MemberExpr>(BindingExpr)) {
+              // Struct/union: access field.
+              return EmitLValueForField(CapLVal,
+                                        cast<FieldDecl>(ME->getMemberDecl()));
+            }
+            if (auto *ASE = dyn_cast<ArraySubscriptExpr>(BindingExpr)) {
+              Address Base = CapLVal.getAddress();
+              llvm::Value *Idx = EmitScalarExpr(ASE->getIdx());
+              llvm::Value *Indices[] = {llvm::ConstantInt::get(Int32Ty, 0),
+                                        Idx};
+              llvm::Type *ElemTy =
+                  CGM.getTypes().ConvertTypeForMem(ASE->getType());
+              llvm::Value *EltPtr = Builder.CreateInBoundsGEP(
+                  Base.getElementType(), Base.emitRawPointer(*this), Indices,
+                  "arrayidx");
+              CharUnits Align = Base.getAlignment().alignmentOfArrayElement(
+                  getContext().getTypeSizeInChars(ASE->getType()));
+              Address EltAddr(EltPtr, ElemTy, Align);
+              return MakeAddrLValue(EltAddr, ASE->getType());
+            }
+            // Fallback for complex binding types.
+            // TODO: Tuple bindings (std::tuple, std::pair via tuple protocol)
+            // use hidden temporary variables that aren't captured in OpenMP
+            // regions. Need to re-emit the get<N>() call on the captured tuple
+            // base object. For now, this will fail.
+            if (isa<DeclRefExpr>(BindingExpr))
+              llvm_unreachable(
+                  "tuple-like structured bindings not yet supported in OpenMP");
+            return EmitLValue(BindingExpr);
+          }
+          // DD not in LocalDeclMap, check capture struct
+          if (auto *FD = CapturedStmtInfo->lookup(DD)) {
+            LValue CapLVal = EmitCapturedFieldLValue(
+                *this, FD, CapturedStmtInfo->getContextValue());
+            Address Addr = CapLVal.getAddress();
+            llvm::Type *ExpectedTy = CGM.getTypes().ConvertTypeForMem(
+                DD->getType().getCanonicalType());
+            if (Addr.getElementType() != ExpectedTy)
+              Addr = Addr.withElementType(ExpectedTy);
+            CapLVal = MakeAddrLValue(Addr, DD->getType().getCanonicalType());
+            if (DD->getType()->isReferenceType())
+              CapLVal = EmitLoadOfReferenceLValue(
+                  CapLVal.getAddress(), DD->getType(), AlignmentSource::Decl);
+            if (getLangOpts().OpenMP &&
+                CGM.getOpenMPRuntime().isNontemporalDecl(DD))
+              CapLVal.setNontemporal(/*Value=*/true);
+
+            // Extract the specific binding.
+            Expr *BindingExpr = BD->getBinding()->IgnoreImplicit();
+            if (auto *ME = dyn_cast<MemberExpr>(BindingExpr)) {
+              return EmitLValueForField(CapLVal,
+                                        cast<FieldDecl>(ME->getMemberDecl()));
+            }
+            if (auto *ASE = dyn_cast<ArraySubscriptExpr>(BindingExpr)) {
+              Address Base = CapLVal.getAddress(); // Typed as the whole array.
+              llvm::Value *Idx = EmitScalarExpr(ASE->getIdx());
+              llvm::Value *EltPtr = Builder.CreateInBoundsGEP(
+                  Base.getElementType(), Base.emitRawPointer(*this),
+                  {Builder.getInt32(0), Idx}, "arrayidx"); // array -> element
+              CharUnits Align = Base.getAlignment().alignmentOfArrayElement(
+                  getContext().getTypeSizeInChars(ASE->getType()));
+              Address EltAddr(EltPtr, ConvertTypeForMem(ASE->getType()), Align);
+              return MakeAddrLValue(EltAddr, ASE->getType());
+            }
+            return EmitLValue(BindingExpr);
+          }
         }
       }
-      // Non-OpenMP case: binding was captured as a lambda field directly.
+      // Non-OpenMP case: lambda capture.
       auto *FD = LambdaCaptureFields.lookup(BD);
       return EmitCapturedFieldLValue(*this, FD, CXXABIThisValue);
     }
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 300a20133f49a..e59a6c861aefd 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -19519,12 +19519,6 @@ static bool captureInLambda(LambdaScopeInfo *LSI, ValueDecl *Var,
     ByRef = (LSI->ImpCaptureStyle == LambdaScopeInfo::ImpCap_LambdaByref);
   }
 
-  if (auto *BD = dyn_cast<BindingDecl>(Var)) {
-    // For structured bindings, capture the individual element type,
-    // not the full decomposed type.
-    CaptureType = BD->getType();
-    DeclRefType = BD->getType();
-  }
   if (BuildAndDiagnose && S.Context.getTargetInfo().getTriple().isWasm() &&
       CaptureType.getNonReferenceType().isWebAssemblyReferenceType()) {
     S.Diag(Loc, diag::err_wasm_ca_reference) << 0;
diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp
index 8572e3a742a6c..754d0918f79da 100644
--- a/clang/lib/Sema/SemaLambda.cpp
+++ b/clang/lib/Sema/SemaLambda.cpp
@@ -1970,9 +1970,14 @@ ExprResult Sema::BuildCaptureInit(const Capture &Cap,
   } else {
     assert(Cap.isVariableCapture() && "unknown kind of capture");
     ValueDecl *Var = Cap.getVariable();
+    // For OpenMP structured bindings, capture the decomposed decl, not the
+    // binding.
+    if (IsOpenMPMapping && isa<BindingDecl>(Var)) {
+      Var = cast<BindingDecl>(Var)->getDecomposedDecl();
+    }
     Name = Var->getIdentifier();
     Init = BuildDeclarationNameExpr(
-      CXXScopeSpec(), DeclarationNameInfo(Var->getDeclName(), Loc), Var);
+        CXXScopeSpec(), DeclarationNameInfo(Var->getDeclName(), Loc), Var);
   }
 
   // In OpenMP, the capture kind doesn't actually describe how to capture:
diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp
index b4e041841feee..9f90dc1fda665 100644
--- a/clang/lib/Sema/SemaStmt.cpp
+++ b/clang/lib/Sema/SemaStmt.cpp
@@ -4672,10 +4672,23 @@ static bool
 buildCapturedStmtCaptureList(Sema &S, CapturedRegionScopeInfo *RSI,
                              SmallVectorImpl<CapturedStmt::Capture> &Captures,
                              SmallVectorImpl<Expr *> &CaptureInits) {
+  llvm::SmallPtrSet<VarDecl *, 4> CapturedDecomposed;
   for (const sema::Capture &Cap : RSI->Captures) {
     if (Cap.isInvalid())
       continue;
 
+    ValueDecl *CapVar = nullptr;
+    if (Cap.isVariableCapture()) {
+      CapVar = Cap.getVariable();
+      if (auto *BD = dyn_cast<BindingDecl>(CapVar)) {
+        VarDecl *DD = cast<VarDecl>(BD->getDecomposedDecl());
+        if (!CapturedDecomposed.insert(DD).second) {
+          continue; // Skip duplicate.
+        }
+        CapVar = DD;
+      }
+    }
+
     // Form the initializer for the capture.
     ExprResult Init = S.BuildCaptureInit(Cap, Cap.getLocation(),
                                          RSI->CapRegionKind == CR_OpenMP);
@@ -4688,8 +4701,8 @@ buildCapturedStmtCaptureList(Sema &S, CapturedRegionScopeInfo *RSI,
 
     // Add the capture to our list of captures.
     if (Cap.isThisCapture()) {
-      Captures.push_back(CapturedStmt::Capture(Cap.getLocation(),
-                                               CapturedStmt::VCK_This));
+      Captures.push_back(
+          CapturedStmt::Capture(Cap.getLocation(), CapturedStmt::VCK_This));
     } else if (Cap.isVLATypeCapture()) {
       Captures.push_back(
           CapturedStmt::Capture(Cap.getLocation(), CapturedStmt::VCK_VLAType));
@@ -4697,12 +4710,7 @@ buildCapturedStmtCaptureList(Sema &S, CapturedRegionScopeInfo *RSI,
       assert(Cap.isVariableCapture() && "unknown kind of capture");
 
       if (S.getLangOpts().OpenMP && RSI->CapRegionKind == CR_OpenMP)
-        S.OpenMP().setOpenMPCaptureKind(Field, Cap.getVariable(),
-                                        RSI->OpenMPLevel);
-
-      ValueDecl *CapVar = Cap.getVariable();
-      if (auto *BD = dyn_cast<BindingDecl>(CapVar))
-        CapVar = cast<VarDecl>(BD->getDecomposedDecl());
+        S.OpenMP().setOpenMPCaptureKind(Field, CapVar, RSI->OpenMPLevel);
       Captures.emplace_back(Cap.getLocation(),
                             Cap.isReferenceCapture() ? CapturedStmt::VCK_ByRef
                                                      : CapturedStmt::VCK_ByCopy,
diff --git a/clang/test/OpenMP/structured-binding-capture.cpp b/clang/test/OpenMP/structured-binding-capture.cpp
deleted file mode 100644
index 5d3fae741958b..0000000000000
--- a/clang/test/OpenMP/structured-binding-capture.cpp
+++ /dev/null
@@ -1,141 +0,0 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 4
-// RUN: %clang_cc1 -verify -std=c++20 -triple x86_64-pc-linux-gnu -fopenmp \
-// RUN: -emit-llvm %s -o - | FileCheck %s
-
-// expected-no-diagnostics
-
-struct Point {
-  int first, second;
-};
-
-Point twoints() {
-  return {37, 24};
-}
-
-int main() {
-  auto [m, n] = twoints();
-#pragma omp parallel for collapse(2)
-  for (int i = 0; i < 10; i++)
-    for (int j = 0; j < 10; j++)
-      [m, n](int i, int j) -> void { return; }(i, j);
-  return 0;
-}
-
-// CHECK-LABEL: define dso_local i64 @_Z7twointsv(
-// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4
-// CHECK-NEXT:    [[FIRST:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[RETVAL]], i32 0, i32 0
-// CHECK-NEXT:    store i32 37, ptr [[FIRST]], align 4
-// CHECK-NEXT:    [[SECOND:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[RETVAL]], i32 0, i32 1
-// CHECK-NEXT:    store i32 24, ptr [[SECOND]], align 4
-// CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[RETVAL]], align 4
-// CHECK-NEXT:    ret i64 [[TMP0]]
-//
-//
-// CHECK-LABEL: define dso_local noundef i32 @main(
-// CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[TMP0:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4
-// CHECK-NEXT:    store i32 0, ptr [[RETVAL]], align 4
-// CHECK-NEXT:    [[CALL:%.*]] = call i64 @_Z7twointsv()
-// CHECK-NEXT:    store i64 [[CALL]], ptr [[TMP0]], align 4
-// CHECK-NEXT:    [[FIRST:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 0
-// CHECK-NEXT:    [[SECOND:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 1
-// CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 2, ptr @main.omp_outlined, ptr [[FIRST]], ptr [[SECOND]])
-// CHECK-NEXT:    ret i32 0
-//
-//
-// CHECK-LABEL: define internal void @main.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT:    [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT:    [[DOTADDR1:%.*]] = alloca ptr, align 8
-// CHECK-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[_TMP2:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[J:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 4
-// CHECK-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-NEXT:    store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK-NEXT:    store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2:![0-9]+]], !align [[META3:![0-9]+]]
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK-NEXT:    store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-NEXT:    store i32 99, ptr [[DOTOMP_UB]], align 4
-// CHECK-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
-// CHECK-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99
-// CHECK-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK:       cond.true:
-// CHECK-NEXT:    br label [[COND_END:%.*]]
-// CHECK:       cond.false:
-// CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-NEXT:    br label [[COND_END]]
-// CHECK:       cond.end:
-// CHECK-NEXT:    [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ]
-// CHECK-NEXT:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-NEXT:    store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4
-// CHECK-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
-// CHECK:       omp.inner.for.cond:
-// CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-NEXT:    [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]]
-// CHECK-NEXT:    br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK:       omp.inner.for.body:
-// CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-NEXT:    [[DIV:%.*]] = sdiv i32 [[TMP11]], 10
-// CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
-// CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
-// CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-NEXT:    [[DIV4:%.*]] = sdiv i32 [[TMP13]], 10
-// CHECK-NEXT:    [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10
-// CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP12]], [[MUL5]]
-// CHECK-NEXT:    [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1
-// CHECK-NEXT:    [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
-// CHECK-NEXT:    store i32 [[ADD7]], ptr [[J]], align 4
-// CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0
-// CHECK-NEXT:    [[FIRST:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[FIRST]], align 4
-// CHECK-NEXT:    store i32 [[TMP15]], ptr [[TMP14]], align 4
-// CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1
-// CHECK-NEXT:    [[SECOND:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP2]], i32 0, i32 1
-// CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr [[SECOND]], align 4
-// CHECK-NEXT:    store i32 [[TMP17]], ptr [[TMP16]], align 4
-// CHECK-NEXT:    [[TMP18:%.*]] = load i32, ptr [[I]], align 4
-// CHECK-NEXT:    [[TMP19:%.*]] = load i32, ptr [[J]], align 4
-// CHECK-NEXT:    call void @"_ZZ4mainENK3$_0clEii"(ptr noundef nonnull align 4 dereferenceable(8) [[REF_TMP]], i32 noundef [[TMP18]], i32 noundef [[TMP19]])
-// CHECK-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
-// CHECK:       omp.body.continue:
-// CHECK-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
-// CHECK:       omp.inner.for.inc:
-// CHECK-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1
-// CHECK-NEXT:    store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4
-// CHECK-NEXT:    br label [[OMP_INNER_FOR_COND]]
-// CHECK:       omp.inner.for.end:
-// CHECK-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
-// CHECK:       omp.loop.exit:
-// CHECK-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]])
-// CHECK-NEXT:    ret void
-//
-//.
-// CHECK: [[META2]] = !{}
-// CHECK: [[META3]] = !{i64 4}
-//.
diff --git a/clang/test/OpenMP/structured-bindings-codegen.cpp b/clang/test/OpenMP/structured-bindings-codegen.cpp
new file mode 100644
index 0000000000000..b829a4ea93274
--- /dev/null
+++ b/clang/test/OpenMP/structured-bindings-codegen.cpp
@@ -0,0 +1,195 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -x c++ -std=c++20 -emit-llvm %s -o - | FileCheck %s
+
+// expected-no-diagnostics
+
+void use(int);
+
+// Struct binding.
+struct Point {
+  int x, y;
+};
+Point make_point() { return {1, 2}; }
+void test_struct() {
+  auto [m, n] = make_point();
+#pragma omp parallel
+  {
+    use(m + n);
+  }
+}
+// CHECK-LABEL: @{{.*}}test_struct{{.*}}()
+// CHECK: call void {{.*}}@__kmpc_fork_call({{.*}}, i32 1, ptr @{{.*}}test_struct{{.*}}.omp_outlined", ptr {{.*}})
+
+// CHECK-LABEL: @{{.*}}test_struct{{.*}}.omp_outlined"(
+// CHECK-SAME: ptr {{.*}}, ptr {{.*}}, ptr noundef nonnull{{.*}}[[TMP0:%.*]])
+// CHECK: [[TMP1:%.*]] = load ptr, ptr {{.*}}, align 8
+// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4
+// CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[Y]], align 4
+//
+
+// Pair binding.
+struct pair {
+  int first;
+  int second;
+};
+pair make_pair(int a, int b) {
+  return {a, b};
+}
+void test_pair() {
+  auto [a, b] = make_pair(1, 2);
+#pragma omp parallel
+  {
+    use(a);
+  }
+}
+// CHECK-LABEL: @{{.*}}test_pair{{.*}}()
+// CHECK: call void {{.*}}@__kmpc_fork_call({{.*}}, i32 1, ptr @{{.*}}test_pair{{.*}}.omp_outlined", ptr {{.*}})
+
+// CHECK-LABEL: @{{.*}}test_pair{{.*}}.omp_outlined"(
+// CHECK-SAME: ptr {{.*}}, ptr {{.*}}, ptr noundef nonnull{{.*}}[[TMP0:%.*]])
+// CHECK: [[TMP1:%.*]] = load ptr, ptr {{.*}}, align 8
+// CHECK-NEXT: [[FIRST:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[FIRST]], align 4
+// CHECK-NEXT: call void {{.*}}use{{.*}}"(i32 noundef [[TMP2]])
+//
+
+// Array binding.
+void test_array() {
+  int arr[2] = {1, 2};
+  auto [x, y] = arr;
+#pragma omp parallel
+  {
+    use(x + y);
+  }
+}
+// CHECK-LABEL: @{{.*}}test_array{{.*}}()
+// CHECK: call void {{.*}}@__kmpc_fork_call({{.*}}, i32 1, ptr @{{.*}}test_array{{.*}}.omp_outlined", ptr {{.*}})
+
+// CHECK-LABEL: @{{.*}}test_array{{.*}}.omp_outlined"(
+// CHECK-SAME: ptr {{.*}}, ptr {{.*}}, ptr noundef nonnull{{.*}}[[TMP0:%.*]])
+// CHECK: [[TMP1:%.*]] = load ptr, ptr {{.*}}, align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i32 0, i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i32 0, i32 1
+// CHECK-NEXT:  [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
+//
+
+// Binding with bitfields.
+struct S {
+  int x : 4;
+  int y : 4;
+};
+void test_bitfields() {
+  S s{1, 2};
+  auto [a, b] = s;
+#pragma omp parallel
+  {
+    use(a + b);
+  }
+}
+// CHECK-LABEL: @{{.*}}test_bitfields{{.*}}()
+// CHECK: call void{{.*}}@__kmpc_fork_call({{.*}}, i32 1, ptr @{{.*}}test_bitfields{{.*}}.omp_outlined", ptr {{.*}})
+
+// CHECK-LABEL: @{{.*}}test_bitfields{{.*}}.omp_outlined"(
+// CHECK-SAME: ptr {{.*}}, ptr {{.*}}, ptr noundef nonnull{{.*}}[[TMP0:%.*]])
+// CHECK: [[TMP1:%.*]] = load ptr, ptr {{.*}}, align 8
+// CHECK-NEXT: [[BF_LOAD:%.*]] = load i32, ptr [[TMP1]], align 4
+// CHECK-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 28
+// CHECK-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 28
+// CHECK-NEXT: [[BF_LOAD1:%.*]] = load i32, ptr [[TMP1]], align 4
+// CHECK-NEXT: [[BF_SHL2:%.*]] = shl i32 [[BF_LOAD1]], 24
+// CHECK-NEXT: [[BF_ASHR3:%.*]] = ashr i32 [[BF_SHL2]], 28
+//
+
+// Lambda inside OpenMP with captured bindings.
+void test_with_lambda() {
+  auto [m, n] = make_point();
+#pragma omp parallel for collapse(2)
+  for (int i = 0; i < 10; i++)
+    for (int j = 0; j < 10; j++)
+      [m, n](int i, int j) -> void { return; }(i, j);
+}
+// CHECK-LABEL: @{{.*}}test_with_lambda{{.*}}()
+// CHECK: call void{{.*}} @__kmpc_fork_call(ptr {{.*}}, i32 1, ptr @{{.*}}test_with_lambda{{.*}}.omp_outlined", ptr {{.*}})
+
+// CHECK-LABEL: @{{.*}}test_with_lambda{{.*}}.omp_outlined"(
+// CHECK-SAME: ptr {{.*}}, ptr {{.*}}, ptr noundef nonnull{{.*}}[[TMP0:%.*]])
+// CHECK: [[TMP1:%.*]] = load ptr, ptr {{.*}}, align 8
+// CHECK: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[X]], align 4
+// CHECK: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[Y]], align 4
+//
+
+// Only one binding used.
+void test_partial_capture() {
+  auto [a, b] = make_pair(1, 2);
+#pragma omp parallel
+  {
+    use(a);
+  }
+}
+// CHECK-LABEL: @{{.*}}test_partial_capture{{.*}}()
+// CHECK: call void {{.*}}@__kmpc_fork_call(ptr {{.*}}, i32 1, ptr @{{.*}}test_partial_capture{{.*}}.omp_outlined", ptr {{.*}})
+
+// CHECK-LABEL: @{{.*}}test_partial_capture{{.*}}.omp_outlined"(
+// CHECK-SAME: ptr {{.*}}, ptr {{.*}}, ptr noundef nonnull{{.*}}[[TMP0:%.*]])
+// CHECK: [[TMP1:%.*]] = load ptr, ptr {{.*}}, align 8
+// CHECK-NEXT: [[FIRST:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[FIRST]], align 4
+// CHECK-NEXT: call void {{.*}}use{{.*}}"(i32 noundef [[TMP2]])
+//
+
+// Nested parallel regions.
+void test_nested() {
+  auto [x, y] = make_point();
+#pragma omp parallel
+  {
+    use(x);
+#pragma omp parallel
+    {
+      use(y);
+    }
+  }
+}
+// CHECK-LABEL: @{{.*}}test_nested{{.*}}()
+// CHECK: call void {{.*}}@__kmpc_fork_call(ptr {{.*}}, i32 2, ptr @{{.*}}test_nested{{.*}}.omp_outlined", ptr {{.*}}, ptr {{.*}})
+
+// CHECK-LABEL: @{{.*}}test_nested{{.*}}.omp_outlined"(
+// CHECK-SAME: ptr {{.*}}, ptr {{.*}}, ptr noundef nonnull{{.*}}[[TMP0:%.*]], ptr noundef nonnull{{.*}}[[TMP1:%.*]])
+// CHECK: [[TMP2:%.*]] = load ptr, ptr {{.*}}, align 8
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr {{.*}}, align 8
+// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[X]], align 4
+// CHECK-NEXT: call void @{{.*}}use{{.*}}"(i32 noundef [[TMP4]])
+//
+
+// Multiple bindings in same region.
+void test_multiple() {
+  auto [a, b] = make_point();
+  auto [c, d] = make_pair(3, 4);
+#pragma omp parallel
+  {
+    use(a + b + c + d);
+  }
+}
+// CHECK-LABEL: define dso_local void @"?test_multiple@@YAXXZ"()
+// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 2, ptr @"?test_multiple@@YAXXZ.omp_outlined", ptr %0, ptr %1)
+
+// CHECK-LABEL: define internal void @"?test_multiple@@YAXXZ.omp_outlined"(ptr noalias noundef %.global_tid., ptr noalias noundef %.bound_tid., ptr noundef nonnull align 4 dereferenceable(4) %0, ptr noundef nonnull align 4 dereferenceable(4) %1)
+// CHECK: [[TMP2:%.*]] = load ptr, ptr {{.*}}, align 8
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr {{.*}}, align 8
+// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[X]], align 4
+// CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP2]], i32 0, i32 1
+// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]]
+// CHECK-NEXT: [[FIRST:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP3]], i32 0, i32 0
+// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[FIRST]], align 4
+// CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[ADD]], [[TMP6]]
+// CHECK-NEXT: [[SECOND:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[TMP3]], i32 0, i32 1
+// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[SECOND]], align 4
+// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD2]], [[TMP7]]
+// CHECK-NEXT: call void {{.*}}use{{.*}}(i32 noundef [[ADD3]])
+
diff --git a/clang/test/SemaCXX/decomposition-openmp.cpp b/clang/test/SemaCXX/decomposition-openmp.cpp
index 70f1d40a87661..e3f04305f0961 100644
--- a/clang/test/SemaCXX/decomposition-openmp.cpp
+++ b/clang/test/SemaCXX/decomposition-openmp.cpp
@@ -22,7 +22,6 @@ void g() {
   }
 }
 
-// FIXME: OpenMP should support capturing structured bindings
 void h() {
   int i[2] = {};
   auto [a, b] = i;

>From acb2aa66d5a1a9f1655520503acf89af8eb9bc5d Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Wed, 29 Apr 2026 09:23:50 -0700
Subject: [PATCH 07/45] Added warning requested in review

---
 clang/lib/Sema/SemaExpr.cpp                 | 13 +++++++++++--
 clang/test/SemaCXX/decomposition-openmp.cpp | 13 +++++++++++++
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index e59a6c861aefd..41c00199a5a9d 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -19375,9 +19375,18 @@ static bool isVariableCapturable(CapturingScopeInfo *CSI, ValueDecl *Var,
 
   if (isa<BindingDecl>(Var)) {
     if (Var->getDeclName() && !Var->isImplicit()) {
-      if (auto *RSI = dyn_cast<CapturedRegionScopeInfo>(CSI))
-        if (RSI->CapRegionKind == CR_OpenMP)
+      if (auto *RSI = dyn_cast<CapturedRegionScopeInfo>(CSI)) {
+        if (RSI->CapRegionKind == CR_OpenMP) {
+          if (Diagnose && S.getLangOpts().CPlusPlus) {
+            S.Diag(Loc, S.LangOpts.CPlusPlus20
+                            ? diag::warn_cxx17_compat_capture_binding
+                            : diag::ext_capture_binding)
+                << Var;
+            S.Diag(Var->getLocation(), diag::note_entity_declared_at) << Var;
+          }
           return true;
+        }
+      }
     }
     if (!IsLambda || !S.getLangOpts().CPlusPlus) {
       if (Diagnose)
diff --git a/clang/test/SemaCXX/decomposition-openmp.cpp b/clang/test/SemaCXX/decomposition-openmp.cpp
index e3f04305f0961..78135d90a561b 100644
--- a/clang/test/SemaCXX/decomposition-openmp.cpp
+++ b/clang/test/SemaCXX/decomposition-openmp.cpp
@@ -1,3 +1,4 @@
+// RUN: %clang_cc1 -fsyntax-only -verify=cxx17 -std=c++17 -fopenmp %s
 // RUN: %clang_cc1 -fsyntax-only -verify -std=c++20 -fopenmp %s
 
 // expected-no-diagnostics
@@ -5,8 +6,12 @@
 // Okay, not an OpenMP capture.
 auto f() {
   int i[2] = {};
+  // cxx17-note@+2{{'a' declared here}}
+  // cxx17-note@+1{{'b' declared here}}
   auto [a, b] = i;
+  // cxx17-warning@+1{{captured structured bindings are a C++20 extension}}
   return [=, &a] {
+    // cxx17-warning@+1{{captured structured bindings are a C++20 extension}}
     return a + b;
   };
 }
@@ -17,16 +22,24 @@ void g() {
   #pragma omp parallel
   {
     int i[2] = {};
+    // cxx17-note@+2{{'a' declared here}}
+    // cxx17-note@+1{{'b' declared here}}
     auto [a, b] = i;
+    // cxx17-warning@+2{{captured structured bindings are a C++20 extension}}
+    // cxx17-warning@+1{{captured structured bindings are a C++20 extension}}
     auto L = [&] { foo(a+b); };
   }
 }
 
 void h() {
   int i[2] = {};
+  // cxx17-note@+2{{'a' declared here}}
+  // cxx17-note@+1{{'b' declared here}}
   auto [a, b] = i;
   #pragma omp parallel
   {
+    // cxx17-warning@+2{{captured structured bindings are a C++20 extension}}
+    // cxx17-warning@+1{{captured structured bindings are a C++20 extension}}
     foo(a + b);
   }
 }

>From 4b266f3575780caac74e78e5f1559d599db34396 Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Wed, 29 Apr 2026 12:05:50 -0700
Subject: [PATCH 08/45] Made check lines less strict for failing LIT test

---
 .../OpenMP/structured-bindings-codegen.cpp    | 125 +++++++-----------
 1 file changed, 45 insertions(+), 80 deletions(-)

diff --git a/clang/test/OpenMP/structured-bindings-codegen.cpp b/clang/test/OpenMP/structured-bindings-codegen.cpp
index b829a4ea93274..81d5c9298ede2 100644
--- a/clang/test/OpenMP/structured-bindings-codegen.cpp
+++ b/clang/test/OpenMP/structured-bindings-codegen.cpp
@@ -16,17 +16,9 @@ void test_struct() {
     use(m + n);
   }
 }
-// CHECK-LABEL: @{{.*}}test_struct{{.*}}()
-// CHECK: call void {{.*}}@__kmpc_fork_call({{.*}}, i32 1, ptr @{{.*}}test_struct{{.*}}.omp_outlined", ptr {{.*}})
-
-// CHECK-LABEL: @{{.*}}test_struct{{.*}}.omp_outlined"(
-// CHECK-SAME: ptr {{.*}}, ptr {{.*}}, ptr noundef nonnull{{.*}}[[TMP0:%.*]])
-// CHECK: [[TMP1:%.*]] = load ptr, ptr {{.*}}, align 8
-// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4
-// CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
-// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[Y]], align 4
-//
+// CHECK-LABEL: @{{.*}}test_struct{{.*}}.omp_outlined{{.*}}(
+// CHECK: getelementptr inbounds{{.*}}i32 0, i32 0
+// CHECK: getelementptr inbounds{{.*}}i32 0, i32 1
 
 // Pair binding.
 struct pair {
@@ -43,15 +35,11 @@ void test_pair() {
     use(a);
   }
 }
-// CHECK-LABEL: @{{.*}}test_pair{{.*}}()
-// CHECK: call void {{.*}}@__kmpc_fork_call({{.*}}, i32 1, ptr @{{.*}}test_pair{{.*}}.omp_outlined", ptr {{.*}})
-
-// CHECK-LABEL: @{{.*}}test_pair{{.*}}.omp_outlined"(
-// CHECK-SAME: ptr {{.*}}, ptr {{.*}}, ptr noundef nonnull{{.*}}[[TMP0:%.*]])
+// CHECK-LABEL: @{{.*}}test_pair{{.*}}.omp_outlined{{.*}}(
 // CHECK: [[TMP1:%.*]] = load ptr, ptr {{.*}}, align 8
-// CHECK-NEXT: [[FIRST:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP1]], i32 0, i32 0
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[FIRST]], align 4
-// CHECK-NEXT: call void {{.*}}use{{.*}}"(i32 noundef [[TMP2]])
+// CHECK: [[FIRST:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK: [[TMP2:%.*]] = load i32, ptr [[FIRST]], align 4
+// CHECK: call void {{.*}}use{{.*}}"(i32 noundef [[TMP2]])
 //
 
 // Array binding.
@@ -63,16 +51,12 @@ void test_array() {
     use(x + y);
   }
 }
-// CHECK-LABEL: @{{.*}}test_array{{.*}}()
-// CHECK: call void {{.*}}@__kmpc_fork_call({{.*}}, i32 1, ptr @{{.*}}test_array{{.*}}.omp_outlined", ptr {{.*}})
-
-// CHECK-LABEL: @{{.*}}test_array{{.*}}.omp_outlined"(
-// CHECK-SAME: ptr {{.*}}, ptr {{.*}}, ptr noundef nonnull{{.*}}[[TMP0:%.*]])
+// CHECK-LABEL: @{{.*}}test_array{{.*}}.omp_outlined{{.*}}(
 // CHECK: [[TMP1:%.*]] = load ptr, ptr {{.*}}, align 8
-// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i32 0, i32 0
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i32 0, i32 1
-// CHECK-NEXT:  [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i32 0, i32 0
+// CHECK: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// CHECK: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i32 0, i32 1
+// CHECK:  [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
 //
 
 // Binding with bitfields.
@@ -88,18 +72,14 @@ void test_bitfields() {
     use(a + b);
   }
 }
-// CHECK-LABEL: @{{.*}}test_bitfields{{.*}}()
-// CHECK: call void{{.*}}@__kmpc_fork_call({{.*}}, i32 1, ptr @{{.*}}test_bitfields{{.*}}.omp_outlined", ptr {{.*}})
-
-// CHECK-LABEL: @{{.*}}test_bitfields{{.*}}.omp_outlined"(
-// CHECK-SAME: ptr {{.*}}, ptr {{.*}}, ptr noundef nonnull{{.*}}[[TMP0:%.*]])
+// CHECK-LABEL: @{{.*}}test_bitfields{{.*}}.omp_outlined{{.*}}(
 // CHECK: [[TMP1:%.*]] = load ptr, ptr {{.*}}, align 8
-// CHECK-NEXT: [[BF_LOAD:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 28
-// CHECK-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 28
-// CHECK-NEXT: [[BF_LOAD1:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK-NEXT: [[BF_SHL2:%.*]] = shl i32 [[BF_LOAD1]], 24
-// CHECK-NEXT: [[BF_ASHR3:%.*]] = ashr i32 [[BF_SHL2]], 28
+// CHECK: [[BF_LOAD:%.*]] = load i32, ptr [[TMP1]], align 4
+// CHECK: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 28
+// CHECK: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 28
+// CHECK: [[BF_LOAD1:%.*]] = load i32, ptr [[TMP1]], align 4
+// CHECK: [[BF_SHL2:%.*]] = shl i32 [[BF_LOAD1]], 24
+// CHECK: [[BF_ASHR3:%.*]] = ashr i32 [[BF_SHL2]], 28
 //
 
 // Lambda inside OpenMP with captured bindings.
@@ -110,16 +90,12 @@ void test_with_lambda() {
     for (int j = 0; j < 10; j++)
       [m, n](int i, int j) -> void { return; }(i, j);
 }
-// CHECK-LABEL: @{{.*}}test_with_lambda{{.*}}()
-// CHECK: call void{{.*}} @__kmpc_fork_call(ptr {{.*}}, i32 1, ptr @{{.*}}test_with_lambda{{.*}}.omp_outlined", ptr {{.*}})
-
-// CHECK-LABEL: @{{.*}}test_with_lambda{{.*}}.omp_outlined"(
-// CHECK-SAME: ptr {{.*}}, ptr {{.*}}, ptr noundef nonnull{{.*}}[[TMP0:%.*]])
+// CHECK-LABEL: @{{.*}}test_with_lambda{{.*}}.omp_outlined{{.*}}(
 // CHECK: [[TMP1:%.*]] = load ptr, ptr {{.*}}, align 8
 // CHECK: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
-// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[X]], align 4
+// CHECK: [[TMP13:%.*]] = load i32, ptr [[X]], align 4
 // CHECK: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
-// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK: [[TMP15:%.*]] = load i32, ptr [[Y]], align 4
 //
 
 // Only one binding used.
@@ -130,15 +106,11 @@ void test_partial_capture() {
     use(a);
   }
 }
-// CHECK-LABEL: @{{.*}}test_partial_capture{{.*}}()
-// CHECK: call void {{.*}}@__kmpc_fork_call(ptr {{.*}}, i32 1, ptr @{{.*}}test_partial_capture{{.*}}.omp_outlined", ptr {{.*}})
-
-// CHECK-LABEL: @{{.*}}test_partial_capture{{.*}}.omp_outlined"(
-// CHECK-SAME: ptr {{.*}}, ptr {{.*}}, ptr noundef nonnull{{.*}}[[TMP0:%.*]])
+// CHECK-LABEL: @{{.*}}test_partial_capture{{.*}}.omp_outlined{{.*}}(
 // CHECK: [[TMP1:%.*]] = load ptr, ptr {{.*}}, align 8
-// CHECK-NEXT: [[FIRST:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP1]], i32 0, i32 0
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[FIRST]], align 4
-// CHECK-NEXT: call void {{.*}}use{{.*}}"(i32 noundef [[TMP2]])
+// CHECK: [[FIRST:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK: [[TMP2:%.*]] = load i32, ptr [[FIRST]], align 4
+// CHECK: call void {{.*}}use{{.*}}"(i32 noundef [[TMP2]])
 //
 
 // Nested parallel regions.
@@ -153,16 +125,12 @@ void test_nested() {
     }
   }
 }
-// CHECK-LABEL: @{{.*}}test_nested{{.*}}()
-// CHECK: call void {{.*}}@__kmpc_fork_call(ptr {{.*}}, i32 2, ptr @{{.*}}test_nested{{.*}}.omp_outlined", ptr {{.*}}, ptr {{.*}})
-
-// CHECK-LABEL: @{{.*}}test_nested{{.*}}.omp_outlined"(
-// CHECK-SAME: ptr {{.*}}, ptr {{.*}}, ptr noundef nonnull{{.*}}[[TMP0:%.*]], ptr noundef nonnull{{.*}}[[TMP1:%.*]])
+// CHECK-LABEL: @{{.*}}test_nested{{.*}}.omp_outlined{{.*}}(
 // CHECK: [[TMP2:%.*]] = load ptr, ptr {{.*}}, align 8
-// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr {{.*}}, align 8
-// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
-// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[X]], align 4
-// CHECK-NEXT: call void @{{.*}}use{{.*}}"(i32 noundef [[TMP4]])
+// CHECK: [[TMP3:%.*]] = load ptr, ptr {{.*}}, align 8
+// CHECK: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
+// CHECK: [[TMP4:%.*]] = load i32, ptr [[X]], align 4
+// CHECK: call void @{{.*}}use{{.*}}"(i32 noundef [[TMP4]])
 //
 
 // Multiple bindings in same region.
@@ -174,22 +142,19 @@ void test_multiple() {
     use(a + b + c + d);
   }
 }
-// CHECK-LABEL: define dso_local void @"?test_multiple@@YAXXZ"()
-// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 2, ptr @"?test_multiple@@YAXXZ.omp_outlined", ptr %0, ptr %1)
-
-// CHECK-LABEL: define internal void @"?test_multiple@@YAXXZ.omp_outlined"(ptr noalias noundef %.global_tid., ptr noalias noundef %.bound_tid., ptr noundef nonnull align 4 dereferenceable(4) %0, ptr noundef nonnull align 4 dereferenceable(4) %1)
+// CHECK-LABEL: @{{.*}}test_multiple{{.*}}.omp_outlined{{.*}}(
 // CHECK: [[TMP2:%.*]] = load ptr, ptr {{.*}}, align 8
-// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr {{.*}}, align 8
-// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
-// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[X]], align 4
-// CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP2]], i32 0, i32 1
-// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[Y]], align 4
-// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]]
-// CHECK-NEXT: [[FIRST:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP3]], i32 0, i32 0
-// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[FIRST]], align 4
-// CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[ADD]], [[TMP6]]
-// CHECK-NEXT: [[SECOND:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[TMP3]], i32 0, i32 1
-// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[SECOND]], align 4
-// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD2]], [[TMP7]]
-// CHECK-NEXT: call void {{.*}}use{{.*}}(i32 noundef [[ADD3]])
+// CHECK: [[TMP3:%.*]] = load ptr, ptr {{.*}}, align 8
+// CHECK: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
+// CHECK: [[TMP4:%.*]] = load i32, ptr [[X]], align 4
+// CHECK: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP2]], i32 0, i32 1
+// CHECK: [[TMP5:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]]
+// CHECK: [[FIRST:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP3]], i32 0, i32 0
+// CHECK: [[TMP6:%.*]] = load i32, ptr [[FIRST]], align 4
+// CHECK: [[ADD2:%.*]] = add nsw i32 [[ADD]], [[TMP6]]
+// CHECK: [[SECOND:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[TMP3]], i32 0, i32 1
+// CHECK: [[TMP7:%.*]] = load i32, ptr [[SECOND]], align 4
+// CHECK: [[ADD3:%.*]] = add nsw i32 [[ADD2]], [[TMP7]]
+// CHECK: call void {{.*}}use{{.*}}(i32 noundef [[ADD3]])
 

>From f840cd350d45eece92750730492e450164e8f8b9 Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Thu, 30 Apr 2026 05:34:59 -0700
Subject: [PATCH 09/45] Fixed LIT test.

---
 clang/test/OpenMP/structured-bindings-codegen.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/clang/test/OpenMP/structured-bindings-codegen.cpp b/clang/test/OpenMP/structured-bindings-codegen.cpp
index 81d5c9298ede2..6e895491ea5bd 100644
--- a/clang/test/OpenMP/structured-bindings-codegen.cpp
+++ b/clang/test/OpenMP/structured-bindings-codegen.cpp
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -x c++ -std=c++20 -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -x c++ -std=c++20 \
+// RUN: -emit-llvm %s -o - | FileCheck %s
 
 // expected-no-diagnostics
 

>From b39bad61c5232eb0f8d102ab66d6009aa8a28218 Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Thu, 30 Apr 2026 13:31:33 -0700
Subject: [PATCH 10/45] Addressed review comments

---
 clang/lib/CodeGen/CGExpr.cpp                  | 150 +++----
 clang/lib/CodeGen/CodeGenFunction.h           |   1 +
 .../OpenMP/structured-bindings-codegen.cpp    | 409 +++++++++++++++---
 3 files changed, 409 insertions(+), 151 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index d640b8e698def..9a0dc0a88c6be 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3589,6 +3589,61 @@ static bool canEmitSpuriousReferenceToVariable(CodeGenFunction &CGF,
   }
 }
 
+/// Emit an LValue for a structured binding captured in an OpenMP region by
+/// projecting it out of the captured decomposed declaration. Only struct-field
+/// and array-element bindings are handled; anything else is unreachable.
+LValue CodeGenFunction::EmitOMPCapturedBindingLValue(const BindingDecl *BD) {
+  assert(CapturedStmtInfo &&
+         CapturedStmtInfo->getKind() == CapturedRegionKind::CR_OpenMP &&
+             CGM.getLangOpts().OpenMP);
+  auto *DD = cast<VarDecl>(BD->getDecomposedDecl());
+  auto I = LocalDeclMap.find(DD);
+  assert(I != LocalDeclMap.end() && "Decomposed decl not in LocalDeclMap");
+
+  Address ParamAddr = I->second;
+  QualType AggregType = DD->getType();
+  if (AggregType->isReferenceType())
+    AggregType = AggregType->getPointeeType();
+
+  LValue CapLVal;
+  llvm::Type *ParamLLVMType = ParamAddr.getElementType();
+  if (ParamLLVMType->isPointerTy()) {
+    llvm::Value *Ptr = Builder.CreateLoad(ParamAddr, "captured.val");
+    Address AggregAddr(Ptr, ConvertTypeForMem(AggregType),
+                       getContext().getDeclAlign(DD));
+    CapLVal = MakeAddrLValue(AggregAddr, AggregType);
+  } else {
+    Address AggregAddr(ParamAddr.emitRawPointer(*this),
+                       ConvertTypeForMem(AggregType), ParamAddr.getAlignment());
+    CapLVal = MakeAddrLValue(AggregAddr, AggregType);
+  }
+  // Select the requested binding within the decomposed object's LValue.
+  Expr *BindingExpr = BD->getBinding()->IgnoreImplicit();
+  if (auto *ME = dyn_cast<MemberExpr>(BindingExpr)) {
+    // Member binding: access the field.
+    FieldDecl *Field = cast<FieldDecl>(ME->getMemberDecl());
+    return EmitLValueForField(CapLVal, Field);
+  } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(BindingExpr)) {
+    // Array binding: access the element.
+    Address Base = CapLVal.getAddress();
+    llvm::Value *Idx = EmitScalarExpr(ASE->getIdx());
+    llvm::Value *Indices[] = {llvm::ConstantInt::get(Int32Ty, 0), Idx};
+    llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(ASE->getType());
+    llvm::Value *EltPtr = Builder.CreateInBoundsGEP(
+        Base.getElementType(), Base.emitRawPointer(*this), Indices, "arrayidx");
+    CharUnits Align = Base.getAlignment().alignmentOfArrayElement(
+        getContext().getTypeSizeInChars(ASE->getType()));
+    Address EltAddr(EltPtr, ElemTy, Align);
+    return MakeAddrLValue(EltAddr, ASE->getType());
+  }
+
+  // TODO: Tuple-like bindings (std::tuple, std::pair via the tuple protocol)
+  // are backed by hidden temporaries that are not captured in OpenMP
+  // regions; supporting them requires re-emitting the get<N>() call on the
+  // captured base object. Until then they reach the unreachable below.
+  llvm_unreachable("Unexpected structured binding type in OpenMP");
+}
+
 LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
   const NamedDecl *ND = E->getDecl();
   QualType T = E->getType();
@@ -3772,96 +3827,11 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
   if (const auto *BD = dyn_cast<BindingDecl>(ND)) {
     if (E->refersToEnclosingVariableOrCapture()) {
       // OpenMP case: binding was captured via its decomposed decl.
-      if (auto *DD = dyn_cast<VarDecl>(BD->getDecomposedDecl())) {
-        if (CapturedStmtInfo &&
-            CapturedStmtInfo->getKind() == CapturedRegionKind::CR_OpenMP &&
-            CGM.getLangOpts().OpenMP) {
-          auto I = LocalDeclMap.find(DD);
-          if (I != LocalDeclMap.end()) {
-            Address DDAddr = I->second;
-            llvm::Type *ExpectedTy = CGM.getTypes().ConvertTypeForMem(
-                DD->getType().getCanonicalType());
-            if (DDAddr.getElementType() != ExpectedTy)
-              DDAddr = DDAddr.withElementType(ExpectedTy);
-            LValue CapLVal;
-            if (DD->getType()->isReferenceType())
-              CapLVal = EmitLoadOfReferenceLValue(DDAddr, DD->getType(),
-                                                  AlignmentSource::Decl);
-            else
-              CapLVal =
-                  MakeAddrLValue(DDAddr, DD->getType().getCanonicalType());
-            if (getLangOpts().OpenMP &&
-                CGM.getOpenMPRuntime().isNontemporalDecl(DD))
-              CapLVal.setNontemporal(/*Value=*/true);
-            // Extract the specific binding from the decomposed object.
-            Expr *BindingExpr = BD->getBinding()->IgnoreImplicit();
-            if (auto *ME = dyn_cast<MemberExpr>(BindingExpr)) {
-              // Struct/union: access field.
-              return EmitLValueForField(CapLVal,
-                                        cast<FieldDecl>(ME->getMemberDecl()));
-            }
-            if (auto *ASE = dyn_cast<ArraySubscriptExpr>(BindingExpr)) {
-              Address Base = CapLVal.getAddress();
-              llvm::Value *Idx = EmitScalarExpr(ASE->getIdx());
-              llvm::Value *Indices[] = {llvm::ConstantInt::get(Int32Ty, 0),
-                                        Idx};
-              llvm::Type *ElemTy =
-                  CGM.getTypes().ConvertTypeForMem(ASE->getType());
-              llvm::Value *EltPtr = Builder.CreateInBoundsGEP(
-                  Base.getElementType(), Base.emitRawPointer(*this), Indices,
-                  "arrayidx");
-              CharUnits Align = Base.getAlignment().alignmentOfArrayElement(
-                  getContext().getTypeSizeInChars(ASE->getType()));
-              Address EltAddr(EltPtr, ElemTy, Align);
-              return MakeAddrLValue(EltAddr, ASE->getType());
-            }
-            // Fallback for complex binding types.
-            // TODO: Tuple bindings (std::tuple, std::pair via tuple protocol)
-            // use hidden temporary variables that aren't captured in OpenMP
-            // regions. Need to re-emit the get<N>() call on the captured tuple
-            // base object. For now, this will fail.
-            if (isa<DeclRefExpr>(BindingExpr))
-              llvm_unreachable(
-                  "tuple-like structured bindings not yet supported in OpenMP");
-            return EmitLValue(BindingExpr);
-          }
-          // DD not in LocalDeclMap, check capture struct
-          if (auto *FD = CapturedStmtInfo->lookup(DD)) {
-            LValue CapLVal = EmitCapturedFieldLValue(
-                *this, FD, CapturedStmtInfo->getContextValue());
-            Address Addr = CapLVal.getAddress();
-            llvm::Type *ExpectedTy = CGM.getTypes().ConvertTypeForMem(
-                DD->getType().getCanonicalType());
-            if (Addr.getElementType() != ExpectedTy)
-              Addr = Addr.withElementType(ExpectedTy);
-            CapLVal = MakeAddrLValue(Addr, DD->getType().getCanonicalType());
-            if (DD->getType()->isReferenceType())
-              CapLVal = EmitLoadOfReferenceLValue(
-                  CapLVal.getAddress(), DD->getType(), AlignmentSource::Decl);
-            if (getLangOpts().OpenMP &&
-                CGM.getOpenMPRuntime().isNontemporalDecl(DD))
-              CapLVal.setNontemporal(/*Value=*/true);
-
-            // Extract the specific binding.
-            Expr *BindingExpr = BD->getBinding()->IgnoreImplicit();
-            if (auto *ME = dyn_cast<MemberExpr>(BindingExpr)) {
-              return EmitLValueForField(CapLVal,
-                                        cast<FieldDecl>(ME->getMemberDecl()));
-            }
-            if (auto *ASE = dyn_cast<ArraySubscriptExpr>(BindingExpr)) {
-              Address Base = CapLVal.getAddress();
-              llvm::Value *Idx = EmitScalarExpr(ASE->getIdx());
-              llvm::Value *EltPtr = Builder.CreateInBoundsGEP(
-                  Base.getElementType(), Base.emitRawPointer(*this), Idx,
-                  "arrayidx");
-              CharUnits Align = Base.getAlignment().alignmentOfArrayElement(
-                  getContext().getTypeSizeInChars(ASE->getType()));
-              Address EltAddr(EltPtr, Base.getElementType(), Align);
-              return MakeAddrLValue(EltAddr, ASE->getType());
-            }
-            return EmitLValue(BindingExpr);
-          }
-        }
+      if (CapturedStmtInfo &&
+          CapturedStmtInfo->getKind() == CapturedRegionKind::CR_OpenMP &&
+          CGM.getLangOpts().OpenMP) {
+        // Project the binding out of the captured decomposed declaration.
+        return EmitOMPCapturedBindingLValue(BD);
       }
       // Non-OpenMP case: lambda capture.
       auto *FD = LambdaCaptureFields.lookup(BD);
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 29b87a0616992..b01f92c1b6a92 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4461,6 +4461,7 @@ class CodeGenFunction : public CodeGenTypeCache {
   // Note: only available for agg return types
   LValue EmitVAArgExprLValue(const VAArgExpr *E);
   LValue EmitDeclRefLValue(const DeclRefExpr *E);
+  LValue EmitOMPCapturedBindingLValue(const BindingDecl *BD);
   LValue EmitStringLiteralLValue(const StringLiteral *E);
   LValue EmitObjCEncodeExprLValue(const ObjCEncodeExpr *E);
   LValue EmitPredefinedLValue(const PredefinedExpr *E);
diff --git a/clang/test/OpenMP/structured-bindings-codegen.cpp b/clang/test/OpenMP/structured-bindings-codegen.cpp
index 6e895491ea5bd..f1ae96a273443 100644
--- a/clang/test/OpenMP/structured-bindings-codegen.cpp
+++ b/clang/test/OpenMP/structured-bindings-codegen.cpp
@@ -1,5 +1,6 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --filter-out-after "getelem.*kernel" --filter-out "= alloca.*" --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --global-value-regex "\.offload_.*" --global-hex-value-regex ".offload_maptypes.*" --version 6
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -x c++ -std=c++20 \
-// RUN: -emit-llvm %s -o - | FileCheck %s
+// RUN: -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
 
 // expected-no-diagnostics
 
@@ -17,9 +18,6 @@ void test_struct() {
     use(m + n);
   }
 }
-// CHECK-LABEL: @{{.*}}test_struct{{.*}}.omp_outlined{{.*}}(
-// CHECK: getelementptr inbounds{{.*}}i32 0, i32 0
-// CHECK: getelementptr inbounds{{.*}}i32 0, i32 1
 
 // Pair binding.
 struct pair {
@@ -36,12 +34,6 @@ void test_pair() {
     use(a);
   }
 }
-// CHECK-LABEL: @{{.*}}test_pair{{.*}}.omp_outlined{{.*}}(
-// CHECK: [[TMP1:%.*]] = load ptr, ptr {{.*}}, align 8
-// CHECK: [[FIRST:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP1]], i32 0, i32 0
-// CHECK: [[TMP2:%.*]] = load i32, ptr [[FIRST]], align 4
-// CHECK: call void {{.*}}use{{.*}}"(i32 noundef [[TMP2]])
-//
 
 // Array binding.
 void test_array() {
@@ -52,13 +44,6 @@ void test_array() {
     use(x + y);
   }
 }
-// CHECK-LABEL: @{{.*}}test_array{{.*}}.omp_outlined{{.*}}(
-// CHECK: [[TMP1:%.*]] = load ptr, ptr {{.*}}, align 8
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i32 0, i32 0
-// CHECK: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-// CHECK: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i32 0, i32 1
-// CHECK:  [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
-//
 
 // Binding with bitfields.
 struct S {
@@ -73,15 +58,6 @@ void test_bitfields() {
     use(a + b);
   }
 }
-// CHECK-LABEL: @{{.*}}test_bitfields{{.*}}.omp_outlined{{.*}}(
-// CHECK: [[TMP1:%.*]] = load ptr, ptr {{.*}}, align 8
-// CHECK: [[BF_LOAD:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 28
-// CHECK: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 28
-// CHECK: [[BF_LOAD1:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK: [[BF_SHL2:%.*]] = shl i32 [[BF_LOAD1]], 24
-// CHECK: [[BF_ASHR3:%.*]] = ashr i32 [[BF_SHL2]], 28
-//
 
 // Lambda inside OpenMP with captured bindings.
 void test_with_lambda() {
@@ -91,13 +67,6 @@ void test_with_lambda() {
     for (int j = 0; j < 10; j++)
       [m, n](int i, int j) -> void { return; }(i, j);
 }
-// CHECK-LABEL: @{{.*}}test_with_lambda{{.*}}.omp_outlined{{.*}}(
-// CHECK: [[TMP1:%.*]] = load ptr, ptr {{.*}}, align 8
-// CHECK: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
-// CHECK: [[TMP13:%.*]] = load i32, ptr [[X]], align 4
-// CHECK: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
-// CHECK: [[TMP15:%.*]] = load i32, ptr [[Y]], align 4
-//
 
 // Only one binding used.
 void test_partial_capture() {
@@ -107,12 +76,6 @@ void test_partial_capture() {
     use(a);
   }
 }
-// CHECK-LABEL: @{{.*}}test_partial_capture{{.*}}.omp_outlined{{.*}}(
-// CHECK: [[TMP1:%.*]] = load ptr, ptr {{.*}}, align 8
-// CHECK: [[FIRST:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP1]], i32 0, i32 0
-// CHECK: [[TMP2:%.*]] = load i32, ptr [[FIRST]], align 4
-// CHECK: call void {{.*}}use{{.*}}"(i32 noundef [[TMP2]])
-//
 
 // Nested parallel regions.
 void test_nested() {
@@ -126,13 +89,6 @@ void test_nested() {
     }
   }
 }
-// CHECK-LABEL: @{{.*}}test_nested{{.*}}.omp_outlined{{.*}}(
-// CHECK: [[TMP2:%.*]] = load ptr, ptr {{.*}}, align 8
-// CHECK: [[TMP3:%.*]] = load ptr, ptr {{.*}}, align 8
-// CHECK: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load i32, ptr [[X]], align 4
-// CHECK: call void @{{.*}}use{{.*}}"(i32 noundef [[TMP4]])
-//
 
 // Multiple bindings in same region.
 void test_multiple() {
@@ -143,19 +99,350 @@ void test_multiple() {
     use(a + b + c + d);
   }
 }
-// CHECK-LABEL: @{{.*}}test_multiple{{.*}}.omp_outlined{{.*}}(
-// CHECK: [[TMP2:%.*]] = load ptr, ptr {{.*}}, align 8
-// CHECK: [[TMP3:%.*]] = load ptr, ptr {{.*}}, align 8
-// CHECK: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
-// CHECK: [[TMP4:%.*]] = load i32, ptr [[X]], align 4
-// CHECK: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP2]], i32 0, i32 1
-// CHECK: [[TMP5:%.*]] = load i32, ptr [[Y]], align 4
-// CHECK: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]]
-// CHECK: [[FIRST:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP3]], i32 0, i32 0
-// CHECK: [[TMP6:%.*]] = load i32, ptr [[FIRST]], align 4
-// CHECK: [[ADD2:%.*]] = add nsw i32 [[ADD]], [[TMP6]]
-// CHECK: [[SECOND:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[TMP3]], i32 0, i32 1
-// CHECK: [[TMP7:%.*]] = load i32, ptr [[SECOND]], align 4
-// CHECK: [[ADD3:%.*]] = add nsw i32 [[ADD2]], [[TMP7]]
-// CHECK: call void {{.*}}use{{.*}}(i32 noundef [[ADD3]])
 
+// Reference structured binding.
+void test_reference_binding() {
+  Point p = make_point();
+  auto& [m, n] = p;
+#pragma omp parallel
+  { use(m); }
+}
+// CHECK-LABEL: define dso_local i64 @_Z10make_pointv(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[RETVAL:%.*]], i32 0, i32 0
+// CHECK:    store i32 1, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[RETVAL]], i32 0, i32 1
+// CHECK:    store i32 2, ptr [[Y]], align 4
+// CHECK:    [[TMP0:%.*]] = load i64, ptr [[RETVAL]], align 4
+// CHECK:    ret i64 [[TMP0]]
+//
+//
+// CHECK-LABEL: define dso_local void @_Z11test_structv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    [[CALL:%.*]] = call i64 @_Z10make_pointv()
+// CHECK:    store i64 [[CALL]], ptr [[TMP0:%.*]], align 4
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @_Z11test_structv.omp_outlined, ptr [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z11test_structv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2:![0-9]+]], !align [[META3:![0-9]+]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
+// CHECK:    call void @_Z3usei(i32 noundef [[ADD]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local i64 @_Z9make_pairii(
+// CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store i32 [[A]], ptr [[A_ADDR:%.*]], align 4
+// CHECK:    store i32 [[B]], ptr [[B_ADDR:%.*]], align 4
+// CHECK:    [[FIRST:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[RETVAL:%.*]], i32 0, i32 0
+// CHECK:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK:    store i32 [[TMP0]], ptr [[FIRST]], align 4
+// CHECK:    [[SECOND:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[RETVAL]], i32 0, i32 1
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK:    store i32 [[TMP1]], ptr [[SECOND]], align 4
+// CHECK:    [[TMP2:%.*]] = load i64, ptr [[RETVAL]], align 4
+// CHECK:    ret i64 [[TMP2]]
+//
+//
+// CHECK-LABEL: define dso_local void @_Z9test_pairv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    [[CALL:%.*]] = call i64 @_Z9make_pairii(i32 noundef 1, i32 noundef 2)
+// CHECK:    store i64 [[CALL]], ptr [[TMP0:%.*]], align 4
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z9test_pairv.omp_outlined, ptr [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z9test_pairv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR1]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[FIRST:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[FIRST]], align 4
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP2]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z10test_arrayv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*]]:
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARR:%.*]], ptr align 4 @__const._Z10test_arrayv.arr, i64 8, i1 false)
+// CHECK:    [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP0:%.*]], i64 0, i64 0
+// CHECK:    br label %[[ARRAYINIT_BODY:.*]]
+// CHECK:       [[ARRAYINIT_BODY]]:
+// CHECK:    [[ARRAYINIT_INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[ARRAYINIT_NEXT:%.*]], %[[ARRAYINIT_BODY]] ]
+// CHECK:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[ARRAYINIT_BEGIN]], i64 [[ARRAYINIT_INDEX]]
+// CHECK:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [2 x i32], ptr [[ARR]], i64 0, i64 [[ARRAYINIT_INDEX]]
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// CHECK:    store i32 [[TMP2]], ptr [[TMP1]], align 4
+// CHECK:    [[ARRAYINIT_NEXT]] = add nuw i64 [[ARRAYINIT_INDEX]], 1
+// CHECK:    [[ARRAYINIT_DONE:%.*]] = icmp eq i64 [[ARRAYINIT_NEXT]], 2
+// CHECK:    br i1 [[ARRAYINIT_DONE]], label %[[ARRAYINIT_END:.*]], label %[[ARRAYINIT_BODY]]
+// CHECK:       [[ARRAYINIT_END]]:
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z10test_arrayv.omp_outlined, ptr [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z10test_arrayv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR1]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// CHECK:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
+// CHECK:    call void @_Z3usei(i32 noundef [[ADD]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z14test_bitfieldsv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[S:%.*]], ptr align 4 @__const._Z14test_bitfieldsv.s, i64 4, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[S]], i64 4, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z14test_bitfieldsv.omp_outlined, ptr [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z14test_bitfieldsv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR1]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[BF_LOAD:%.*]] = load i8, ptr [[TMP1]], align 4
+// CHECK:    [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 4
+// CHECK:    [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 4
+// CHECK:    [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32
+// CHECK:    [[BF_LOAD1:%.*]] = load i8, ptr [[TMP1]], align 4
+// CHECK:    [[BF_ASHR2:%.*]] = ashr i8 [[BF_LOAD1]], 4
+// CHECK:    [[BF_CAST3:%.*]] = sext i8 [[BF_ASHR2]] to i32
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[BF_CAST3]]
+// CHECK:    call void @_Z3usei(i32 noundef [[ADD]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z16test_with_lambdav(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    [[CALL:%.*]] = call i64 @_Z10make_pointv()
+// CHECK:    store i64 [[CALL]], ptr [[TMP0:%.*]], align 4
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z16test_with_lambdav.omp_outlined, ptr [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z16test_with_lambdav.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR1]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
+// CHECK:    store i32 99, ptr [[DOTOMP_UB:%.*]], align 4
+// CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
+// CHECK:    store i32 0, ptr [[DOTOMP_IS_LAST:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99
+// CHECK:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK:       [[COND_TRUE]]:
+// CHECK:    br label %[[COND_END:.*]]
+// CHECK:       [[COND_FALSE]]:
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    br label %[[COND_END]]
+// CHECK:       [[COND_END]]:
+// CHECK:    [[COND:%.*]] = phi i32 [ 99, %[[COND_TRUE]] ], [ [[TMP5]], %[[COND_FALSE]] ]
+// CHECK:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK:    store i32 [[TMP6]], ptr [[DOTOMP_IV:%.*]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
+// CHECK:       [[OMP_INNER_FOR_COND]]:
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
+// CHECK:    br i1 [[CMP2]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
+// CHECK:       [[OMP_INNER_FOR_BODY]]:
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[DIV:%.*]] = sdiv i32 [[TMP9]], 10
+// CHECK:    [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
+// CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[DIV3:%.*]] = sdiv i32 [[TMP11]], 10
+// CHECK:    [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 10
+// CHECK:    [[SUB:%.*]] = sub nsw i32 [[TMP10]], [[MUL4]]
+// CHECK:    [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
+// CHECK:    [[ADD6:%.*]] = add nsw i32 0, [[MUL5]]
+// CHECK:    store i32 [[ADD6]], ptr [[J:%.*]], align 4
+// CHECK:    [[TMP12:%.*]] = getelementptr inbounds nuw [[CLASS_ANON:%.*]], ptr [[REF_TMP:%.*]], i32 0, i32 0
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP13:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    store i32 [[TMP13]], ptr [[TMP12]], align 4
+// CHECK:    [[TMP14:%.*]] = getelementptr inbounds nuw [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP15:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    store i32 [[TMP15]], ptr [[TMP14]], align 4
+// CHECK:    [[TMP16:%.*]] = load i32, ptr [[I]], align 4
+// CHECK:    [[TMP17:%.*]] = load i32, ptr [[J]], align 4
+// CHECK:    call void @"_ZZ16test_with_lambdavENK3$_0clEii"(ptr noundef nonnull align 4 dereferenceable(8) [[REF_TMP]], i32 noundef [[TMP16]], i32 noundef [[TMP17]])
+// CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
+// CHECK:       [[OMP_BODY_CONTINUE]]:
+// CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
+// CHECK:       [[OMP_INNER_FOR_INC]]:
+// CHECK:    [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[ADD7:%.*]] = add nsw i32 [[TMP18]], 1
+// CHECK:    store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND]]
+// CHECK:       [[OMP_INNER_FOR_END]]:
+// CHECK:    br label %[[OMP_LOOP_EXIT:.*]]
+// CHECK:       [[OMP_LOOP_EXIT]]:
+// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z20test_partial_capturev(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    [[CALL:%.*]] = call i64 @_Z9make_pairii(i32 noundef 1, i32 noundef 2)
+// CHECK:    store i64 [[CALL]], ptr [[TMP0:%.*]], align 4
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z20test_partial_capturev.omp_outlined, ptr [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z20test_partial_capturev.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR1]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[FIRST:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[FIRST]], align 4
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP2]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z11test_nestedv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    [[CALL:%.*]] = call i64 @_Z10make_pointv()
+// CHECK:    store i64 [[CALL]], ptr [[TMP0:%.*]], align 4
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @_Z11test_nestedv.omp_outlined, ptr [[TMP0]], ptr [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z11test_nestedv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP1:%.*]]) #[[ATTR1]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP4]])
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z11test_nestedv.omp_outlined.omp_outlined, ptr [[TMP2]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z11test_nestedv.omp_outlined.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR1]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP2]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z13test_multiplev(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    [[CALL:%.*]] = call i64 @_Z10make_pointv()
+// CHECK:    store i64 [[CALL]], ptr [[TMP0:%.*]], align 4
+// CHECK:    [[CALL1:%.*]] = call i64 @_Z9make_pairii(i32 noundef 3, i32 noundef 4)
+// CHECK:    store i64 [[CALL1]], ptr [[TMP1:%.*]], align 4
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @_Z13test_multiplev.omp_outlined, ptr [[TMP0]], ptr [[TMP1]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z13test_multiplev.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP1:%.*]]) #[[ATTR1]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP2]], i32 0, i32 1
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]]
+// CHECK:    [[FIRST:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP3]], i32 0, i32 0
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[FIRST]], align 4
+// CHECK:    [[ADD2:%.*]] = add nsw i32 [[ADD]], [[TMP6]]
+// CHECK:    [[SECOND:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[TMP3]], i32 0, i32 1
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[SECOND]], align 4
+// CHECK:    [[ADD3:%.*]] = add nsw i32 [[ADD2]], [[TMP7]]
+// CHECK:    call void @_Z3usei(i32 noundef [[ADD3]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z22test_reference_bindingv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    [[CALL:%.*]] = call i64 @_Z10make_pointv()
+// CHECK:    store i64 [[CALL]], ptr [[P:%.*]], align 4
+// CHECK:    store ptr [[P]], ptr [[TMP0:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z22test_reference_bindingv.omp_outlined, ptr [[TMP1]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z22test_reference_bindingv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR1]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store ptr [[TMP1]], ptr [[TMP:%.*]], align 8
+// CHECK:    [[CAPTURED_VAL:%.*]] = load ptr, ptr [[TMP]], align 8
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[CAPTURED_VAL]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 8
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP2]])
+// CHECK:    ret void
+//

>From 0aea9f294d8ecb54017a1fbbc5abb142d708ba34 Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Thu, 30 Apr 2026 13:53:23 -0700
Subject: [PATCH 11/45] Fix format

---
 clang/lib/CodeGen/CGExpr.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 9a0dc0a88c6be..38147ced873f8 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3595,7 +3595,7 @@ static bool canEmitSpuriousReferenceToVariable(CodeGenFunction &CGF,
 LValue CodeGenFunction::EmitOMPCapturedBindingLValue(const BindingDecl *BD) {
   assert(CapturedStmtInfo &&
          CapturedStmtInfo->getKind() == CapturedRegionKind::CR_OpenMP &&
-             CGM.getLangOpts().OpenMP);
+         CGM.getLangOpts().OpenMP);
   auto *DD = cast<VarDecl>(BD->getDecomposedDecl());
   auto I = LocalDeclMap.find(DD);
   assert(I != LocalDeclMap.end() && "Decomposed decl not in LocalDeclMap");

>From 340302fc9bca1e83f4a1eceb3f16f3e7dbc8d194 Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Fri, 1 May 2026 09:06:41 -0700
Subject: [PATCH 12/45] Addressed review comments

---
 clang/lib/CodeGen/CGExpr.cpp                  | 22 +++++--------------
 .../OpenMP/structured-bindings-codegen.cpp    |  5 +++--
 2 files changed, 9 insertions(+), 18 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 38147ced873f8..cabde70c31a9b 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3597,26 +3597,16 @@ LValue CodeGenFunction::EmitOMPCapturedBindingLValue(const BindingDecl *BD) {
          CapturedStmtInfo->getKind() == CapturedRegionKind::CR_OpenMP &&
          CGM.getLangOpts().OpenMP);
   auto *DD = cast<VarDecl>(BD->getDecomposedDecl());
-  auto I = LocalDeclMap.find(DD);
-  assert(I != LocalDeclMap.end() && "Decomposed decl not in LocalDeclMap");
-
-  Address ParamAddr = I->second;
   QualType AggregType = DD->getType();
   if (AggregType->isReferenceType())
     AggregType = AggregType->getPointeeType();
+  DeclarationNameInfo NameInfo(DD->getDeclName(), SourceLocation());
+  DeclRefExpr *DRE = DeclRefExpr::Create(
+      getContext(), NestedNameSpecifierLoc(), SourceLocation(), DD,
+      /*RefersToEnclosingVariableOrCapture=*/false, NameInfo, AggregType,
+      VK_LValue);
+  LValue CapLVal = EmitLValue(DRE);
 
-  LValue CapLVal;
-  llvm::Type *ParamLLVMType = ParamAddr.getElementType();
-  if (ParamLLVMType->isPointerTy()) {
-    llvm::Value *Ptr = Builder.CreateLoad(ParamAddr, "captured.val");
-    Address AggregAddr(Ptr, ConvertTypeForMem(AggregType),
-                       getContext().getDeclAlign(DD));
-    CapLVal = MakeAddrLValue(AggregAddr, AggregType);
-  } else {
-    Address AggregAddr(ParamAddr.emitRawPointer(*this),
-                       ConvertTypeForMem(AggregType), ParamAddr.getAlignment());
-    CapLVal = MakeAddrLValue(AggregAddr, AggregType);
-  }
   // Extract the specific binding from the decomposed object.
   Expr *BindingExpr = BD->getBinding()->IgnoreImplicit();
   if (auto *ME = dyn_cast<MemberExpr>(BindingExpr)) {
diff --git a/clang/test/OpenMP/structured-bindings-codegen.cpp b/clang/test/OpenMP/structured-bindings-codegen.cpp
index f1ae96a273443..3c6cfdd35dd16 100644
--- a/clang/test/OpenMP/structured-bindings-codegen.cpp
+++ b/clang/test/OpenMP/structured-bindings-codegen.cpp
@@ -440,9 +440,10 @@ void test_reference_binding() {
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
 // CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    store ptr [[TMP1]], ptr [[TMP:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    [[CAPTURED_VAL:%.*]] = load ptr, ptr [[TMP]], align 8
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[CAPTURED_VAL]], i32 0, i32 0
-// CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 8
-// CHECK:    call void @_Z3usei(i32 noundef [[TMP2]])
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[X]], align 8
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP3]])
 // CHECK:    ret void
 //

>From 61d5d1a3894f6b46bb5d4a8d707a87b2aeeff8fb Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Fri, 1 May 2026 13:13:31 -0700
Subject: [PATCH 13/45] Fix LIT test failure

---
 clang/lib/CodeGen/CGExpr.cpp                      | 14 +++++++++++---
 clang/test/OpenMP/structured-bindings-codegen.cpp |  5 ++---
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index cabde70c31a9b..bac486a557721 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3603,17 +3603,25 @@ LValue CodeGenFunction::EmitOMPCapturedBindingLValue(const BindingDecl *BD) {
   DeclarationNameInfo NameInfo(DD->getDeclName(), SourceLocation());
   DeclRefExpr *DRE = DeclRefExpr::Create(
       getContext(), NestedNameSpecifierLoc(), SourceLocation(), DD,
-      /*RefersToEnclosingVariableOrCapture=*/false, NameInfo, AggregType,
+      /*RefersToEnclosingVariableOrCapture=*/true, NameInfo, AggregType,
       VK_LValue);
   LValue CapLVal = EmitLValue(DRE);
-
+  QualType CanonType = AggregType.getCanonicalType();
+  llvm::Type *StructTy = CGM.getTypes().ConvertTypeForMem(CanonType);
+  Address Addr = CapLVal.getAddress();
+  if (Addr.getElementType() != StructTy) {
+    Addr = Addr.withElementType(StructTy);
+    CapLVal = MakeAddrLValue(Addr, CanonType, CapLVal.getBaseInfo(),
+                             CapLVal.getTBAAInfo());
+  }
   // Extract the specific binding from the decomposed object.
   Expr *BindingExpr = BD->getBinding()->IgnoreImplicit();
   if (auto *ME = dyn_cast<MemberExpr>(BindingExpr)) {
     // Struct/union: access field.
     FieldDecl *Field = cast<FieldDecl>(ME->getMemberDecl());
     return EmitLValueForField(CapLVal, Field);
-  } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(BindingExpr)) {
+  }
+  if (auto *ASE = dyn_cast<ArraySubscriptExpr>(BindingExpr)) {
     // Array binding - access element.
     Address Base = CapLVal.getAddress();
     llvm::Value *Idx = EmitScalarExpr(ASE->getIdx());
diff --git a/clang/test/OpenMP/structured-bindings-codegen.cpp b/clang/test/OpenMP/structured-bindings-codegen.cpp
index 3c6cfdd35dd16..bd1f463f28615 100644
--- a/clang/test/OpenMP/structured-bindings-codegen.cpp
+++ b/clang/test/OpenMP/structured-bindings-codegen.cpp
@@ -441,9 +441,8 @@ void test_reference_binding() {
 // CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    store ptr [[TMP1]], ptr [[TMP:%.*]], align 8
 // CHECK:    [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    [[CAPTURED_VAL:%.*]] = load ptr, ptr [[TMP]], align 8
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[CAPTURED_VAL]], i32 0, i32 0
-// CHECK:    [[TMP3:%.*]] = load i32, ptr [[X]], align 8
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[X]], align 4
 // CHECK:    call void @_Z3usei(i32 noundef [[TMP3]])
 // CHECK:    ret void
 //

>From ea142ebe5fdf6adcfabb21a0206d4679c67b0c52 Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Mon, 4 May 2026 13:30:25 -0700
Subject: [PATCH 14/45] Addressed review comments

---
 clang/lib/CodeGen/CGExpr.cpp                  | 11 +--------
 clang/lib/Sema/SemaStmt.cpp                   | 24 +++++++++++++++++--
 .../OpenMP/structured-bindings-codegen.cpp    | 22 ++++++++---------
 3 files changed, 34 insertions(+), 23 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index bac486a557721..50b5bef33dfec 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3623,16 +3623,7 @@ LValue CodeGenFunction::EmitOMPCapturedBindingLValue(const BindingDecl *BD) {
   }
   if (auto *ASE = dyn_cast<ArraySubscriptExpr>(BindingExpr)) {
     // Array binding - access element.
-    Address Base = CapLVal.getAddress();
-    llvm::Value *Idx = EmitScalarExpr(ASE->getIdx());
-    llvm::Value *Indices[] = {llvm::ConstantInt::get(Int32Ty, 0), Idx};
-    llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(ASE->getType());
-    llvm::Value *EltPtr = Builder.CreateInBoundsGEP(
-        Base.getElementType(), Base.emitRawPointer(*this), Indices, "arrayidx");
-    CharUnits Align = Base.getAlignment().alignmentOfArrayElement(
-        getContext().getTypeSizeInChars(ASE->getType()));
-    Address EltAddr(EltPtr, ElemTy, Align);
-    return MakeAddrLValue(EltAddr, ASE->getType());
+    return EmitLValue(BD->getBinding(), NotKnownNonNull);
   }
 
   // TODO: Tuple bindings (std::tuple, std::pair via tuple protocol)
diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp
index 9f90dc1fda665..26852795223be 100644
--- a/clang/lib/Sema/SemaStmt.cpp
+++ b/clang/lib/Sema/SemaStmt.cpp
@@ -4696,8 +4696,28 @@ buildCapturedStmtCaptureList(Sema &S, CapturedRegionScopeInfo *RSI,
     // FIXME: Bail out now if the capture is not used and the initializer has
     // no side-effects.
 
-    // Create a field for this capture.
-    FieldDecl *Field = S.BuildCaptureField(RSI->TheRecordDecl, Cap);
+    // Build the capture field. For OpenMP BindingDecl captures redirected
+    // to their DecompositionDecl, the field type must use the
+    // DecompositionDecl's type (e.g. int[2]) not the BindingDecl's type (e.g.
+    // int).
+    FieldDecl *Field = nullptr;
+    if (RSI->CapRegionKind == CR_OpenMP && CapVar &&
+        CapVar != Cap.getVariable() && isa<DecompositionDecl>(CapVar)) {
+      // Manually build a reference field with the DecompositionDecl's type.
+      QualType DDType = cast<VarDecl>(CapVar)->getType();
+      QualType RefType = S.Context.getLValueReferenceType(DDType);
+      TypeSourceInfo *TSI =
+          S.Context.getTrivialTypeSourceInfo(RefType, Cap.getLocation());
+      Field = FieldDecl::Create(S.Context, RSI->TheRecordDecl,
+                                Cap.getLocation(), Cap.getLocation(),
+                                /*Id=*/nullptr, RefType, TSI,
+                                /*BW=*/nullptr, /*Mutable=*/false, ICIS_NoInit);
+      Field->setImplicit(true);
+      Field->setAccess(AS_private);
+      RSI->TheRecordDecl->addDecl(Field);
+    } else {
+      Field = S.BuildCaptureField(RSI->TheRecordDecl, Cap);
+    }
 
     // Add the capture to our list of captures.
     if (Cap.isThisCapture()) {
diff --git a/clang/test/OpenMP/structured-bindings-codegen.cpp b/clang/test/OpenMP/structured-bindings-codegen.cpp
index bd1f463f28615..b36d3847dd626 100644
--- a/clang/test/OpenMP/structured-bindings-codegen.cpp
+++ b/clang/test/OpenMP/structured-bindings-codegen.cpp
@@ -128,7 +128,7 @@ void test_reference_binding() {
 //
 //
 // CHECK-LABEL: define internal void @_Z11test_structv.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
@@ -168,7 +168,7 @@ void test_reference_binding() {
 //
 //
 // CHECK-LABEL: define internal void @_Z9test_pairv.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]]) #[[ATTR1]] {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
@@ -201,15 +201,15 @@ void test_reference_binding() {
 //
 //
 // CHECK-LABEL: define internal void @_Z10test_arrayv.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]]) #[[ATTR1]] {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
 // CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i64 0, i64 0
 // CHECK:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-// CHECK:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i64 0, i64 1
 // CHECK:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
 // CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
 // CHECK:    call void @_Z3usei(i32 noundef [[ADD]])
@@ -254,7 +254,7 @@ void test_reference_binding() {
 //
 //
 // CHECK-LABEL: define internal void @_Z16test_with_lambdav.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]]) #[[ATTR1]] {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
@@ -336,7 +336,7 @@ void test_reference_binding() {
 //
 //
 // CHECK-LABEL: define internal void @_Z20test_partial_capturev.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]]) #[[ATTR1]] {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
@@ -358,7 +358,7 @@ void test_reference_binding() {
 //
 //
 // CHECK-LABEL: define internal void @_Z11test_nestedv.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP1:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP1:%.*]]) #[[ATTR1]] {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
@@ -374,7 +374,7 @@ void test_reference_binding() {
 //
 //
 // CHECK-LABEL: define internal void @_Z11test_nestedv.omp_outlined.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]]) #[[ATTR1]] {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
@@ -398,7 +398,7 @@ void test_reference_binding() {
 //
 //
 // CHECK-LABEL: define internal void @_Z13test_multiplev.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP1:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP1:%.*]]) #[[ATTR1]] {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
@@ -433,7 +433,7 @@ void test_reference_binding() {
 //
 //
 // CHECK-LABEL: define internal void @_Z22test_reference_bindingv.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]]) #[[ATTR1]] {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8

>From d35db84309e91db2eb4b53c0f44136d04ed0527e Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Mon, 4 May 2026 13:41:35 -0700
Subject: [PATCH 15/45] Addressed build issue

---
 clang/lib/CodeGen/CGExpr.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 50b5bef33dfec..becd0610e0236 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3621,7 +3621,7 @@ LValue CodeGenFunction::EmitOMPCapturedBindingLValue(const BindingDecl *BD) {
     FieldDecl *Field = cast<FieldDecl>(ME->getMemberDecl());
     return EmitLValueForField(CapLVal, Field);
   }
-  if (auto *ASE = dyn_cast<ArraySubscriptExpr>(BindingExpr)) {
+  if (dyn_cast<ArraySubscriptExpr>(BindingExpr)) {
     // Array binding - access element.
     return EmitLValue(BD->getBinding(), NotKnownNonNull);
   }

>From 1f801e514a4e04df89e9eb2f374bfb1daf3b1f00 Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Tue, 12 May 2026 14:00:29 -0700
Subject: [PATCH 16/45] Addressed Blockers 1, 2 and 6. Not ready for review yet.

---
 clang/docs/ReleaseNotes.rst                   |  11 +-
 .../clang/Basic/DiagnosticSemaKinds.td        |   2 +
 clang/lib/CodeGen/CGExpr.cpp                  |  43 ++---
 clang/lib/Sema/SemaStmt.cpp                   |   9 +-
 .../OpenMP/structured-bindings-messages.cpp   | 148 ++++++++++++++++++
 ...ctured-bindings-template-instantiation.cpp | 124 +++++++++++++++
 6 files changed, 304 insertions(+), 33 deletions(-)
 create mode 100644 clang/test/OpenMP/structured-bindings-messages.cpp
 create mode 100644 clang/test/OpenMP/structured-bindings-template-instantiation.cpp

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 601e78352c4ab..51f6b094d958c 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -772,10 +772,13 @@ OpenMP Support
 - Added support for ``transparent`` clause in task and taskloop directives.
 - Added support for ``use_device_ptr`` clause to accept an optional
   ``fallback`` modifier (``fb_nullify`` or ``fb_preserve``) with OpenMP >= 61.
-- Added support for C++17 structured bindings in OpenMP regions. Structured
-  bindings from structs, classes, and arrays can now be used inside
-  OpenMP directives. Note: Tuple-like bindings (types using the tuple protocol
-  with ``get<N>()``) are not yet supported and will produce a compilation error.
+- Added support for capturing structured bindings in OpenMP regions
+  (a C++20 extension; warned as an extension in C++17). Aggregate bindings
+  (structs, classes, and arrays) can now be captured inside OpenMP directives.
+  Tuple-like bindings (types using the tuple protocol with ``get<N>()``,
+  such as ``std::pair`` or ``std::tuple``) are not yet supported and
+  will produce a compilation error.
+
 
 Improvements
 ^^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 534018daa9178..916ab5ee577e5 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10271,6 +10271,8 @@ def warn_cxx17_compat_capture_binding : Warning<
   "captured structured bindings are incompatible with "
   "C++ standards before C++20">,
   InGroup<CXXPre20Compat>, DefaultIgnore;
+def err_capture_tuple_binding_openmp : Error<
+  "capturing tuple-like structured binding %0 is not yet supported in OpenMP">;
 
 def err_static_data_member_not_allowed_in_local_class : Error<
   "static data member %0 not allowed in local %sub{select_tag_type_kind}2 %1">;
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index becd0610e0236..2f2d2fdda06e8 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3593,43 +3593,30 @@ static bool canEmitSpuriousReferenceToVariable(CodeGenFunction &CGF,
 /// Handles extracting individual bindings from the captured decomposed
 /// declaration (struct fields, array elements, etc.).
 LValue CodeGenFunction::EmitOMPCapturedBindingLValue(const BindingDecl *BD) {
-  assert(CapturedStmtInfo &&
-         CapturedStmtInfo->getKind() == CapturedRegionKind::CR_OpenMP &&
-         CGM.getLangOpts().OpenMP);
+  assert(CapturedStmtInfo && "Expected to be inside a captured region");
+  assert(CapturedStmtInfo->getKind() == CapturedRegionKind::CR_OpenMP &&
+         "Expected OpenMP captured region");
+  assert(CGM.getLangOpts().OpenMP && "Expected OpenMP to be enabled");
+
   auto *DD = cast<VarDecl>(BD->getDecomposedDecl());
-  QualType AggregType = DD->getType();
-  if (AggregType->isReferenceType())
-    AggregType = AggregType->getPointeeType();
+
+  QualType DREType = DD->getType().getNonReferenceType();
   DeclarationNameInfo NameInfo(DD->getDeclName(), SourceLocation());
   DeclRefExpr *DRE = DeclRefExpr::Create(
       getContext(), NestedNameSpecifierLoc(), SourceLocation(), DD,
-      /*RefersToEnclosingVariableOrCapture=*/true, NameInfo, AggregType,
+      /*RefersToEnclosingVariableOrCapture=*/true, NameInfo, DREType,
       VK_LValue);
-  LValue CapLVal = EmitLValue(DRE);
-  QualType CanonType = AggregType.getCanonicalType();
-  llvm::Type *StructTy = CGM.getTypes().ConvertTypeForMem(CanonType);
-  Address Addr = CapLVal.getAddress();
-  if (Addr.getElementType() != StructTy) {
-    Addr = Addr.withElementType(StructTy);
-    CapLVal = MakeAddrLValue(Addr, CanonType, CapLVal.getBaseInfo(),
-                             CapLVal.getTBAAInfo());
-  }
+  LValue CapLVal = EmitDeclRefLValue(DRE);
+
   // Extract the specific binding from the decomposed object.
   Expr *BindingExpr = BD->getBinding()->IgnoreImplicit();
-  if (auto *ME = dyn_cast<MemberExpr>(BindingExpr)) {
-    // Struct/union: access field.
-    FieldDecl *Field = cast<FieldDecl>(ME->getMemberDecl());
-    return EmitLValueForField(CapLVal, Field);
-  }
-  if (dyn_cast<ArraySubscriptExpr>(BindingExpr)) {
-    // Array binding - access element.
+  if (auto *ME = dyn_cast<MemberExpr>(BindingExpr))
+    return EmitLValueForField(CapLVal, cast<FieldDecl>(ME->getMemberDecl()));
+  if (isa<ArraySubscriptExpr>(BindingExpr))
     return EmitLValue(BD->getBinding(), NotKnownNonNull);
-  }
 
-  // TODO: Tuple bindings (std::tuple, std::pair via tuple protocol)
-  // use hidden temporary variables that aren't captured in OpenMP
-  // regions. Need to re-emit the get<N>() call on the captured tuple
-  // base object.
+  // Sema ensures tuple-like bindings are rejected earlier, so this path should
+  // never be reached.
   llvm_unreachable("Unexpected structured binding type in OpenMP");
 }
 
diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp
index 26852795223be..bffe9e20084c5 100644
--- a/clang/lib/Sema/SemaStmt.cpp
+++ b/clang/lib/Sema/SemaStmt.cpp
@@ -4681,9 +4681,16 @@ buildCapturedStmtCaptureList(Sema &S, CapturedRegionScopeInfo *RSI,
     if (Cap.isVariableCapture()) {
       CapVar = Cap.getVariable();
       if (auto *BD = dyn_cast<BindingDecl>(CapVar)) {
+        if (RSI->CapRegionKind == CR_OpenMP && BD->getHoldingVar()) {
+          S.Diag(Cap.getLocation(), diag::err_capture_tuple_binding_openmp)
+              << CapVar;
+          S.Diag(CapVar->getLocation(), diag::note_entity_declared_at)
+              << CapVar;
+          return true;
+        }
         VarDecl *DD = cast<VarDecl>(BD->getDecomposedDecl());
         if (!CapturedDecomposed.insert(DD).second) {
-          continue; // Skip duplicate.
+          continue; // Skip duplicate.  
         }
         CapVar = DD;
       }
diff --git a/clang/test/OpenMP/structured-bindings-messages.cpp b/clang/test/OpenMP/structured-bindings-messages.cpp
new file mode 100644
index 0000000000000..522405c3f8855
--- /dev/null
+++ b/clang/test/OpenMP/structured-bindings-messages.cpp
@@ -0,0 +1,148 @@
+// RUN: %clang_cc1 -verify -std=c++20 -triple x86_64-pc-linux-gnu -fopenmp \
+// RUN: -fsyntax-only %s 
+
+namespace std {
+  typedef unsigned long size_t;
+
+  // pair
+  template <typename T1, typename T2>
+  struct pair {
+    T1 first;
+    T2 second;
+  };
+
+  template <typename T1, typename T2>
+  pair<T1, T2> make_pair(T1 a, T2 b) {
+    return {a, b};
+  }
+
+  // tuple
+  template <typename... Ts>
+  struct tuple;
+
+  template <typename T, typename... Ts>
+  struct tuple<T, Ts...> {
+    T head;
+    tuple<Ts...> tail;
+  };
+
+  template <>
+  struct tuple<> {};
+
+  template <size_t I, typename T>
+  struct tuple_element;
+
+  template <typename T1, typename T2>
+  struct tuple_element<0, pair<T1, T2>> { using type = T1; };
+
+  template <typename T1, typename T2>
+  struct tuple_element<1, pair<T1, T2>> { using type = T2; };
+
+  template <typename T, typename... Ts>
+  struct tuple_element<0, tuple<T, Ts...>> { using type = T; };
+
+  template <size_t I, typename T, typename... Ts>
+  struct tuple_element<I, tuple<T, Ts...>> {
+    using type = typename tuple_element<I-1, tuple<Ts...>>::type;
+  };
+
+  template <size_t N, typename T>
+  struct tuple_element<0, T[N]> { using type = T; };
+
+  template <typename T>
+  struct tuple_size;
+
+  template <typename T1, typename T2>
+  struct tuple_size<pair<T1, T2>> {
+    static constexpr size_t value = 2;
+  };
+
+  template <typename... Ts>
+  struct tuple_size<tuple<Ts...>> {
+    static constexpr size_t value = sizeof...(Ts);
+  };
+
+  template <typename T, size_t N>
+  struct tuple_size<T[N]> {
+    static constexpr size_t value = N;
+  };
+
+  template <size_t I, typename T1, typename T2>
+  typename tuple_element<I, pair<T1, T2>>::type &
+  get(pair<T1, T2> &p) {
+    if constexpr (I == 0) return p.first;
+    else return p.second;
+  }
+
+  template <size_t I, typename T1, typename T2>
+  typename tuple_element<I, pair<T1, T2>>::type &&
+  get(pair<T1, T2> &&p) {
+    if constexpr (I == 0) return static_cast<T1&&>(p.first);
+    else return static_cast<T2&&>(p.second);
+  }
+
+  template <size_t I, typename T, typename... Ts>
+  auto& get(tuple<T, Ts...> &t) {
+    if constexpr (I == 0) return t.head;
+    else return get<I-1>(t.tail);
+  }
+
+  template <size_t I, typename T, typename... Ts>
+  auto&& get(tuple<T, Ts...> &&t) {
+    if constexpr (I == 0) return static_cast<T&&>(t.head);
+    else return get<I-1>(static_cast<tuple<Ts...>&&>(t.tail));
+  }
+
+  // array
+  template <typename T, size_t N>
+  struct array {
+    T data[N];
+    T& operator[](size_t i) { return data[i]; }
+    const T& operator[](size_t i) const { return data[i]; }
+  };
+
+  template <size_t I, typename T, size_t N>
+  struct tuple_element<I, array<T, N>> { using type = T; };
+
+  template <typename T, size_t N>
+  struct tuple_size<array<T, N>> {
+    static constexpr size_t value = N;
+  };
+
+  template <size_t I, typename T, size_t N>
+  T& get(array<T, N> &a) { return a.data[I]; }
+
+  template <size_t I, typename T, size_t N>
+  T&& get(array<T, N> &&a) { return static_cast<T&&>(a.data[I]); }
+}
+
+void use(int);
+
+void test_pair() {
+  auto [a, b] = std::make_pair(1, 2); // expected-note{{'a' declared here}}
+#pragma omp parallel
+  {
+    // expected-error at +1{{capturing tuple-like structured binding 'a' is not yet supported in OpenMP}}
+    use(a + b);
+  }
+}
+
+void test_tuple() {
+  std::tuple<int, int, int> t = {1, 2, 3};
+  auto [x, y, z] = t; // expected-note{{'x' declared here}}
+#pragma omp parallel
+  {
+    // expected-error at +1{{capturing tuple-like structured binding 'x' is not yet supported in OpenMP}}
+    use(x + y + z);
+  }
+}
+
+void test_array() {
+  std::array<int, 2> arr = {1, 2};
+  auto [p, q] = arr; // expected-note{{'p' declared here}}
+#pragma omp parallel
+  {
+    // expected-error at +1{{capturing tuple-like structured binding 'p' is not yet supported in OpenMP}}
+    use(p + q);
+  }
+}
diff --git a/clang/test/OpenMP/structured-bindings-template-instantiation.cpp b/clang/test/OpenMP/structured-bindings-template-instantiation.cpp
new file mode 100644
index 0000000000000..34cce8adaab60
--- /dev/null
+++ b/clang/test/OpenMP/structured-bindings-template-instantiation.cpp
@@ -0,0 +1,124 @@
+// RUN: %clang_cc1 -verify -std=c++20 -fopenmp -triple x86_64-pc-linux-gnu -emit-llvm -o - %s | FileCheck %s
+
+// RUN: %clang_cc1 -verify -std=c++20 -fopenmp -triple x86_64-pc-linux-gnu -ast-dump %s | FileCheck %s --check-prefix=AST
+
+// Test that template instantiation with structured binding captures in OpenMP
+// works correctly when multiple bindings from the same decomposition are deduped
+// into a single capture.
+
+// expected-no-diagnostics
+
+struct Point {
+  int x, y;
+};
+
+struct Point3D {
+  int x, y, z;
+};
+
+
+// CHECK-LABEL: define {{.*}}@_Z28test_template_single_binding5Point            
+// CHECK: call void {{.*}}@__kmpc_fork_call(ptr {{.*}}, i32 1, ptr {{.*}}, ptr
+template<typename T>
+void test_template_single_binding(T p) {
+  auto [a, b] = p;
+#pragma omp parallel
+  {
+    use(a);  // Only one binding captured                                       
+  }
+}
+
+// Template function capturing two bindings from struct decomposition.
+template<typename T>
+void test_template_two_bindings(T p) {
+  auto [a, b] = p;
+#pragma omp parallel reduction(+:result)
+  {
+    result = a + b;
+  }
+}
+
+// Template function capturing three bindings.
+template<typename T>
+int test_template_three_bindings(T p) {
+  auto [x, y, z] = p;
+
+  int result = 0;
+#pragma omp parallel reduction(+:result)
+  {
+    result = x + y + z;
+  }
+  return result;
+}
+
+// Template with multiple uses of same binding.
+template<typename T>
+int test_template_reuse_bindings(T p) {
+  auto [a, b] = p;
+  int result = 0;
+#pragma omp parallel reduction(+:result)
+  {
+    result = a + b + a * 2 + b * 3;
+  }
+  return result;
+}
+
+// Template with nested OpenMP constructs.
+template<typename T>
+int test_template_nested(T p) {
+  auto [a, b] = p;
+  int result = 0;
+#pragma omp parallel
+  {
+#pragma omp critical
+    {
+      result += a + b;
+    }
+  }
+  return result;
+}
+
+// Template with multiple OpenMP regions capturing same bindings.
+template<typename T>
+void test_template_multiple_regions(T p) {
+  auto [a, b] = p;
+  int result1 = 0, result2 = 0;
+#pragma omp parallel reduction(+:result1)
+  {
+    result1 = a;
+  }
+}
+
+void instantiate_tests() {
+  Point p2{1, 2};
+  Point3D p3{1, 2, 3};
+
+  test_template_two_bindings(p2);
+  test_template_two_bindings(Point{5, 6});
+
+  test_template_three_bindings(p3);
+  test_template_three_bindings(Point3D{7, 8, 9});
+
+  test_template_reuse_bindings(p2);
+  test_template_nested(p2);
+  test_template_multiple_regions(p2);
+}
+
+typedef unsigned int size_t;
+
+// Test with array bindings.
+template<typename T, size_t N>
+int test_template_array(T (&arr)[N]) {
+  auto [a, b] = arr;
+  int result = 0;
+#pragma omp parallel reduction(+:result)
+  {
+    result = a + b;
+  }
+  return result;
+}
+
+void test_array_instantiation() {
+  int arr2[2] = {1, 2};
+  test_template_array(arr2);
+}

>From e5ed11de45f55bbcb8507b33406a4c0322dbcc25 Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Tue, 12 May 2026 14:16:35 -0700
Subject: [PATCH 17/45] Refactored EmitOMPCapturedBindingLValue

---
 clang/lib/CodeGen/CGExpr.cpp | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 2f2d2fdda06e8..da9b57e5d1480 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3599,24 +3599,27 @@ LValue CodeGenFunction::EmitOMPCapturedBindingLValue(const BindingDecl *BD) {
   assert(CGM.getLangOpts().OpenMP && "Expected OpenMP to be enabled");
 
   auto *DD = cast<VarDecl>(BD->getDecomposedDecl());
-
-  QualType DREType = DD->getType().getNonReferenceType();
-  DeclarationNameInfo NameInfo(DD->getDeclName(), SourceLocation());
-  DeclRefExpr *DRE = DeclRefExpr::Create(
-      getContext(), NestedNameSpecifierLoc(), SourceLocation(), DD,
-      /*RefersToEnclosingVariableOrCapture=*/true, NameInfo, DREType,
-      VK_LValue);
-  LValue CapLVal = EmitDeclRefLValue(DRE);
-
-  // Extract the specific binding from the decomposed object.
   Expr *BindingExpr = BD->getBinding()->IgnoreImplicit();
-  if (auto *ME = dyn_cast<MemberExpr>(BindingExpr))
-    return EmitLValueForField(CapLVal, cast<FieldDecl>(ME->getMemberDecl()));
+
   if (isa<ArraySubscriptExpr>(BindingExpr))
     return EmitLValue(BD->getBinding(), NotKnownNonNull);
 
-  // Sema ensures tuple-like bindings are rejected earlier, so this path should
-  // never be reached.
+  if (auto *ME = dyn_cast<MemberExpr>(BindingExpr)) {
+    // DeclRefExpr type must be the non-reference type — EmitDeclRefLValue
+    // checks VD->getType()->isReferenceType() and calls
+    // EmitLoadOfReferenceLValue automatically.
+    QualType DREType = DD->getType().getNonReferenceType();
+    DeclarationNameInfo NameInfo(DD->getDeclName(), SourceLocation());
+    DeclRefExpr *DRE = DeclRefExpr::Create(
+        getContext(), NestedNameSpecifierLoc(), SourceLocation(), DD,
+        /*RefersToEnclosingVariableOrCapture=*/true, NameInfo, DREType,
+        VK_LValue);
+    LValue CapLVal = EmitDeclRefLValue(DRE);
+    return EmitLValueForField(CapLVal, cast<FieldDecl>(ME->getMemberDecl()));
+  }
+
+  // Sema ensures tuple-like bindings are rejected earlier, so this path
+  // should never be reached.
   llvm_unreachable("Unexpected structured binding type in OpenMP");
 }
 

>From e41682dc8ff4cd24e954284b76b937938f3c6205 Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Tue, 12 May 2026 14:24:09 -0700
Subject: [PATCH 18/45] Remove dup comment

---
 clang/lib/CodeGen/CGExpr.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index da9b57e5d1480..9d5b95a0b1992 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3809,7 +3809,6 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
       if (CapturedStmtInfo &&
           CapturedStmtInfo->getKind() == CapturedRegionKind::CR_OpenMP &&
           CGM.getLangOpts().OpenMP) {
-        // OpenMP case: binding was captured via its decomposed decl.
         return EmitOMPCapturedBindingLValue(BD);
       }
       // Non-OpenMP case: lambda capture.

>From 67d75ac3f748719bde5cb7359b977dd892cbaa57 Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Wed, 13 May 2026 13:11:38 -0700
Subject: [PATCH 19/45] Addressed Blockers 3 and 4.

---
 clang/include/clang/Sema/Sema.h               |   3 +-
 clang/lib/Sema/SemaExpr.cpp                   |  20 ++--
 clang/lib/Sema/SemaLambda.cpp                 |  21 +++-
 clang/lib/Sema/SemaStmt.cpp                   |  24 ++--
 ...ctured-bindings-template-instantiation.cpp | 113 ++++++++++++------
 5 files changed, 114 insertions(+), 67 deletions(-)

diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index f9bf3e4de0a5e..b46de44eac403 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -9274,7 +9274,8 @@ class Sema final : public SemaBase {
                                    const sema::Capture &From);
 
   /// Build a FieldDecl suitable to hold the given capture.
-  FieldDecl *BuildCaptureField(RecordDecl *RD, const sema::Capture &Capture);
+  FieldDecl *BuildCaptureField(RecordDecl *RD, const sema::Capture &Capture,
+                               bool IsOpenMP = false);
 
   /// Initialize the given capture with a suitable expression.
   ExprResult BuildCaptureInit(const sema::Capture &Capture,
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 26988dbe404d1..ff0ea2818f832 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -19380,18 +19380,16 @@ static bool isVariableCapturable(CapturingScopeInfo *CSI, ValueDecl *Var,
   }
 
   if (isa<BindingDecl>(Var)) {
-    if (Var->getDeclName() && !Var->isImplicit()) {
-      if (auto *RSI = dyn_cast<CapturedRegionScopeInfo>(CSI)) {
-        if (RSI->CapRegionKind == CR_OpenMP) {
-          if (Diagnose && S.getLangOpts().CPlusPlus) {
-            S.Diag(Loc, S.LangOpts.CPlusPlus20
-                            ? diag::warn_cxx17_compat_capture_binding
-                            : diag::ext_capture_binding)
-                << Var;
-            S.Diag(Var->getLocation(), diag::note_entity_declared_at) << Var;
-          }
-          return true;
+    if (auto *RSI = dyn_cast<CapturedRegionScopeInfo>(CSI)) {
+      if (RSI->CapRegionKind == CR_OpenMP) {
+        if (Diagnose && S.getLangOpts().CPlusPlus) {
+          S.Diag(Loc, S.LangOpts.CPlusPlus20
+                          ? diag::warn_cxx17_compat_capture_binding
+                          : diag::ext_capture_binding)
+              << Var;
+          S.Diag(Var->getLocation(), diag::note_entity_declared_at) << Var;
         }
+        return true;
       }
     }
     if (!IsLambda || !S.getLangOpts().CPlusPlus) {
diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp
index 754d0918f79da..892075e7e019d 100644
--- a/clang/lib/Sema/SemaLambda.cpp
+++ b/clang/lib/Sema/SemaLambda.cpp
@@ -1972,9 +1972,9 @@ ExprResult Sema::BuildCaptureInit(const Capture &Cap,
     ValueDecl *Var = Cap.getVariable();
     // For OpenMP structured bindings, capture the decomposed decl, not the
     // binding.
-    if (IsOpenMPMapping && isa<BindingDecl>(Var)) {
+    auto *BD = dyn_cast<BindingDecl>(Var);
+    if (IsOpenMPMapping && BD)
       Var = cast<BindingDecl>(Var)->getDecomposedDecl();
-    }
     Name = Var->getIdentifier();
     Init = BuildDeclarationNameExpr(
         CXXScopeSpec(), DeclarationNameInfo(Var->getDeclName(), Loc), Var);
@@ -2083,14 +2083,23 @@ bool Sema::DiagnoseUnusedLambdaCapture(SourceRange CaptureRange,
 
 /// Create a field within the lambda class or captured statement record for the
 /// given capture.
-FieldDecl *Sema::BuildCaptureField(RecordDecl *RD,
-                                   const sema::Capture &Capture) {
+FieldDecl *Sema::BuildCaptureField(RecordDecl *RD, const sema::Capture &Capture,
+                                   bool isOpenMP) {
   SourceLocation Loc = Capture.getLocation();
   QualType FieldType = Capture.getCaptureType();
-
   TypeSourceInfo *TSI = nullptr;
   if (Capture.isVariableCapture()) {
-    const auto *Var = dyn_cast_or_null<VarDecl>(Capture.getVariable());
+    const VarDecl *Var = nullptr;
+    if (isOpenMP) {
+      if (auto *BD = dyn_cast_or_null<BindingDecl>(Capture.getVariable())) {
+        assert(Capture.isReferenceCapture() &&
+               "OpenMP structured binding capture must be by reference");
+        Var = cast<VarDecl>(BD->getDecomposedDecl());
+        FieldType = Context.getLValueReferenceType(Var->getType());
+      }
+    }
+    if (!Var)
+      Var = dyn_cast_or_null<VarDecl>(Capture.getVariable());
     if (Var && Var->isInitCapture())
       TSI = Var->getTypeSourceInfo();
   }
diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp
index bffe9e20084c5..5dc4863a87cc5 100644
--- a/clang/lib/Sema/SemaStmt.cpp
+++ b/clang/lib/Sema/SemaStmt.cpp
@@ -4710,18 +4710,12 @@ buildCapturedStmtCaptureList(Sema &S, CapturedRegionScopeInfo *RSI,
     FieldDecl *Field = nullptr;
     if (RSI->CapRegionKind == CR_OpenMP && CapVar &&
         CapVar != Cap.getVariable() && isa<DecompositionDecl>(CapVar)) {
-      // Manually build a reference field with the DecompositionDecl's type.
-      QualType DDType = cast<VarDecl>(CapVar)->getType();
-      QualType RefType = S.Context.getLValueReferenceType(DDType);
-      TypeSourceInfo *TSI =
-          S.Context.getTrivialTypeSourceInfo(RefType, Cap.getLocation());
-      Field = FieldDecl::Create(S.Context, RSI->TheRecordDecl,
-                                Cap.getLocation(), Cap.getLocation(),
-                                /*Id=*/nullptr, RefType, TSI,
-                                /*BW=*/nullptr, /*Mutable=*/false, ICIS_NoInit);
-      Field->setImplicit(true);
-      Field->setAccess(AS_private);
-      RSI->TheRecordDecl->addDecl(Field);
+      assert(isa<BindingDecl>(Cap.getVariable()) &&
+             cast<BindingDecl>(Cap.getVariable())->getDecomposedDecl() ==
+                 CapVar &&
+             "OpenMP capture redirection should only happen for BindingDecl -> "
+             "DecompositionDecl");
+      Field = S.BuildCaptureField(RSI->TheRecordDecl, Cap, true);
     } else {
       Field = S.BuildCaptureField(RSI->TheRecordDecl, Cap);
     }
@@ -4736,8 +4730,10 @@ buildCapturedStmtCaptureList(Sema &S, CapturedRegionScopeInfo *RSI,
     } else {
       assert(Cap.isVariableCapture() && "unknown kind of capture");
 
-      if (S.getLangOpts().OpenMP && RSI->CapRegionKind == CR_OpenMP)
-        S.OpenMP().setOpenMPCaptureKind(Field, CapVar, RSI->OpenMPLevel);
+      if (S.getLangOpts().OpenMP && RSI->CapRegionKind == CR_OpenMP) {
+        ValueDecl *DSAVar = Cap.getVariable();
+        S.OpenMP().setOpenMPCaptureKind(Field, DSAVar, RSI->OpenMPLevel);
+      }
       Captures.emplace_back(Cap.getLocation(),
                             Cap.isReferenceCapture() ? CapturedStmt::VCK_ByRef
                                                      : CapturedStmt::VCK_ByCopy,
diff --git a/clang/test/OpenMP/structured-bindings-template-instantiation.cpp b/clang/test/OpenMP/structured-bindings-template-instantiation.cpp
index 34cce8adaab60..6f0185b6eb583 100644
--- a/clang/test/OpenMP/structured-bindings-template-instantiation.cpp
+++ b/clang/test/OpenMP/structured-bindings-template-instantiation.cpp
@@ -1,13 +1,17 @@
-// RUN: %clang_cc1 -verify -std=c++20 -fopenmp -triple x86_64-pc-linux-gnu -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -x c++ -std=c++20 \
+// RUN: -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
 
-// RUN: %clang_cc1 -verify -std=c++20 -fopenmp -triple x86_64-pc-linux-gnu -ast-dump %s | FileCheck %s --check-prefix=AST
-
-// Test that template instantiation with structured binding captures in OpenMP
-// works correctly when multiple bindings from the same decomposition are deduped
-// into a single capture.
+// RUN: %clang_cc1 -verify -std=c++20 -fopenmp -triple x86_64-pc-linux-gnu \
+// RUN: -ast-dump %s | FileCheck %s --check-prefix=AST
 
 // expected-no-diagnostics
 
+// Test template instantiation with structured bindings in OpenMP regions.
+// This verifies that skipping duplicate captures (when both bindings from
+// the same decomposition are used) doesn't break template instantiation.
+
+void use(int);
+
 struct Point {
   int x, y;
 };
@@ -16,29 +20,49 @@ struct Point3D {
   int x, y, z;
 };
 
-
-// CHECK-LABEL: define {{.*}}@_Z28test_template_single_binding5Point            
-// CHECK: call void {{.*}}@__kmpc_fork_call(ptr {{.*}}, i32 1, ptr {{.*}}, ptr
+// CHECK-LABEL: define {{.*}} @_Z28test_template_single_bindingI5PointEvT_(
+// CHECK: call void {{.*}}@__kmpc_fork_call(ptr @{{[0-9]+}}, i32 1, ptr @{{.*}}.omp_outlined, ptr
+//
+// AST: FunctionDecl {{.*}} test_template_single_binding 'void (Point)' implicit_instantiation
+// AST: DecompositionDecl {{.*}} used 'Point'
+// AST: OMPParallelDirective
+// AST-NEXT: CapturedStmt
+// AST: DeclRefExpr {{.*}} 'Point' lvalue Decomposition {{.*}} first_binding 'a' 'Point'
 template<typename T>
 void test_template_single_binding(T p) {
   auto [a, b] = p;
 #pragma omp parallel
   {
-    use(a);  // Only one binding captured                                       
+    use(a);
   }
 }
 
-// Template function capturing two bindings from struct decomposition.
+// CHECK-LABEL: define {{.*}}@_Z26test_template_two_bindingsI5PointEvT_
+// CHECK: call void {{.*}}@__kmpc_fork_call(ptr {{.*}}, i32 2, ptr {{.*}}, ptr
+//
+// AST: FunctionDecl {{.*}} test_template_two_bindings 'void (Point)' implicit_instantiation
+// AST: DecompositionDecl {{.*}} used 'Point'
+// AST: OMPParallelDirective
+// AST: CapturedStmt
+// AST: DeclRefExpr {{.*}} 'Point' lvalue Decomposition {{.*}} first_binding 'a' 'Point'
 template<typename T>
 void test_template_two_bindings(T p) {
   auto [a, b] = p;
+  int result = 0;
 #pragma omp parallel reduction(+:result)
   {
     result = a + b;
   }
 }
 
-// Template function capturing three bindings.
+// CHECK-LABEL: define {{.*}}@_Z28test_template_three_bindingsI7Point3DEiT_
+// CHECK: call void {{.*}}@__kmpc_fork_call(ptr {{.*}}, i32 2, ptr {{.*}}, ptr
+//
+// AST: FunctionDecl {{.*}} test_template_three_bindings 'int (Point3D)' implicit_instantiation
+// AST: DecompositionDecl {{.*}} used 'Point3D'
+// AST: OMPParallelDirective
+// AST: CapturedStmt
+// AST: DeclRefExpr {{.*}} 'Point3D' lvalue Decomposition
 template<typename T>
 int test_template_three_bindings(T p) {
   auto [x, y, z] = p;
@@ -51,7 +75,13 @@ int test_template_three_bindings(T p) {
   return result;
 }
 
-// Template with multiple uses of same binding.
+// CHECK-LABEL: define {{.*}}@_Z28test_template_reuse_bindingsI5PointEiT_
+// CHECK: call void {{.*}}@__kmpc_fork_call(ptr {{.*}}, i32 2, ptr {{.*}}, ptr
+//
+// AST: FunctionDecl {{.*}} test_template_reuse_bindings 'int (Point)' implicit_instantiation
+// AST: DecompositionDecl {{.*}} used 'Point'
+// AST: CapturedStmt
+// AST: DeclRefExpr {{.*}} 'Point' lvalue Decomposition
 template<typename T>
 int test_template_reuse_bindings(T p) {
   auto [a, b] = p;
@@ -63,7 +93,14 @@ int test_template_reuse_bindings(T p) {
   return result;
 }
 
-// Template with nested OpenMP constructs.
+// CHECK-LABEL: define {{.*}}@_Z20test_template_nestedI5PointEiT_
+// CHECK: call void {{.*}}@__kmpc_fork_call(ptr {{.*}}, i32 2, ptr {{.*}}, ptr
+//
+// AST: FunctionDecl {{.*}} test_template_nested 'int (Point)' implicit_instantiation
+// AST: DecompositionDecl {{.*}} used 'Point'
+// AST: OMPParallelDirective
+// AST: CapturedStmt
+// AST: DeclRefExpr {{.*}} 'Point' lvalue Decomposition
 template<typename T>
 int test_template_nested(T p) {
   auto [a, b] = p;
@@ -78,7 +115,14 @@ int test_template_nested(T p) {
   return result;
 }
 
-// Template with multiple OpenMP regions capturing same bindings.
+// CHECK-LABEL: define {{.*}}@_Z30test_template_multiple_regionsI5PointEvT_
+// CHECK: call void {{.*}}@__kmpc_fork_call(ptr {{.*}}, i32 2, ptr {{.*}}, ptr
+//
+// AST: FunctionDecl {{.*}} test_template_multiple_regions 'void (Point)' implicit_instantiation
+// AST: DecompositionDecl {{.*}} used 'Point'
+// AST: OMPParallelDirective
+// AST: CapturedStmt
+// AST: DeclRefExpr {{.*}} 'Point' lvalue Decomposition
 template<typename T>
 void test_template_multiple_regions(T p) {
   auto [a, b] = p;
@@ -89,24 +133,15 @@ void test_template_multiple_regions(T p) {
   }
 }
 
-void instantiate_tests() {
-  Point p2{1, 2};
-  Point3D p3{1, 2, 3};
-
-  test_template_two_bindings(p2);
-  test_template_two_bindings(Point{5, 6});
-
-  test_template_three_bindings(p3);
-  test_template_three_bindings(Point3D{7, 8, 9});
-
-  test_template_reuse_bindings(p2);
-  test_template_nested(p2);
-  test_template_multiple_regions(p2);
-}
-
 typedef unsigned int size_t;
-
-// Test with array bindings.
+// CHECK-LABEL: define {{.*}}@_Z19test_template_arrayIiLj2EEiRAT0__T_
+// CHECK: call void {{.*}}@__kmpc_fork_call(ptr {{.*}}, i32 2, ptr {{.*}}, ptr
+//
+// AST: FunctionDecl {{.*}} test_template_array 'int (int (&)[2])' implicit_instantiation
+// AST: DecompositionDecl {{.*}} used 'int[2]'
+// AST: OMPParallelDirective
+// AST: CapturedStmt
+// AST: DeclRefExpr {{.*}} 'int[2]' lvalue Decomposition
 template<typename T, size_t N>
 int test_template_array(T (&arr)[N]) {
   auto [a, b] = arr;
@@ -118,7 +153,15 @@ int test_template_array(T (&arr)[N]) {
   return result;
 }
 
-void test_array_instantiation() {
-  int arr2[2] = {1, 2};
-  test_template_array(arr2);
+void instantiate_tests() {
+  Point p1{1, 2};
+  Point3D p2{1, 2, 3};
+  int arr[2] = {1, 2};
+  test_template_single_binding(p1);
+  test_template_two_bindings(p1);
+  test_template_three_bindings(p2);
+  test_template_reuse_bindings(p1);
+  test_template_nested(p1);
+  test_template_multiple_regions(p1);
+  test_template_array(arr);
 }

>From 8babc6db1dd7351c5621ce7e5deea05d03eb7b8b Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Wed, 13 May 2026 13:25:50 -0700
Subject: [PATCH 20/45] Fix format

---
 clang/lib/Sema/SemaStmt.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp
index 5dc4863a87cc5..84574ca46117e 100644
--- a/clang/lib/Sema/SemaStmt.cpp
+++ b/clang/lib/Sema/SemaStmt.cpp
@@ -4690,7 +4690,7 @@ buildCapturedStmtCaptureList(Sema &S, CapturedRegionScopeInfo *RSI,
         }
         VarDecl *DD = cast<VarDecl>(BD->getDecomposedDecl());
         if (!CapturedDecomposed.insert(DD).second) {
-          continue; // Skip duplicate.  
+          continue; // Skip duplicate.
         }
         CapVar = DD;
       }

>From 58e90d177a380d9b8b39adec8193ec3504af3e33 Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Fri, 15 May 2026 09:07:16 -0700
Subject: [PATCH 21/45] Added a comprehensive test and fixed a few crashes

---
 clang/lib/CodeGen/CGExpr.cpp                  |   41 +-
 clang/lib/Sema/SemaExpr.cpp                   |    7 +-
 clang/lib/Sema/SemaLambda.cpp                 |    6 +-
 clang/lib/Sema/SemaOpenMP.cpp                 |    4 +
 clang/lib/Sema/SemaStmt.cpp                   |   17 +-
 .../OpenMP/structured-bindings-codegen.cpp    | 2101 +++++++++++++++--
 .../OpenMP/structured-bindings-messages.cpp   |   23 +-
 7 files changed, 1908 insertions(+), 291 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index b992abf8e59c8..692b1744cc684 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3600,22 +3600,35 @@ LValue CodeGenFunction::EmitOMPCapturedBindingLValue(const BindingDecl *BD) {
 
   auto *DD = cast<VarDecl>(BD->getDecomposedDecl());
   Expr *BindingExpr = BD->getBinding()->IgnoreImplicit();
-
-  if (isa<ArraySubscriptExpr>(BindingExpr))
-    return EmitLValue(BD->getBinding(), NotKnownNonNull);
+  QualType DREType = DD->getType().getNonReferenceType();
+  DeclarationNameInfo NameInfo(DD->getDeclName(), SourceLocation());
+  DeclRefExpr *DRE = DeclRefExpr::Create(
+      getContext(), NestedNameSpecifierLoc(), SourceLocation(), DD,
+      /*RefersToEnclosingVariableOrCapture=*/true, NameInfo, DREType,
+      VK_LValue);
+  LValue BaseLVal = EmitDeclRefLValue(DRE);
+  QualType CanonType = DREType.getCanonicalType();
+  Address Addr = BaseLVal.getAddress();
+  llvm::Type *ExpectedTy = CGM.getTypes().ConvertTypeForMem(CanonType);
+  if (auto *ASE = dyn_cast<ArraySubscriptExpr>(BindingExpr)) {
+    if (Addr.getElementType() != ExpectedTy)
+      Addr = Addr.withElementType(ExpectedTy);
+
+    Expr::EvalResult Result;
+    ASE->getIdx()->EvaluateAsInt(Result, getContext());
+    uint64_t Idx = Result.Val.getInt().getZExtValue();
+    Address EltAddr = Builder.CreateConstArrayGEP(Addr, Idx);
+    return MakeAddrLValue(EltAddr, BD->getType(), BaseLVal.getBaseInfo(),
+                          CGM.getTBAAInfoForSubobject(BaseLVal, BD->getType()));
+  }
 
   if (auto *ME = dyn_cast<MemberExpr>(BindingExpr)) {
-    // DeclRefExpr type must be the non-reference type — EmitDeclRefLValue
-    // checks VD->getType()->isReferenceType() and calls
-    // EmitLoadOfReferenceLValue automatically.
-    QualType DREType = DD->getType().getNonReferenceType();
-    DeclarationNameInfo NameInfo(DD->getDeclName(), SourceLocation());
-    DeclRefExpr *DRE = DeclRefExpr::Create(
-        getContext(), NestedNameSpecifierLoc(), SourceLocation(), DD,
-        /*RefersToEnclosingVariableOrCapture=*/true, NameInfo, DREType,
-        VK_LValue);
-    LValue CapLVal = EmitDeclRefLValue(DRE);
-    return EmitLValueForField(CapLVal, cast<FieldDecl>(ME->getMemberDecl()));
+    if (Addr.getElementType() != ExpectedTy) {
+      Addr = Addr.withElementType(ExpectedTy);
+      BaseLVal = MakeAddrLValue(Addr, CanonType, BaseLVal.getBaseInfo(),
+                                BaseLVal.getTBAAInfo());
+    }
+    return EmitLValueForField(BaseLVal, cast<FieldDecl>(ME->getMemberDecl()));
   }
 
   // Sema ensures tuple-like bindings are rejected earlier, so this path
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index f1fc94d61b6c6..ae17f6e778b54 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -19504,8 +19504,10 @@ static bool captureInCapturedRegion(
   if (IsTopScope && Kind != TryCaptureKind::Implicit) {
     ByRef = (Kind == TryCaptureKind::ExplicitByRef);
   } else if (S.getLangOpts().OpenMP && RSI->CapRegionKind == CR_OpenMP) {
+    bool IsBindingDecl = isa<BindingDecl>(Var);
     // Using an LValue reference type is consistent with Lambdas (see below).
-    if (S.OpenMP().isOpenMPCapturedDecl(Var)) {
+    if (VarDecl *VD = S.OpenMP().isOpenMPCapturedDecl(Var)) {
+      Var = VD; // Capture the DecompositionDecl.
       bool HasConst = DeclRefType.isConstQualified();
       DeclRefType = DeclRefType.getUnqualifiedType();
       // Don't lose diagnostics about assignments to const.
@@ -19513,7 +19515,8 @@ static bool captureInCapturedRegion(
         DeclRefType.addConst();
     }
     // Do not capture firstprivates in tasks.
-    if (S.OpenMP().isOpenMPPrivateDecl(Var, RSI->OpenMPLevel,
+    if (!IsBindingDecl &&
+        S.OpenMP().isOpenMPPrivateDecl(Var, RSI->OpenMPLevel,
                                        RSI->OpenMPCaptureLevel) != OMPC_unknown)
       return true;
     ByRef = S.OpenMP().isOpenMPCapturedByRef(Var, RSI->OpenMPLevel,
diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp
index 892075e7e019d..59e5a7ae9d5c3 100644
--- a/clang/lib/Sema/SemaLambda.cpp
+++ b/clang/lib/Sema/SemaLambda.cpp
@@ -2092,10 +2092,10 @@ FieldDecl *Sema::BuildCaptureField(RecordDecl *RD, const sema::Capture &Capture,
     const VarDecl *Var = nullptr;
     if (isOpenMP) {
       if (auto *BD = dyn_cast_or_null<BindingDecl>(Capture.getVariable())) {
-        assert(Capture.isReferenceCapture() &&
-               "OpenMP structured binding capture must be by reference");
         Var = cast<VarDecl>(BD->getDecomposedDecl());
-        FieldType = Context.getLValueReferenceType(Var->getType());
+        FieldType = Var->getType();
+        if (Capture.isReferenceCapture())
+          FieldType = Context.getLValueReferenceType(Var->getType());
       }
     }
     if (!Var)
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index d6f6bc919a31b..49868d853cba1 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -2426,6 +2426,10 @@ VarDecl *SemaOpenMP::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo,
   assert(getLangOpts().OpenMP && "OpenMP is not allowed");
   D = getCanonicalDecl(D);
 
+  if (auto *BD = dyn_cast<BindingDecl>(D)) {
+    if (!BD->getHoldingVar())
+      D = cast<VarDecl>(BD->getDecomposedDecl());
+  }
   auto *VD = dyn_cast<VarDecl>(D);
   // Do not capture constexpr variables.
   if (VD && VD->isConstexpr())
diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp
index 84574ca46117e..34bfc4ad7c505 100644
--- a/clang/lib/Sema/SemaStmt.cpp
+++ b/clang/lib/Sema/SemaStmt.cpp
@@ -4672,6 +4672,7 @@ static bool
 buildCapturedStmtCaptureList(Sema &S, CapturedRegionScopeInfo *RSI,
                              SmallVectorImpl<CapturedStmt::Capture> &Captures,
                              SmallVectorImpl<Expr *> &CaptureInits) {
+  bool HasError = false; // Track if any errors occurred.
   llvm::SmallPtrSet<VarDecl *, 4> CapturedDecomposed;
   for (const sema::Capture &Cap : RSI->Captures) {
     if (Cap.isInvalid())
@@ -4686,13 +4687,17 @@ buildCapturedStmtCaptureList(Sema &S, CapturedRegionScopeInfo *RSI,
               << CapVar;
           S.Diag(CapVar->getLocation(), diag::note_entity_declared_at)
               << CapVar;
-          return true;
+          HasError = true; // Mark error but continue.
+          continue;        // Skip this capture, move to next.
         }
-        VarDecl *DD = cast<VarDecl>(BD->getDecomposedDecl());
-        if (!CapturedDecomposed.insert(DD).second) {
-          continue; // Skip duplicate.
+        CapVar = cast<VarDecl>(BD->getDecomposedDecl());
+      }
+      if (RSI->CapRegionKind == CR_OpenMP) {
+        if (auto *DD = dyn_cast<DecompositionDecl>(CapVar)) {
+          if (!CapturedDecomposed.insert(DD).second) {
+            continue; // Skip duplicate.
+          }
         }
-        CapVar = DD;
       }
     }
 
@@ -4741,7 +4746,7 @@ buildCapturedStmtCaptureList(Sema &S, CapturedRegionScopeInfo *RSI,
     }
     CaptureInits.push_back(Init.get());
   }
-  return false;
+  return HasError;
 }
 
 static std::optional<int>
diff --git a/clang/test/OpenMP/structured-bindings-codegen.cpp b/clang/test/OpenMP/structured-bindings-codegen.cpp
index b36d3847dd626..3b89ac3f43c9d 100644
--- a/clang/test/OpenMP/structured-bindings-codegen.cpp
+++ b/clang/test/OpenMP/structured-bindings-codegen.cpp
@@ -1,274 +1,521 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --filter-out-after "getelem.*kernel" --filter-out "= alloca.*" --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --global-value-regex "\.offload_.*" --global-hex-value-regex ".offload_maptypes.*" --version 6
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -x c++ -std=c++20 \
-// RUN: -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -std=c++20 -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -std=c++20 -triple x86_64-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -std=c++20 -triple x86_64-unknown-unknown -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 
 // expected-no-diagnostics
 
-void use(int);
+#ifndef HEADER
+#define HEADER
+
+struct Point { int x, y; };
+struct Point3D { int x, y, z; };
+
+void test_target_explicit_map() {
+  Point p{1, 2};
+  auto [a, b] = p;
+
+#pragma omp target map(tofrom: p)
+  {
+    a = a + 1;
+    b = b + 1;
+  }
+}
+
+void test_target_implicit_map() {
+  Point p{1, 2};
+  auto [a, b] = p;
+
+#pragma omp target
+  {
+    int sum = a + b;
+  }
+}
+
+void test_target_parallel() {
+  Point p{3, 4};
+  auto [a, b] = p;
+
+#pragma omp target parallel
+  {
+    int sum = a + b;
+  }
+}
+
+void test_target_parallel_for() {
+  Point p{5, 6};
+  auto [a, b] = p;
+
+#pragma omp target parallel for
+  for (int i = 0; i < 10; i++) {
+    int result = a + b + i;
+  }
+}
+
+void test_firstprivate_dsa() {
+  Point p{7, 8};
+  auto [a, b] = p;
+
+#pragma omp parallel firstprivate(p)
+  {
+    int sum = a + b;
+  }
+}
+
+void test_shared_dsa() {
+  Point p{9, 10};
+  auto [a, b] = p;
+
+#pragma omp parallel shared(p)
+  {
+    int sum = a + b;
+  }
+}
+
+void test_reduction_dsa() {
+  Point p{1, 2};
+  auto [a, b] = p;
+  int sum = 0;
+
+#pragma omp parallel for reduction(+:sum)
+  for (int i = 0; i < 10; i++) {
+    sum += a + b;
+  }
+}
+
+void test_parallel_for_() {
+  Point p{11, 12};
+  auto [a, b] = p;
+
+#pragma omp parallel for
+  for (int i = 0; i < 10; i++) {
+    int result = a + b + i;
+  }
+}
+
+void test_parallel_for_simd_() {
+  Point p{13, 14};
+  auto [a, b] = p;
+
+#pragma omp parallel for simd
+  for (int i = 0; i < 10; i++) {
+    int result = a + b + i;
+  }
+}
+
+void test_target_teams_distribute() {
+  Point p{15, 16};
+  auto [a, b] = p;
+
+#pragma omp target teams distribute
+  for (int i = 0; i < 10; i++) {
+    int result = a + b + i;
+  }
+}
+
+void test_task() {
+  Point p{17, 18};
+  auto [a, b] = p;
+
+#pragma omp task
+  {
+    int sum = a + b;
+  }
+}
+
+void test_task_depend() {
+  Point p{19, 20};
+  auto [a, b] = p;
+
+#pragma omp task depend(in: a, b)
+  {
+    int sum = a + b;
+  }
+}
+
+void test_taskloop_() {
+  Point p{21, 22};
+  auto [a, b] = p;
+
+#pragma omp taskloop
+  for (int i = 0; i < 10; i++) {
+    int result = a + b + i;
+  }
+}
+
+template<typename T>
+int test_template_bas(T p) {
+  auto [a, b] = p;
+  int result = 0;
+
+#pragma omp parallel reduction(+:result)
+  {
+    result = a + b;
+  }
+  return result;
+}
+
+template<typename T>
+int test_template_target(T p) {
+  auto [a, b] = p;
+  int result = 0;
+
+#pragma omp target map(tofrom: result)
+  {
+    result = a + b;
+  }
+  return result;
+}
+
+template<typename T>
+int test_template_task(T p) {
+  auto [a, b] = p;
+  int result = 0;
+
+#pragma omp task shared(result)
+  {
+    result = a + b;
+  }
+#pragma omp taskwait
+  return result;
+}
+
+template<typename T>
+int test_template_3_bindings(T p) {
+  auto [x, y, z] = p;
+  int result = 0;
+
+#pragma omp parallel reduction(+:result)
+  {
+    result = x + y + z;
+  }
+  return result;
+}
+
+void instantiate_templates() {
+  Point p2{1, 2};
+  Point3D p3{1, 2, 3};
+
+  test_template_bas(p2);
+  test_template_bas(Point{3, 4});
+  test_template_target(p2);
+  test_template_task(p2);
+  test_template_3_bindings(p3);
+}
+
+void test_static_binding() {
+  static Point p{23, 24};
+  static auto [a, b] = p;
 
-// Struct binding.
-struct Point {
-  int x, y;
-};
-Point make_point() { return {1, 2}; }
-void test_struct() {
-  auto [m, n] = make_point();
 #pragma omp parallel
   {
-    use(m + n);
+    int sum = a + b;
+  }
+}
+
+void test_static_binding_shared() {
+  static Point p{25, 26};
+  static auto [a, b] = p;
+
+#pragma omp parallel shared(p)
+  {
+    a = a + 1;
+    b = b + 1;
+  }
+}
+
+void test_array_target() {
+  int arr[2] = {27, 28};
+  auto [a, b] = arr;
+
+#pragma omp target
+  {
+    int sum = a + b;
+  }
+}
+
+void test_array_task() {
+  int arr[2] = {29, 30};
+  auto [a, b] = arr;
+
+#pragma omp task
+  {
+    int sum = a + b;
   }
 }
 
-// Pair binding.
-struct pair {
-  int first;
-  int second;
-};
-pair make_pair(int a, int b) {
-  return {a, b};
+void test_nested() {
+  Point p{31, 32};
+  auto [a, b] = p;
+
+#pragma omp parallel
+  {
+#pragma omp critical
+    {
+      int sum = a + b;
+    }
+
+#pragma omp task
+    {
+      int product = a * b;
+    }
+  }
 }
-void test_pair() {
-  auto [a, b] = make_pair(1, 2);
+
+void test_reference_binding() {
+  Point p{31, 32};
+  auto& [a, b] = p;
+
 #pragma omp parallel
   {
-    use(a);
+    int sum = a + b;
   }
 }
 
-// Array binding.
-void test_array() {
-  int arr[2] = {1, 2};
-  auto [x, y] = arr;
+void test_const_binding() {
+  const Point p{33, 34};
+  const auto [a, b] = p;
+
 #pragma omp parallel
   {
-    use(x + y);
+    int sum = a + b;
   }
 }
 
-// Binding with bitfields.
-struct S {
-  int x : 4;
-  int y : 4;
-};
-void test_bitfields() {
-  S s{1, 2};
-  auto [a, b] = s;
+void test_multiple_bindings() {
+  Point p1{33, 34};
+  Point p2{35, 36};
+  auto [a, b] = p1;
+  auto [c, d] = p2;
+
 #pragma omp parallel
   {
-    use(a + b);
+    int sum = a + b + c + d;
   }
 }
 
-// Lambda inside OpenMP with captured bindings.
-void test_with_lambda() {
-  auto [m, n] = make_point();
-#pragma omp parallel for collapse(2)
-  for (int i = 0; i < 10; i++)
-    for (int j = 0; j < 10; j++)
-      [m, n](int i, int j) -> void { return; }(i, j);
+void test_multiple_bindings_mixed_dsa() {
+  Point p1{37, 38};
+  Point p2{39, 40};
+  auto [a, b] = p1;
+  auto [c, d] = p2;
+
+#pragma omp parallel firstprivate(p1) shared(p2)
+  {
+    int result = a + b + c + d;
+  }
 }
 
-// Only one binding used.
-void test_partial_capture() {
-  auto [a, b] = make_pair(1, 2);
+void test_array_3_elements() {
+  int arr[3] = {35, 36, 37};
+  auto [a, b, c] = arr;
+
 #pragma omp parallel
   {
-    use(a);
+    int sum = a + b + c;
   }
 }
 
-// Nested parallel regions.
-void test_nested() {
-  auto [x, y] = make_point();
+void test_single() {
+  Point p{38, 39};
+  auto [a, b] = p;
+
+#pragma omp parallel
+#pragma omp single
+  {
+    int sum = a + b;
+  }
+}
+
+void test_sections() {
+  Point p{40, 41};
+  auto [a, b] = p;
+
+#pragma omp parallel sections
+  {
+#pragma omp section
+    { int sum = a + b; }
+#pragma omp section
+    { int diff = a - b; }
+  }
+}
+
+void test_nested_parallel() {
+  Point p{42, 43};
+  auto [a, b] = p;
+
 #pragma omp parallel
   {
-    use(x);
 #pragma omp parallel
     {
-      use(y);
+      int sum = a + b;
     }
   }
 }
 
-// Multiple bindings in same region.
-void test_multiple() {
-  auto [a, b] = make_point();
-  auto [c, d] = make_pair(3, 4);
-#pragma omp parallel
-  {
-    use(a + b + c + d);
+void test_simd_() {
+  Point p{44, 45};
+  auto [a, b] = p;
+
+#pragma omp simd
+  for (int i = 0; i < 10; i++) {
+    int result = a + b + i;
   }
 }
 
-// Reference structured binding.
-void test_reference_binding() {
-  Point p = make_point();
-  auto& [m, n] = p;
-#pragma omp parallel
-  { use(m); }
+int main() {
+  test_target_explicit_map();
+  test_target_implicit_map();
+  test_target_parallel();
+  test_target_parallel_for();
+  test_firstprivate_dsa();
+  test_shared_dsa();
+  test_reduction_dsa();
+  test_parallel_for_();
+  test_parallel_for_simd_();
+  test_target_teams_distribute();
+  test_task();
+  test_task_depend();
+  test_taskloop_();
+  instantiate_templates();
+  test_static_binding();
+  test_static_binding_shared();
+  test_array_target();
+  test_array_task();
+  test_nested();
+  test_multiple_bindings();
+  test_multiple_bindings_mixed_dsa();
+  return 0;
 }
-// CHECK-LABEL: define dso_local i64 @_Z10make_pointv(
+
+#endif
+// CHECK-LABEL: define dso_local void @_Z24test_target_explicit_mapv(
 // CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
 // CHECK:  [[ENTRY:.*:]]
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[RETVAL:%.*]], i32 0, i32 0
-// CHECK:    store i32 1, ptr [[X]], align 4
-// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[RETVAL]], i32 0, i32 1
-// CHECK:    store i32 2, ptr [[Y]], align 4
-// CHECK:    [[TMP0:%.*]] = load i64, ptr [[RETVAL]], align 4
-// CHECK:    ret i64 [[TMP0]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z24test_target_explicit_mapv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    [[TMP1:%.*]] = load [[STRUCT_POINT:%.*]], ptr [[TMP0]], align 4
+// CHECK:    store [[STRUCT_POINT]] [[TMP1]], ptr [[DOTCASTED:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = load i64, ptr [[DOTCASTED]], align 8
+// CHECK:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z24test_target_explicit_mapv_l19(i64 [[TMP2]], ptr null) #[[ATTR3:[0-9]+]]
+// CHECK:    ret void
 //
 //
-// CHECK-LABEL: define dso_local void @_Z11test_structv(
-// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z24test_target_explicit_mapv_l19(
+// CHECK-SAME: i64 noundef [[TMP0:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR2:[0-9]+]] {
 // CHECK:  [[ENTRY:.*:]]
-// CHECK:    [[CALL:%.*]] = call i64 @_Z10make_pointv()
-// CHECK:    store i64 [[CALL]], ptr [[TMP0:%.*]], align 4
-// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @_Z11test_structv.omp_outlined, ptr [[TMP0]])
+// CHECK:    store i64 [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR:%.*]], align 8
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[DOTADDR]], i32 0, i32 0
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
+// CHECK:    [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[DOTADDR]], i32 0, i32 0
+// CHECK:    store i32 [[ADD]], ptr [[X1]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[DOTADDR]], i32 0, i32 1
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD2:%.*]] = add nsw i32 [[TMP2]], 1
+// CHECK:    [[Y3:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[DOTADDR]], i32 0, i32 1
+// CHECK:    store i32 [[ADD2]], ptr [[Y3]], align 4
 // CHECK:    ret void
 //
 //
-// CHECK-LABEL: define internal void @_Z11test_structv.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK-LABEL: define dso_local void @_Z24test_target_implicit_mapv(
+// CHECK-SAME: ) #[[ATTR0]] {
 // CHECK:  [[ENTRY:.*:]]
-// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
-// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
-// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2:![0-9]+]], !align [[META3:![0-9]+]]
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
-// CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
-// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
-// CHECK:    [[TMP3:%.*]] = load i32, ptr [[Y]], align 4
-// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
-// CHECK:    call void @_Z3usei(i32 noundef [[ADD]])
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z24test_target_implicit_mapv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    [[TMP1:%.*]] = load [[STRUCT_POINT:%.*]], ptr [[TMP0]], align 4
+// CHECK:    store [[STRUCT_POINT]] [[TMP1]], ptr [[DOTCASTED:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = load i64, ptr [[DOTCASTED]], align 8
+// CHECK:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z24test_target_implicit_mapv_l30(i64 [[TMP2]], ptr null) #[[ATTR3]]
 // CHECK:    ret void
 //
 //
-// CHECK-LABEL: define dso_local i64 @_Z9make_pairii(
-// CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z24test_target_implicit_mapv_l30(
+// CHECK-SAME: i64 noundef [[TMP0:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR2]] {
 // CHECK:  [[ENTRY:.*:]]
-// CHECK:    store i32 [[A]], ptr [[A_ADDR:%.*]], align 4
-// CHECK:    store i32 [[B]], ptr [[B_ADDR:%.*]], align 4
-// CHECK:    [[FIRST:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[RETVAL:%.*]], i32 0, i32 0
-// CHECK:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
-// CHECK:    store i32 [[TMP0]], ptr [[FIRST]], align 4
-// CHECK:    [[SECOND:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[RETVAL]], i32 0, i32 1
-// CHECK:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
-// CHECK:    store i32 [[TMP1]], ptr [[SECOND]], align 4
-// CHECK:    [[TMP2:%.*]] = load i64, ptr [[RETVAL]], align 4
-// CHECK:    ret i64 [[TMP2]]
+// CHECK:    store i64 [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR:%.*]], align 8
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[DOTADDR]], i32 0, i32 0
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[DOTADDR]], i32 0, i32 1
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
+// CHECK:    store i32 [[ADD]], ptr [[SUM:%.*]], align 4
+// CHECK:    ret void
 //
 //
-// CHECK-LABEL: define dso_local void @_Z9test_pairv(
+// CHECK-LABEL: define dso_local void @_Z20test_target_parallelv(
 // CHECK-SAME: ) #[[ATTR0]] {
 // CHECK:  [[ENTRY:.*:]]
-// CHECK:    [[CALL:%.*]] = call i64 @_Z9make_pairii(i32 noundef 1, i32 noundef 2)
-// CHECK:    store i64 [[CALL]], ptr [[TMP0:%.*]], align 4
-// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z9test_pairv.omp_outlined, ptr [[TMP0]])
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z20test_target_parallelv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20test_target_parallelv_l40(ptr [[TMP0]], ptr null) #[[ATTR3]]
 // CHECK:    ret void
 //
 //
-// CHECK-LABEL: define internal void @_Z9test_pairv.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]]) #[[ATTR1]] {
+// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20test_target_parallelv_l40(
+// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR2]] {
 // CHECK:  [[ENTRY:.*:]]
-// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
-// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    [[FIRST:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP1]], i32 0, i32 0
-// CHECK:    [[TMP2:%.*]] = load i32, ptr [[FIRST]], align 4
-// CHECK:    call void @_Z3usei(i32 noundef [[TMP2]])
-// CHECK:    ret void
-//
-//
-// CHECK-LABEL: define dso_local void @_Z10test_arrayv(
-// CHECK-SAME: ) #[[ATTR0]] {
-// CHECK:  [[ENTRY:.*]]:
-// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARR:%.*]], ptr align 4 @__const._Z10test_arrayv.arr, i64 8, i1 false)
-// CHECK:    [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP0:%.*]], i64 0, i64 0
-// CHECK:    br label %[[ARRAYINIT_BODY:.*]]
-// CHECK:       [[ARRAYINIT_BODY]]:
-// CHECK:    [[ARRAYINIT_INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[ARRAYINIT_NEXT:%.*]], %[[ARRAYINIT_BODY]] ]
-// CHECK:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[ARRAYINIT_BEGIN]], i64 [[ARRAYINIT_INDEX]]
-// CHECK:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [2 x i32], ptr [[ARR]], i64 0, i64 [[ARRAYINIT_INDEX]]
-// CHECK:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-// CHECK:    store i32 [[TMP2]], ptr [[TMP1]], align 4
-// CHECK:    [[ARRAYINIT_NEXT]] = add nuw i64 [[ARRAYINIT_INDEX]], 1
-// CHECK:    [[ARRAYINIT_DONE:%.*]] = icmp eq i64 [[ARRAYINIT_NEXT]], 2
-// CHECK:    br i1 [[ARRAYINIT_DONE]], label %[[ARRAYINIT_END:.*]], label %[[ARRAYINIT_BODY]]
-// CHECK:       [[ARRAYINIT_END]]:
-// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z10test_arrayv.omp_outlined, ptr [[TMP0]])
+// CHECK:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2:![0-9]+]], !align [[META3:![0-9]+]]
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20test_target_parallelv_l40.omp_outlined, ptr [[TMP1]])
 // CHECK:    ret void
 //
 //
-// CHECK-LABEL: define internal void @_Z10test_arrayv.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]]) #[[ATTR1]] {
+// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20test_target_parallelv_l40.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
 // CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i64 0, i64 0
-// CHECK:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-// CHECK:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i64 0, i64 1
-// CHECK:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[Y]], align 4
 // CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
-// CHECK:    call void @_Z3usei(i32 noundef [[ADD]])
+// CHECK:    store i32 [[ADD]], ptr [[SUM:%.*]], align 4
 // CHECK:    ret void
 //
 //
-// CHECK-LABEL: define dso_local void @_Z14test_bitfieldsv(
+// CHECK-LABEL: define dso_local void @_Z24test_target_parallel_forv(
 // CHECK-SAME: ) #[[ATTR0]] {
 // CHECK:  [[ENTRY:.*:]]
-// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[S:%.*]], ptr align 4 @__const._Z14test_bitfieldsv.s, i64 4, i1 false)
-// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[S]], i64 4, i1 false)
-// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z14test_bitfieldsv.omp_outlined, ptr [[TMP0]])
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z24test_target_parallel_forv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z24test_target_parallel_forv_l50(ptr [[TMP0]], ptr null) #[[ATTR3]]
 // CHECK:    ret void
 //
 //
-// CHECK-LABEL: define internal void @_Z14test_bitfieldsv.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR1]] {
+// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z24test_target_parallel_forv_l50(
+// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR2]] {
 // CHECK:  [[ENTRY:.*:]]
-// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
-// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR:%.*]], align 8
 // CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    [[BF_LOAD:%.*]] = load i8, ptr [[TMP1]], align 4
-// CHECK:    [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 4
-// CHECK:    [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 4
-// CHECK:    [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32
-// CHECK:    [[BF_LOAD1:%.*]] = load i8, ptr [[TMP1]], align 4
-// CHECK:    [[BF_ASHR2:%.*]] = ashr i8 [[BF_LOAD1]], 4
-// CHECK:    [[BF_CAST3:%.*]] = sext i8 [[BF_ASHR2]] to i32
-// CHECK:    [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[BF_CAST3]]
-// CHECK:    call void @_Z3usei(i32 noundef [[ADD]])
-// CHECK:    ret void
-//
-//
-// CHECK-LABEL: define dso_local void @_Z16test_with_lambdav(
-// CHECK-SAME: ) #[[ATTR0]] {
-// CHECK:  [[ENTRY:.*:]]
-// CHECK:    [[CALL:%.*]] = call i64 @_Z10make_pointv()
-// CHECK:    store i64 [[CALL]], ptr [[TMP0:%.*]], align 4
-// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z16test_with_lambdav.omp_outlined, ptr [[TMP0]])
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z24test_target_parallel_forv_l50.omp_outlined, ptr [[TMP1]])
 // CHECK:    ret void
 //
 //
-// CHECK-LABEL: define internal void @_Z16test_with_lambdav.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]]) #[[ATTR1]] {
+// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z24test_target_parallel_forv_l50.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
 // CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
-// CHECK:    store i32 99, ptr [[DOTOMP_UB:%.*]], align 4
+// CHECK:    store i32 9, ptr [[DOTOMP_UB:%.*]], align 4
 // CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
 // CHECK:    store i32 0, ptr [[DOTOMP_IS_LAST:%.*]], align 4
 // CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 // CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99
+// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
 // CHECK:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
 // CHECK:       [[COND_TRUE]]:
 // CHECK:    br label %[[COND_END:.*]]
@@ -276,7 +523,7 @@ void test_reference_binding() {
 // CHECK:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK:    br label %[[COND_END]]
 // CHECK:       [[COND_END]]:
-// CHECK:    [[COND:%.*]] = phi i32 [ 99, %[[COND_TRUE]] ], [ [[TMP5]], %[[COND_FALSE]] ]
+// CHECK:    [[COND:%.*]] = phi i32 [ 9, %[[COND_TRUE]] ], [ [[TMP5]], %[[COND_FALSE]] ]
 // CHECK:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
 // CHECK:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
 // CHECK:    store i32 [[TMP6]], ptr [[DOTOMP_IV:%.*]], align 4
@@ -284,40 +531,28 @@ void test_reference_binding() {
 // CHECK:       [[OMP_INNER_FOR_COND]]:
 // CHECK:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK:    [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK:    br i1 [[CMP2]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
+// CHECK:    [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
+// CHECK:    br i1 [[CMP1]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
 // CHECK:       [[OMP_INNER_FOR_BODY]]:
 // CHECK:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[DIV:%.*]] = sdiv i32 [[TMP9]], 10
-// CHECK:    [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
+// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
 // CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4
-// CHECK:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[DIV3:%.*]] = sdiv i32 [[TMP11]], 10
-// CHECK:    [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 10
-// CHECK:    [[SUB:%.*]] = sub nsw i32 [[TMP10]], [[MUL4]]
-// CHECK:    [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
-// CHECK:    [[ADD6:%.*]] = add nsw i32 0, [[MUL5]]
-// CHECK:    store i32 [[ADD6]], ptr [[J:%.*]], align 4
-// CHECK:    [[TMP12:%.*]] = getelementptr inbounds nuw [[CLASS_ANON:%.*]], ptr [[REF_TMP:%.*]], i32 0, i32 0
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
-// CHECK:    [[TMP13:%.*]] = load i32, ptr [[X]], align 4
-// CHECK:    store i32 [[TMP13]], ptr [[TMP12]], align 4
-// CHECK:    [[TMP14:%.*]] = getelementptr inbounds nuw [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[X]], align 4
 // CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
-// CHECK:    [[TMP15:%.*]] = load i32, ptr [[Y]], align 4
-// CHECK:    store i32 [[TMP15]], ptr [[TMP14]], align 4
-// CHECK:    [[TMP16:%.*]] = load i32, ptr [[I]], align 4
-// CHECK:    [[TMP17:%.*]] = load i32, ptr [[J]], align 4
-// CHECK:    call void @"_ZZ16test_with_lambdavENK3$_0clEii"(ptr noundef nonnull align 4 dereferenceable(8) [[REF_TMP]], i32 noundef [[TMP16]], i32 noundef [[TMP17]])
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD2:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[I]], align 4
+// CHECK:    [[ADD3:%.*]] = add nsw i32 [[ADD2]], [[TMP12]]
+// CHECK:    store i32 [[ADD3]], ptr [[RESULT:%.*]], align 4
 // CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
 // CHECK:       [[OMP_BODY_CONTINUE]]:
 // CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
 // CHECK:       [[OMP_INNER_FOR_INC]]:
-// CHECK:    [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[ADD7:%.*]] = add nsw i32 [[TMP18]], 1
-// CHECK:    store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
+// CHECK:    store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
 // CHECK:    br label %[[OMP_INNER_FOR_COND]]
 // CHECK:       [[OMP_INNER_FOR_END]]:
 // CHECK:    br label %[[OMP_LOOP_EXIT:.*]]
@@ -326,123 +561,1469 @@ void test_reference_binding() {
 // CHECK:    ret void
 //
 //
-// CHECK-LABEL: define dso_local void @_Z20test_partial_capturev(
+// CHECK-LABEL: define dso_local void @_Z21test_firstprivate_dsav(
 // CHECK-SAME: ) #[[ATTR0]] {
 // CHECK:  [[ENTRY:.*:]]
-// CHECK:    [[CALL:%.*]] = call i64 @_Z9make_pairii(i32 noundef 1, i32 noundef 2)
-// CHECK:    store i64 [[CALL]], ptr [[TMP0:%.*]], align 4
-// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z20test_partial_capturev.omp_outlined, ptr [[TMP0]])
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z21test_firstprivate_dsav.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @_Z21test_firstprivate_dsav.omp_outlined, ptr [[TMP0]], ptr [[P]])
 // CHECK:    ret void
 //
 //
-// CHECK-LABEL: define internal void @_Z20test_partial_capturev.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]]) #[[ATTR1]] {
+// CHECK-LABEL: define internal void @_Z21test_firstprivate_dsav.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[P:%.*]]) #[[ATTR2]] {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[P]], ptr [[P_ADDR:%.*]], align 8
 // CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    [[FIRST:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP1]], i32 0, i32 0
-// CHECK:    [[TMP2:%.*]] = load i32, ptr [[FIRST]], align 4
-// CHECK:    call void @_Z3usei(i32 noundef [[TMP2]])
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[P_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P1:%.*]], ptr align 4 [[TMP2]], i64 8, i1 false)
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP4]]
+// CHECK:    store i32 [[ADD]], ptr [[SUM:%.*]], align 4
 // CHECK:    ret void
 //
 //
-// CHECK-LABEL: define dso_local void @_Z11test_nestedv(
+// CHECK-LABEL: define dso_local void @_Z15test_shared_dsav(
 // CHECK-SAME: ) #[[ATTR0]] {
 // CHECK:  [[ENTRY:.*:]]
-// CHECK:    [[CALL:%.*]] = call i64 @_Z10make_pointv()
-// CHECK:    store i64 [[CALL]], ptr [[TMP0:%.*]], align 4
-// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @_Z11test_nestedv.omp_outlined, ptr [[TMP0]], ptr [[TMP0]])
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z15test_shared_dsav.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z15test_shared_dsav.omp_outlined, ptr [[TMP0]])
 // CHECK:    ret void
 //
 //
-// CHECK-LABEL: define internal void @_Z11test_nestedv.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP1:%.*]]) #[[ATTR1]] {
-// CHECK:  [[ENTRY:.*:]]
-// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
-// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
-// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
-// CHECK:    [[TMP4:%.*]] = load i32, ptr [[X]], align 4
-// CHECK:    call void @_Z3usei(i32 noundef [[TMP4]])
-// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z11test_nestedv.omp_outlined.omp_outlined, ptr [[TMP2]])
-// CHECK:    ret void
-//
-//
-// CHECK-LABEL: define internal void @_Z11test_nestedv.omp_outlined.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]]) #[[ATTR1]] {
+// CHECK-LABEL: define internal void @_Z15test_shared_dsav.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
 // CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 1
-// CHECK:    [[TMP2:%.*]] = load i32, ptr [[Y]], align 4
-// CHECK:    call void @_Z3usei(i32 noundef [[TMP2]])
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
+// CHECK:    store i32 [[ADD]], ptr [[SUM:%.*]], align 4
 // CHECK:    ret void
 //
 //
-// CHECK-LABEL: define dso_local void @_Z13test_multiplev(
+// CHECK-LABEL: define dso_local void @_Z18test_reduction_dsav(
 // CHECK-SAME: ) #[[ATTR0]] {
 // CHECK:  [[ENTRY:.*:]]
-// CHECK:    [[CALL:%.*]] = call i64 @_Z10make_pointv()
-// CHECK:    store i64 [[CALL]], ptr [[TMP0:%.*]], align 4
-// CHECK:    [[CALL1:%.*]] = call i64 @_Z9make_pairii(i32 noundef 3, i32 noundef 4)
-// CHECK:    store i64 [[CALL1]], ptr [[TMP1:%.*]], align 4
-// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @_Z13test_multiplev.omp_outlined, ptr [[TMP0]], ptr [[TMP1]])
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z18test_reduction_dsav.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    store i32 0, ptr [[SUM:%.*]], align 4
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @_Z18test_reduction_dsav.omp_outlined, ptr [[SUM]], ptr [[TMP0]])
 // CHECK:    ret void
 //
 //
-// CHECK-LABEL: define internal void @_Z13test_multiplev.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP1:%.*]]) #[[ATTR1]] {
+// CHECK-LABEL: define internal void @_Z18test_reduction_dsav.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SUM:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[SUM]], ptr [[SUM_ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
+// CHECK:    store i32 9, ptr [[DOTOMP_UB:%.*]], align 4
+// CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
+// CHECK:    store i32 0, ptr [[DOTOMP_IS_LAST:%.*]], align 4
+// CHECK:    store i32 0, ptr [[SUM1:%.*]], align 4
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
+// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9
+// CHECK:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK:       [[COND_TRUE]]:
+// CHECK:    br label %[[COND_END:.*]]
+// CHECK:       [[COND_FALSE]]:
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    br label %[[COND_END]]
+// CHECK:       [[COND_END]]:
+// CHECK:    [[COND:%.*]] = phi i32 [ 9, %[[COND_TRUE]] ], [ [[TMP6]], %[[COND_FALSE]] ]
+// CHECK:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK:    store i32 [[TMP7]], ptr [[DOTOMP_IV:%.*]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
+// CHECK:       [[OMP_INNER_FOR_COND]]:
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
+// CHECK:    br i1 [[CMP2]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
+// CHECK:       [[OMP_INNER_FOR_BODY]]:
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
+// CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
-// CHECK:    [[TMP4:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[X]], align 4
 // CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP2]], i32 0, i32 1
-// CHECK:    [[TMP5:%.*]] = load i32, ptr [[Y]], align 4
-// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]]
-// CHECK:    [[FIRST:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP3]], i32 0, i32 0
-// CHECK:    [[TMP6:%.*]] = load i32, ptr [[FIRST]], align 4
-// CHECK:    [[ADD2:%.*]] = add nsw i32 [[ADD]], [[TMP6]]
-// CHECK:    [[SECOND:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[TMP3]], i32 0, i32 1
-// CHECK:    [[TMP7:%.*]] = load i32, ptr [[SECOND]], align 4
-// CHECK:    [[ADD3:%.*]] = add nsw i32 [[ADD2]], [[TMP7]]
-// CHECK:    call void @_Z3usei(i32 noundef [[ADD3]])
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD3:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
+// CHECK:    [[TMP13:%.*]] = load i32, ptr [[SUM1]], align 4
+// CHECK:    [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[ADD3]]
+// CHECK:    store i32 [[ADD4]], ptr [[SUM1]], align 4
+// CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
+// CHECK:       [[OMP_BODY_CONTINUE]]:
+// CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
+// CHECK:       [[OMP_INNER_FOR_INC]]:
+// CHECK:    [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1
+// CHECK:    store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND]]
+// CHECK:       [[OMP_INNER_FOR_END]]:
+// CHECK:    br label %[[OMP_LOOP_EXIT:.*]]
+// CHECK:       [[OMP_LOOP_EXIT]]:
+// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
+// CHECK:    [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST:%.*]], i64 0, i64 0
+// CHECK:    store ptr [[SUM1]], ptr [[TMP15]], align 8
+// CHECK:    [[TMP16:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z18test_reduction_dsav.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK:    switch i32 [[TMP16]], [[DOTOMP_REDUCTION_DEFAULT:label %.*]] [
+// CHECK:      i32 1, [[DOTOMP_REDUCTION_CASE1:label %.*]]
+// CHECK:      i32 2, [[DOTOMP_REDUCTION_CASE2:label %.*]]
+// CHECK:    ]
+// CHECK:       [[_OMP_REDUCTION_CASE1:.*:]]
+// CHECK:    [[TMP17:%.*]] = load i32, ptr [[TMP1]], align 4
+// CHECK:    [[TMP18:%.*]] = load i32, ptr [[SUM1]], align 4
+// CHECK:    [[ADD6:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
+// CHECK:    store i32 [[ADD6]], ptr [[TMP1]], align 4
+// CHECK:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK:       [[_OMP_REDUCTION_CASE2:.*:]]
+// CHECK:    [[TMP19:%.*]] = load i32, ptr [[SUM1]], align 4
+// CHECK:    [[TMP20:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP19]] monotonic, align 4
+// CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK:       [[_OMP_REDUCTION_DEFAULT:.*:]]
 // CHECK:    ret void
 //
 //
-// CHECK-LABEL: define dso_local void @_Z22test_reference_bindingv(
+// CHECK-LABEL: define internal void @_Z18test_reduction_dsav.omp_outlined.omp.reduction.reduction_func(
+// CHECK-SAME: ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
+// CHECK:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK:    store i32 [[ADD]], ptr [[TMP7]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z18test_parallel_for_v(
 // CHECK-SAME: ) #[[ATTR0]] {
 // CHECK:  [[ENTRY:.*:]]
-// CHECK:    [[CALL:%.*]] = call i64 @_Z10make_pointv()
-// CHECK:    store i64 [[CALL]], ptr [[P:%.*]], align 4
-// CHECK:    store ptr [[P]], ptr [[TMP0:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z22test_reference_bindingv.omp_outlined, ptr [[TMP1]])
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z18test_parallel_for_v.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z18test_parallel_for_v.omp_outlined, ptr [[TMP0]])
 // CHECK:    ret void
 //
 //
-// CHECK-LABEL: define internal void @_Z22test_reference_bindingv.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]]) #[[ATTR1]] {
+// CHECK-LABEL: define internal void @_Z18test_parallel_for_v.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
 // CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    store ptr [[TMP1]], ptr [[TMP:%.*]], align 8
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
+// CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
+// CHECK:    store i32 9, ptr [[DOTOMP_UB:%.*]], align 4
+// CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
+// CHECK:    store i32 0, ptr [[DOTOMP_IS_LAST:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
+// CHECK:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK:       [[COND_TRUE]]:
+// CHECK:    br label %[[COND_END:.*]]
+// CHECK:       [[COND_FALSE]]:
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    br label %[[COND_END]]
+// CHECK:       [[COND_END]]:
+// CHECK:    [[COND:%.*]] = phi i32 [ 9, %[[COND_TRUE]] ], [ [[TMP5]], %[[COND_FALSE]] ]
+// CHECK:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK:    store i32 [[TMP6]], ptr [[DOTOMP_IV:%.*]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
+// CHECK:       [[OMP_INNER_FOR_COND]]:
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
+// CHECK:    br i1 [[CMP1]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
+// CHECK:       [[OMP_INNER_FOR_BODY]]:
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD2:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[I]], align 4
+// CHECK:    [[ADD3:%.*]] = add nsw i32 [[ADD2]], [[TMP12]]
+// CHECK:    store i32 [[ADD3]], ptr [[RESULT:%.*]], align 4
+// CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
+// CHECK:       [[OMP_BODY_CONTINUE]]:
+// CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
+// CHECK:       [[OMP_INNER_FOR_INC]]:
+// CHECK:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
+// CHECK:    store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND]]
+// CHECK:       [[OMP_INNER_FOR_END]]:
+// CHECK:    br label %[[OMP_LOOP_EXIT:.*]]
+// CHECK:       [[OMP_LOOP_EXIT]]:
+// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z23test_parallel_for_simd_v(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z23test_parallel_for_simd_v.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z23test_parallel_for_simd_v.omp_outlined, ptr [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z23test_parallel_for_simd_v.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
+// CHECK:    store i32 9, ptr [[DOTOMP_UB:%.*]], align 4
+// CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
+// CHECK:    store i32 0, ptr [[DOTOMP_IS_LAST:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
+// CHECK:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK:       [[COND_TRUE]]:
+// CHECK:    br label %[[COND_END:.*]]
+// CHECK:       [[COND_FALSE]]:
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    br label %[[COND_END]]
+// CHECK:       [[COND_END]]:
+// CHECK:    [[COND:%.*]] = phi i32 [ 9, %[[COND_TRUE]] ], [ [[TMP5]], %[[COND_FALSE]] ]
+// CHECK:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK:    store i32 [[TMP6]], ptr [[DOTOMP_IV:%.*]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
+// CHECK:       [[OMP_INNER_FOR_COND]]:
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]]
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK:    [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
+// CHECK:    br i1 [[CMP1]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
+// CHECK:       [[OMP_INNER_FOR_BODY]]:
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK:    [[ADD2:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK:    [[ADD3:%.*]] = add nsw i32 [[ADD2]], [[TMP12]]
+// CHECK:    store i32 [[ADD3]], ptr [[RESULT:%.*]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
+// CHECK:       [[OMP_BODY_CONTINUE]]:
+// CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
+// CHECK:       [[OMP_INNER_FOR_INC]]:
+// CHECK:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK:    [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
+// CHECK:    store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK:    br label %[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
+// CHECK:       [[OMP_INNER_FOR_END]]:
+// CHECK:    br label %[[OMP_LOOP_EXIT:.*]]
+// CHECK:       [[OMP_LOOP_EXIT]]:
+// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK:    [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK:    [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK:    br i1 [[TMP15]], [[DOTOMP_FINAL_THEN:label %.*]], [[DOTOMP_FINAL_DONE:label %.*]]
+// CHECK:       [[_OMP_FINAL_THEN:.*:]]
+// CHECK:    store i32 10, ptr [[I]], align 4
+// CHECK:    br [[DOTOMP_FINAL_DONE]]
+// CHECK:       [[_OMP_FINAL_DONE:.*:]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z28test_target_teams_distributev(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z28test_target_teams_distributev.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28test_target_teams_distributev_l111(ptr [[TMP0]], ptr null) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28test_target_teams_distributev_l111(
+// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28test_target_teams_distributev_l111.omp_outlined, ptr [[TMP1]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28test_target_teams_distributev_l111.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
+// CHECK:    store i32 9, ptr [[DOTOMP_UB:%.*]], align 4
+// CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
+// CHECK:    store i32 0, ptr [[DOTOMP_IS_LAST:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB4:[0-9]+]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
+// CHECK:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK:       [[COND_TRUE]]:
+// CHECK:    br label %[[COND_END:.*]]
+// CHECK:       [[COND_FALSE]]:
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    br label %[[COND_END]]
+// CHECK:       [[COND_END]]:
+// CHECK:    [[COND:%.*]] = phi i32 [ 9, %[[COND_TRUE]] ], [ [[TMP5]], %[[COND_FALSE]] ]
+// CHECK:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK:    store i32 [[TMP6]], ptr [[DOTOMP_IV:%.*]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
+// CHECK:       [[OMP_INNER_FOR_COND]]:
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
+// CHECK:    br i1 [[CMP1]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
+// CHECK:       [[OMP_INNER_FOR_BODY]]:
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD2:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[I]], align 4
+// CHECK:    [[ADD3:%.*]] = add nsw i32 [[ADD2]], [[TMP12]]
+// CHECK:    store i32 [[ADD3]], ptr [[RESULT:%.*]], align 4
+// CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
+// CHECK:       [[OMP_BODY_CONTINUE]]:
+// CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
+// CHECK:       [[OMP_INNER_FOR_INC]]:
+// CHECK:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
+// CHECK:    store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND]]
+// CHECK:       [[OMP_INNER_FOR_END]]:
+// CHECK:    br label %[[OMP_LOOP_EXIT:.*]]
+// CHECK:       [[OMP_LOOP_EXIT]]:
+// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB4]], i32 [[TMP3]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z9test_taskv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z9test_taskv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON:%.*]], ptr [[AGG_CAPTURED:%.*]], i32 0, i32 0
+// CHECK:    store ptr [[TMP0]], ptr [[TMP2]], align 8
+// CHECK:    [[TMP3:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 8, ptr @.omp_task_entry.)
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP3]], i32 0, i32 0
+// CHECK:    [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 0
+// CHECK:    [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP6]], ptr align 8 [[AGG_CAPTURED]], i64 8, i1 false)
+// CHECK:    [[TMP7:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP3]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal noundef i32 @.omp_task_entry.(
+// CHECK-SAME: i32 noundef [[TMP0:%.*]], ptr noalias noundef [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store i32 [[TMP0]], ptr [[DOTADDR:%.*]], align 4
+// CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP3]], i32 0, i32 0
+// CHECK:    [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0
+// CHECK:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META13:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]])
+// CHECK:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I:%.*]], align 4, !noalias [[META19:![0-9]+]]
+// CHECK:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I:%.*]], align 8, !noalias [[META19]]
+// CHECK:    store ptr null, ptr [[DOTPRIVATES__ADDR_I:%.*]], align 8, !noalias [[META19]]
+// CHECK:    store ptr null, ptr [[DOTCOPY_FN__ADDR_I:%.*]], align 8, !noalias [[META19]]
+// CHECK:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I:%.*]], align 8, !noalias [[META19]]
+// CHECK:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I:%.*]], align 8, !noalias [[META19]]
+// CHECK:    [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META19]]
+// CHECK:    [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
+// CHECK:    [[TMP11:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[Y_I:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP11]], i32 0, i32 1
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[Y_I]], align 4
+// CHECK:    [[ADD_I:%.*]] = add nsw i32 [[TMP10]], [[TMP12]]
+// CHECK:    store i32 [[ADD_I]], ptr [[SUM_I:%.*]], align 4, !noalias [[META19]]
+// CHECK:    ret i32 0
+//
+//
+// CHECK-LABEL: define dso_local void @_Z16test_task_dependv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z16test_task_dependv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_0:%.*]], ptr [[AGG_CAPTURED:%.*]], i32 0, i32 0
+// CHECK:    store ptr [[TMP0]], ptr [[TMP2]], align 8
+// CHECK:    [[TMP3:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 8, ptr @.omp_task_entry..2)
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], ptr [[TMP3]], i32 0, i32 0
+// CHECK:    [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 0
+// CHECK:    [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP6]], ptr align 8 [[AGG_CAPTURED]], i64 8, i1 false)
+// CHECK:    [[TMP7:%.*]] = getelementptr inbounds [2 x [[STRUCT_KMP_DEPEND_INFO:%.*]]], ptr [[DOTDEP_ARR_ADDR:%.*]], i64 0, i64 0
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    [[TMP8:%.*]] = ptrtoint ptr [[X]] to i64
+// CHECK:    [[TMP9:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP7]], i64 0
+// CHECK:    [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP9]], i32 0, i32 0
+// CHECK:    store i64 [[TMP8]], ptr [[TMP10]], align 8
+// CHECK:    [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP9]], i32 0, i32 1
+// CHECK:    store i64 4, ptr [[TMP11]], align 8
+// CHECK:    [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP9]], i32 0, i32 2
+// CHECK:    store i8 1, ptr [[TMP12]], align 8
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 1
+// CHECK:    [[TMP13:%.*]] = ptrtoint ptr [[Y]] to i64
+// CHECK:    [[TMP14:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP7]], i64 1
+// CHECK:    [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP14]], i32 0, i32 0
+// CHECK:    store i64 [[TMP13]], ptr [[TMP15]], align 8
+// CHECK:    [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP14]], i32 0, i32 1
+// CHECK:    store i64 4, ptr [[TMP16]], align 8
+// CHECK:    [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP14]], i32 0, i32 2
+// CHECK:    store i8 1, ptr [[TMP17]], align 8
+// CHECK:    store i64 2, ptr [[DEP_COUNTER_ADDR:%.*]], align 8
+// CHECK:    [[TMP18:%.*]] = call i32 @__kmpc_omp_task_with_deps(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP3]], i32 2, ptr [[TMP7]], i32 0, ptr null)
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal noundef i32 @.omp_task_entry..2(
+// CHECK-SAME: i32 noundef [[TMP0:%.*]], ptr noalias noundef [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store i32 [[TMP0]], ptr [[DOTADDR:%.*]], align 4
+// CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], ptr [[TMP3]], i32 0, i32 0
+// CHECK:    [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0
+// CHECK:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META27:![0-9]+]])
+// CHECK:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I:%.*]], align 4, !noalias [[META29:![0-9]+]]
+// CHECK:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I:%.*]], align 8, !noalias [[META29]]
+// CHECK:    store ptr null, ptr [[DOTPRIVATES__ADDR_I:%.*]], align 8, !noalias [[META29]]
+// CHECK:    store ptr null, ptr [[DOTCOPY_FN__ADDR_I:%.*]], align 8, !noalias [[META29]]
+// CHECK:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I:%.*]], align 8, !noalias [[META29]]
+// CHECK:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I:%.*]], align 8, !noalias [[META29]]
+// CHECK:    [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META29]]
+// CHECK:    [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
+// CHECK:    [[TMP11:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[Y_I:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP11]], i32 0, i32 1
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[Y_I]], align 4
+// CHECK:    [[ADD_I:%.*]] = add nsw i32 [[TMP10]], [[TMP12]]
+// CHECK:    store i32 [[ADD_I]], ptr [[SUM_I:%.*]], align 4, !noalias [[META29]]
+// CHECK:    ret i32 0
+//
+//
+// CHECK-LABEL: define dso_local void @_Z14test_taskloop_v(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z14test_taskloop_v.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_2:%.*]], ptr [[AGG_CAPTURED:%.*]], i32 0, i32 0
+// CHECK:    store ptr [[TMP0]], ptr [[TMP2]], align 8
+// CHECK:    call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]])
+// CHECK:    [[TMP3:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 80, i64 8, ptr @.omp_task_entry..4)
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP3]], i32 0, i32 0
+// CHECK:    [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_4:%.*]], ptr [[TMP4]], i32 0, i32 0
+// CHECK:    [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP6]], ptr align 8 [[AGG_CAPTURED]], i64 8, i1 false)
+// CHECK:    [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_4]], ptr [[TMP4]], i32 0, i32 5
+// CHECK:    store i64 0, ptr [[TMP7]], align 8
+// CHECK:    [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_4]], ptr [[TMP4]], i32 0, i32 6
+// CHECK:    store i64 9, ptr [[TMP8]], align 8
+// CHECK:    [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_4]], ptr [[TMP4]], i32 0, i32 7
+// CHECK:    store i64 1, ptr [[TMP9]], align 8
+// CHECK:    [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_4]], ptr [[TMP4]], i32 0, i32 9
+// CHECK:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP10]], i8 0, i64 8, i1 false)
+// CHECK:    [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8
+// CHECK:    call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP3]], i32 1, ptr [[TMP7]], ptr [[TMP8]], i64 [[TMP11]], i32 1, i32 0, i64 0, ptr null)
+// CHECK:    call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal noundef i32 @.omp_task_entry..4(
+// CHECK-SAME: i32 noundef [[TMP0:%.*]], ptr noalias noundef [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store i32 [[TMP0]], ptr [[DOTADDR:%.*]], align 4
+// CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP3]], i32 0, i32 0
+// CHECK:    [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_4:%.*]], ptr [[TMP4]], i32 0, i32 2
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_4]], ptr [[TMP4]], i32 0, i32 0
+// CHECK:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK:    [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_4]], ptr [[TMP4]], i32 0, i32 5
+// CHECK:    [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8
+// CHECK:    [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_4]], ptr [[TMP4]], i32 0, i32 6
+// CHECK:    [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 8
+// CHECK:    [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_4]], ptr [[TMP4]], i32 0, i32 7
+// CHECK:    [[TMP13:%.*]] = load i64, ptr [[TMP12]], align 8
+// CHECK:    [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_4]], ptr [[TMP4]], i32 0, i32 8
+// CHECK:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 8
+// CHECK:    [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_4]], ptr [[TMP4]], i32 0, i32 9
+// CHECK:    [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META30:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META33:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META35:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META37:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META39:![0-9]+]])
+// CHECK:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I:%.*]], align 4, !noalias [[META41:![0-9]+]]
+// CHECK:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I:%.*]], align 8, !noalias [[META41]]
+// CHECK:    store ptr null, ptr [[DOTPRIVATES__ADDR_I:%.*]], align 8, !noalias [[META41]]
+// CHECK:    store ptr null, ptr [[DOTCOPY_FN__ADDR_I:%.*]], align 8, !noalias [[META41]]
+// CHECK:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I:%.*]], align 8, !noalias [[META41]]
+// CHECK:    store i64 [[TMP9]], ptr [[DOTLB__ADDR_I:%.*]], align 8, !noalias [[META41]]
+// CHECK:    store i64 [[TMP11]], ptr [[DOTUB__ADDR_I:%.*]], align 8, !noalias [[META41]]
+// CHECK:    store i64 [[TMP13]], ptr [[DOTST__ADDR_I:%.*]], align 8, !noalias [[META41]]
+// CHECK:    store i32 [[TMP15]], ptr [[DOTLITER__ADDR_I:%.*]], align 4, !noalias [[META41]]
+// CHECK:    store ptr [[TMP17]], ptr [[DOTREDUCTIONS__ADDR_I:%.*]], align 8, !noalias [[META41]]
+// CHECK:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I:%.*]], align 8, !noalias [[META41]]
+// CHECK:    [[TMP18:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META41]]
+// CHECK:    [[TMP19:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias [[META41]]
+// CHECK:    [[CONV_I:%.*]] = trunc i64 [[TMP19]] to i32
+// CHECK:    store i32 [[CONV_I]], ptr [[DOTOMP_IV_I:%.*]], align 4, !noalias [[META41]]
+// CHECK:    [[TMP20:%.*]] = load ptr, ptr [[TMP18]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    br label %[[OMP_INNER_FOR_COND_I:.*]]
+// CHECK:       [[OMP_INNER_FOR_COND_I]]:
+// CHECK:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META41]]
+// CHECK:    [[CONV1_I:%.*]] = sext i32 [[TMP21]] to i64
+// CHECK:    [[TMP22:%.*]] = load i64, ptr [[DOTUB__ADDR_I]], align 8, !noalias [[META41]]
+// CHECK:    [[CMP_I:%.*]] = icmp ule i64 [[CONV1_I]], [[TMP22]]
+// CHECK:    br i1 [[CMP_I]], label %[[OMP_INNER_FOR_BODY_I:.*]], [[DOTOMP_OUTLINED__3_EXIT:label %.*]]
+// CHECK:       [[OMP_INNER_FOR_BODY_I]]:
+// CHECK:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META41]]
+// CHECK:    store i32 [[TMP23]], ptr [[I_I:%.*]], align 4, !noalias [[META41]]
+// CHECK:    [[TMP24:%.*]] = load i32, ptr [[TMP20]], align 4
+// CHECK:    [[Y_I:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP20]], i32 0, i32 1
+// CHECK:    [[TMP25:%.*]] = load i32, ptr [[Y_I]], align 4
+// CHECK:    [[ADD2_I:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
+// CHECK:    [[TMP26:%.*]] = load i32, ptr [[I_I]], align 4, !noalias [[META41]]
+// CHECK:    [[ADD3_I:%.*]] = add nsw i32 [[ADD2_I]], [[TMP26]]
+// CHECK:    store i32 [[ADD3_I]], ptr [[RESULT_I:%.*]], align 4, !noalias [[META41]]
+// CHECK:    [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META41]]
+// CHECK:    [[ADD4_I:%.*]] = add nsw i32 [[TMP27]], 1
+// CHECK:    store i32 [[ADD4_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias [[META41]]
+// CHECK:    br label %[[OMP_INNER_FOR_COND_I]]
+// CHECK:       [[_OMP_OUTLINED__3_EXIT:.*:]]
+// CHECK:    ret i32 0
+//
+//
+// CHECK-LABEL: define dso_local void @_Z21instantiate_templatesv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P2:%.*]], ptr align 4 @__const._Z21instantiate_templatesv.p2, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P3:%.*]], ptr align 4 @__const._Z21instantiate_templatesv.p3, i64 12, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[AGG_TMP:%.*]], ptr align 4 [[P2]], i64 8, i1 false)
+// CHECK:    [[TMP0:%.*]] = load i64, ptr [[AGG_TMP]], align 4
+// CHECK:    [[CALL:%.*]] = call noundef i32 @_Z17test_template_basI5PointEiT_(i64 [[TMP0]])
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[AGG_TMP1:%.*]], i32 0, i32 0
+// CHECK:    store i32 3, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[AGG_TMP1]], i32 0, i32 1
+// CHECK:    store i32 4, ptr [[Y]], align 4
+// CHECK:    [[TMP1:%.*]] = load i64, ptr [[AGG_TMP1]], align 4
+// CHECK:    [[CALL2:%.*]] = call noundef i32 @_Z17test_template_basI5PointEiT_(i64 [[TMP1]])
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[AGG_TMP3:%.*]], ptr align 4 [[P2]], i64 8, i1 false)
+// CHECK:    [[TMP2:%.*]] = load i64, ptr [[AGG_TMP3]], align 4
+// CHECK:    [[CALL4:%.*]] = call noundef i32 @_Z20test_template_targetI5PointEiT_(i64 [[TMP2]])
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[AGG_TMP5:%.*]], ptr align 4 [[P2]], i64 8, i1 false)
+// CHECK:    [[TMP3:%.*]] = load i64, ptr [[AGG_TMP5]], align 4
+// CHECK:    [[CALL6:%.*]] = call noundef i32 @_Z18test_template_taskI5PointEiT_(i64 [[TMP3]])
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[AGG_TMP7:%.*]], ptr align 4 [[P3]], i64 12, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[AGG_TMP7_COERCE:%.*]], ptr align 4 [[AGG_TMP7]], i64 12, i1 false)
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds nuw { i64, i32 }, ptr [[AGG_TMP7_COERCE]], i32 0, i32 0
+// CHECK:    [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 4
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds nuw { i64, i32 }, ptr [[AGG_TMP7_COERCE]], i32 0, i32 1
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
+// CHECK:    [[CALL8:%.*]] = call noundef i32 @_Z24test_template_3_bindingsI7Point3DEiT_(i64 [[TMP5]], i32 [[TMP7]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define linkonce_odr noundef i32 @_Z17test_template_basI5PointEiT_(
+// CHECK-SAME: i64 [[P_COERCE:%.*]]) #[[ATTR0]] comdat {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store i64 [[P_COERCE]], ptr [[P:%.*]], align 4
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    store i32 0, ptr [[RESULT:%.*]], align 4
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @_Z17test_template_basI5PointEiT_.omp_outlined, ptr [[RESULT]], ptr [[TMP0]])
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[RESULT]], align 4
+// CHECK:    ret i32 [[TMP1]]
+//
+//
+// CHECK-LABEL: define linkonce_odr noundef i32 @_Z20test_template_targetI5PointEiT_(
+// CHECK-SAME: i64 [[P_COERCE:%.*]]) #[[ATTR0]] comdat {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store i64 [[P_COERCE]], ptr [[P:%.*]], align 4
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    store i32 0, ptr [[RESULT:%.*]], align 4
+// CHECK:    [[TMP1:%.*]] = load [[STRUCT_POINT:%.*]], ptr [[TMP0]], align 4
+// CHECK:    store [[STRUCT_POINT]] [[TMP1]], ptr [[DOTCASTED:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = load i64, ptr [[DOTCASTED]], align 8
+// CHECK:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20test_template_targetI5PointEiT__l164(ptr [[RESULT]], i64 [[TMP2]], ptr null) #[[ATTR3]]
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[RESULT]], align 4
+// CHECK:    ret i32 [[TMP3]]
+//
+//
+// CHECK-LABEL: define linkonce_odr noundef i32 @_Z18test_template_taskI5PointEiT_(
+// CHECK-SAME: i64 [[P_COERCE:%.*]]) #[[ATTR0]] comdat {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK:    store i64 [[P_COERCE]], ptr [[P:%.*]], align 4
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    store i32 0, ptr [[RESULT:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_6:%.*]], ptr [[AGG_CAPTURED:%.*]], i32 0, i32 0
+// CHECK:    store ptr [[RESULT]], ptr [[TMP2]], align 8
+// CHECK:    [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_6]], ptr [[AGG_CAPTURED]], i32 0, i32 1
+// CHECK:    store ptr [[TMP0]], ptr [[TMP3]], align 8
+// CHECK:    [[TMP4:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 16, ptr @.omp_task_entry..6)
+// CHECK:    [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES_7:%.*]], ptr [[TMP4]], i32 0, i32 0
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP5]], i32 0, i32 0
+// CHECK:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP7]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false)
+// CHECK:    [[TMP8:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP4]])
+// CHECK:    [[TMP9:%.*]] = call i32 @__kmpc_omp_taskwait(ptr @[[GLOB1]], i32 [[TMP1]])
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[RESULT]], align 4
+// CHECK:    ret i32 [[TMP10]]
+//
+//
+// CHECK-LABEL: define linkonce_odr noundef i32 @_Z24test_template_3_bindingsI7Point3DEiT_(
+// CHECK-SAME: i64 [[P_COERCE0:%.*]], i32 [[P_COERCE1:%.*]]) #[[ATTR0]] comdat {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    [[TMP1:%.*]] = getelementptr inbounds nuw { i64, i32 }, ptr [[COERCE:%.*]], i32 0, i32 0
+// CHECK:    store i64 [[P_COERCE0]], ptr [[TMP1]], align 4
+// CHECK:    [[TMP2:%.*]] = getelementptr inbounds nuw { i64, i32 }, ptr [[COERCE]], i32 0, i32 1
+// CHECK:    store i32 [[P_COERCE1]], ptr [[TMP2]], align 4
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 [[COERCE]], i64 12, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 12, i1 false)
+// CHECK:    store i32 0, ptr [[RESULT:%.*]], align 4
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @_Z24test_template_3_bindingsI7Point3DEiT_.omp_outlined, ptr [[RESULT]], ptr [[TMP0]])
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[RESULT]], align 4
+// CHECK:    ret i32 [[TMP3]]
+//
+//
+// CHECK-LABEL: define internal void @_Z17test_template_basI5PointEiT_.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[RESULT:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[RESULT]], ptr [[RESULT_ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store i32 0, ptr [[RESULT1:%.*]], align 4
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
 // CHECK:    [[TMP3:%.*]] = load i32, ptr [[X]], align 4
-// CHECK:    call void @_Z3usei(i32 noundef [[TMP3]])
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP2]], i32 0, i32 1
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP4]]
+// CHECK:    store i32 [[ADD]], ptr [[RESULT1]], align 4
+// CHECK:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST:%.*]], i64 0, i64 0
+// CHECK:    store ptr [[RESULT1]], ptr [[TMP5]], align 8
+// CHECK:    [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
+// CHECK:    [[TMP8:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP7]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z17test_template_basI5PointEiT_.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK:    switch i32 [[TMP8]], [[DOTOMP_REDUCTION_DEFAULT:label %.*]] [
+// CHECK:      i32 1, [[DOTOMP_REDUCTION_CASE1:label %.*]]
+// CHECK:      i32 2, [[DOTOMP_REDUCTION_CASE2:label %.*]]
+// CHECK:    ]
+// CHECK:       [[_OMP_REDUCTION_CASE1:.*:]]
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[TMP1]], align 4
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[RESULT1]], align 4
+// CHECK:    [[ADD2:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
+// CHECK:    store i32 [[ADD2]], ptr [[TMP1]], align 4
+// CHECK:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK:       [[_OMP_REDUCTION_CASE2:.*:]]
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[RESULT1]], align 4
+// CHECK:    [[TMP12:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP11]] monotonic, align 4
+// CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK:       [[_OMP_REDUCTION_DEFAULT:.*:]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z17test_template_basI5PointEiT_.omp_outlined.omp.reduction.reduction_func(
+// CHECK-SAME: ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
+// CHECK:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK:    store i32 [[ADD]], ptr [[TMP7]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20test_template_targetI5PointEiT__l164(
+// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[RESULT:%.*]], i64 noundef [[TMP0:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[RESULT]], ptr [[RESULT_ADDR:%.*]], align 8
+// CHECK:    store i64 [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[DOTADDR]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[DOTADDR]], i32 0, i32 1
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
+// CHECK:    store i32 [[ADD]], ptr [[TMP1]], align 4
 // CHECK:    ret void
 //
+//
+// CHECK-LABEL: define internal noundef i32 @.omp_task_entry..6(
+// CHECK-SAME: i32 noundef [[TMP0:%.*]], ptr noalias noundef [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store i32 [[TMP0]], ptr [[DOTADDR:%.*]], align 4
+// CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES_7:%.*]], ptr [[TMP3]], i32 0, i32 0
+// CHECK:    [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0
+// CHECK:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META42:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META45:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META47:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]])
+// CHECK:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I:%.*]], align 4, !noalias [[META51:![0-9]+]]
+// CHECK:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I:%.*]], align 8, !noalias [[META51]]
+// CHECK:    store ptr null, ptr [[DOTPRIVATES__ADDR_I:%.*]], align 8, !noalias [[META51]]
+// CHECK:    store ptr null, ptr [[DOTCOPY_FN__ADDR_I:%.*]], align 8, !noalias [[META51]]
+// CHECK:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I:%.*]], align 8, !noalias [[META51]]
+// CHECK:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I:%.*]], align 8, !noalias [[META51]]
+// CHECK:    [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META51]]
+// CHECK:    [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_6:%.*]], ptr [[TMP8]], i32 0, i32 1
+// CHECK:    [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
+// CHECK:    [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_6]], ptr [[TMP8]], i32 0, i32 1
+// CHECK:    [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[Y_I:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP13]], i32 0, i32 1
+// CHECK:    [[TMP14:%.*]] = load i32, ptr [[Y_I]], align 4
+// CHECK:    [[ADD_I:%.*]] = add nsw i32 [[TMP11]], [[TMP14]]
+// CHECK:    [[TMP15:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store i32 [[ADD_I]], ptr [[TMP15]], align 4
+// CHECK:    ret i32 0
+//
+//
+// CHECK-LABEL: define internal void @_Z24test_template_3_bindingsI7Point3DEiT_.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[RESULT:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[RESULT]], ptr [[RESULT_ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store i32 0, ptr [[RESULT1:%.*]], align 4
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT3D:%.*]], ptr [[TMP2]], i32 0, i32 0
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT3D]], ptr [[TMP2]], i32 0, i32 1
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP4]]
+// CHECK:    [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT3D]], ptr [[TMP2]], i32 0, i32 2
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[Z]], align 4
+// CHECK:    [[ADD2:%.*]] = add nsw i32 [[ADD]], [[TMP5]]
+// CHECK:    store i32 [[ADD2]], ptr [[RESULT1]], align 4
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST:%.*]], i64 0, i64 0
+// CHECK:    store ptr [[RESULT1]], ptr [[TMP6]], align 8
+// CHECK:    [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK:    [[TMP9:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP8]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z24test_template_3_bindingsI7Point3DEiT_.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK:    switch i32 [[TMP9]], [[DOTOMP_REDUCTION_DEFAULT:label %.*]] [
+// CHECK:      i32 1, [[DOTOMP_REDUCTION_CASE1:label %.*]]
+// CHECK:      i32 2, [[DOTOMP_REDUCTION_CASE2:label %.*]]
+// CHECK:    ]
+// CHECK:       [[_OMP_REDUCTION_CASE1:.*:]]
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[TMP1]], align 4
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[RESULT1]], align 4
+// CHECK:    [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
+// CHECK:    store i32 [[ADD3]], ptr [[TMP1]], align 4
+// CHECK:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK:       [[_OMP_REDUCTION_CASE2:.*:]]
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[RESULT1]], align 4
+// CHECK:    [[TMP13:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP12]] monotonic, align 4
+// CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK:       [[_OMP_REDUCTION_DEFAULT:.*:]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z24test_template_3_bindingsI7Point3DEiT_.omp_outlined.omp.reduction.reduction_func(
+// CHECK-SAME: ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
+// CHECK:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK:    store i32 [[ADD]], ptr [[TMP7]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z19test_static_bindingv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    [[TMP0:%.*]] = load atomic i8, ptr @_ZGVZ19test_static_bindingvEDC1a1bE acquire, align 8
+// CHECK:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0
+// CHECK:    br i1 [[GUARD_UNINITIALIZED]], label %[[INIT_CHECK:.*]], label %[[INIT_END:.*]], !prof [[PROF52:![0-9]+]]
+// CHECK:       [[INIT_CHECK]]:
+// CHECK:    [[TMP1:%.*]] = call i32 @__cxa_guard_acquire(ptr @_ZGVZ19test_static_bindingvEDC1a1bE) #[[ATTR3]]
+// CHECK:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
+// CHECK:    br i1 [[TOBOOL]], label %[[INIT:.*]], label %[[INIT_END]]
+// CHECK:       [[INIT]]:
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 @_ZZ19test_static_bindingvEDC1a1bE, ptr align 4 @_ZZ19test_static_bindingvE1p, i64 8, i1 false)
+// CHECK:    call void @__cxa_guard_release(ptr @_ZGVZ19test_static_bindingvEDC1a1bE) #[[ATTR3]]
+// CHECK:    br label %[[INIT_END]]
+// CHECK:       [[INIT_END]]:
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @_Z19test_static_bindingv.omp_outlined)
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z19test_static_bindingv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    [[TMP0:%.*]] = load i32, ptr @_ZZ19test_static_bindingvEDC1a1bE, align 4
+// CHECK:    [[TMP1:%.*]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @_ZZ19test_static_bindingvEDC1a1bE, i64 4), align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]]
+// CHECK:    store i32 [[ADD]], ptr [[SUM:%.*]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z26test_static_binding_sharedv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    [[TMP0:%.*]] = load atomic i8, ptr @_ZGVZ26test_static_binding_sharedvEDC1a1bE acquire, align 8
+// CHECK:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0
+// CHECK:    br i1 [[GUARD_UNINITIALIZED]], label %[[INIT_CHECK:.*]], label %[[INIT_END:.*]], !prof [[PROF52]]
+// CHECK:       [[INIT_CHECK]]:
+// CHECK:    [[TMP1:%.*]] = call i32 @__cxa_guard_acquire(ptr @_ZGVZ26test_static_binding_sharedvEDC1a1bE) #[[ATTR3]]
+// CHECK:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
+// CHECK:    br i1 [[TOBOOL]], label %[[INIT:.*]], label %[[INIT_END]]
+// CHECK:       [[INIT]]:
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 @_ZZ26test_static_binding_sharedvEDC1a1bE, ptr align 4 @_ZZ26test_static_binding_sharedvE1p, i64 8, i1 false)
+// CHECK:    call void @__cxa_guard_release(ptr @_ZGVZ26test_static_binding_sharedvEDC1a1bE) #[[ATTR3]]
+// CHECK:    br label %[[INIT_END]]
+// CHECK:       [[INIT_END]]:
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @_Z26test_static_binding_sharedv.omp_outlined)
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z26test_static_binding_sharedv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    [[TMP0:%.*]] = load i32, ptr @_ZZ26test_static_binding_sharedvEDC1a1bE, align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
+// CHECK:    store i32 [[ADD]], ptr @_ZZ26test_static_binding_sharedvEDC1a1bE, align 4
+// CHECK:    [[TMP1:%.*]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @_ZZ26test_static_binding_sharedvEDC1a1bE, i64 4), align 4
+// CHECK:    [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1
+// CHECK:    store i32 [[ADD1]], ptr getelementptr inbounds nuw (i8, ptr @_ZZ26test_static_binding_sharedvEDC1a1bE, i64 4), align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z17test_array_targetv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*]]:
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARR:%.*]], ptr align 4 @__const._Z17test_array_targetv.arr, i64 8, i1 false)
+// CHECK:    [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP0:%.*]], i64 0, i64 0
+// CHECK:    br label %[[ARRAYINIT_BODY:.*]]
+// CHECK:       [[ARRAYINIT_BODY]]:
+// CHECK:    [[ARRAYINIT_INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[ARRAYINIT_NEXT:%.*]], %[[ARRAYINIT_BODY]] ]
+// CHECK:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[ARRAYINIT_BEGIN]], i64 [[ARRAYINIT_INDEX]]
+// CHECK:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [2 x i32], ptr [[ARR]], i64 0, i64 [[ARRAYINIT_INDEX]]
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// CHECK:    store i32 [[TMP2]], ptr [[TMP1]], align 4
+// CHECK:    [[ARRAYINIT_NEXT]] = add nuw i64 [[ARRAYINIT_INDEX]], 1
+// CHECK:    [[ARRAYINIT_DONE:%.*]] = icmp eq i64 [[ARRAYINIT_NEXT]], 2
+// CHECK:    br i1 [[ARRAYINIT_DONE]], label %[[ARRAYINIT_END:.*]], label %[[ARRAYINIT_BODY]]
+// CHECK:       [[ARRAYINIT_END]]:
+// CHECK:    [[TMP3:%.*]] = load [2 x i32], ptr [[TMP0]], align 4
+// CHECK:    store [2 x i32] [[TMP3]], ptr [[DOTCASTED:%.*]], align 4
+// CHECK:    [[TMP4:%.*]] = load i64, ptr [[DOTCASTED]], align 8
+// CHECK:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17test_array_targetv_l232(i64 [[TMP4]], ptr null) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17test_array_targetv_l232(
+// CHECK-SAME: i64 noundef [[TMP0:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store i64 [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = getelementptr inbounds [2 x i32], ptr [[DOTADDR]], i64 0, i64 0
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
+// CHECK:    [[TMP3:%.*]] = getelementptr inbounds [2 x i32], ptr [[DOTADDR]], i64 0, i64 1
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP4]]
+// CHECK:    store i32 [[ADD]], ptr [[SUM:%.*]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z15test_array_taskv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*]]:
+// CHECK:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARR:%.*]], ptr align 4 @__const._Z15test_array_taskv.arr, i64 8, i1 false)
+// CHECK:    [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP0:%.*]], i64 0, i64 0
+// CHECK:    br label %[[ARRAYINIT_BODY:.*]]
+// CHECK:       [[ARRAYINIT_BODY]]:
+// CHECK:    [[ARRAYINIT_INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[ARRAYINIT_NEXT:%.*]], %[[ARRAYINIT_BODY]] ]
+// CHECK:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[ARRAYINIT_BEGIN]], i64 [[ARRAYINIT_INDEX]]
+// CHECK:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [2 x i32], ptr [[ARR]], i64 0, i64 [[ARRAYINIT_INDEX]]
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// CHECK:    store i32 [[TMP3]], ptr [[TMP2]], align 4
+// CHECK:    [[ARRAYINIT_NEXT]] = add nuw i64 [[ARRAYINIT_INDEX]], 1
+// CHECK:    [[ARRAYINIT_DONE:%.*]] = icmp eq i64 [[ARRAYINIT_NEXT]], 2
+// CHECK:    br i1 [[ARRAYINIT_DONE]], label %[[ARRAYINIT_END:.*]], label %[[ARRAYINIT_BODY]]
+// CHECK:       [[ARRAYINIT_END]]:
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_8:%.*]], ptr [[AGG_CAPTURED:%.*]], i32 0, i32 0
+// CHECK:    store ptr [[TMP0]], ptr [[TMP4]], align 8
+// CHECK:    [[TMP5:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 8, ptr @.omp_task_entry..8)
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES_9:%.*]], ptr [[TMP5]], i32 0, i32 0
+// CHECK:    [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP6]], i32 0, i32 0
+// CHECK:    [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP8]], ptr align 8 [[AGG_CAPTURED]], i64 8, i1 false)
+// CHECK:    [[TMP9:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP5]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal noundef i32 @.omp_task_entry..8(
+// CHECK-SAME: i32 noundef [[TMP0:%.*]], ptr noalias noundef [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store i32 [[TMP0]], ptr [[DOTADDR:%.*]], align 4
+// CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES_9:%.*]], ptr [[TMP3]], i32 0, i32 0
+// CHECK:    [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0
+// CHECK:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META53:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META56:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META58:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META60:![0-9]+]])
+// CHECK:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I:%.*]], align 4, !noalias [[META62:![0-9]+]]
+// CHECK:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I:%.*]], align 8, !noalias [[META62]]
+// CHECK:    store ptr null, ptr [[DOTPRIVATES__ADDR_I:%.*]], align 8, !noalias [[META62]]
+// CHECK:    store ptr null, ptr [[DOTCOPY_FN__ADDR_I:%.*]], align 8, !noalias [[META62]]
+// CHECK:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I:%.*]], align 8, !noalias [[META62]]
+// CHECK:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I:%.*]], align 8, !noalias [[META62]]
+// CHECK:    [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META62]]
+// CHECK:    [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
+// CHECK:    [[TMP11:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP12:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP11]], i64 0, i64 1
+// CHECK:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
+// CHECK:    [[ADD_I:%.*]] = add nsw i32 [[TMP10]], [[TMP13]]
+// CHECK:    store i32 [[ADD_I]], ptr [[SUM_I:%.*]], align 4, !noalias [[META62]]
+// CHECK:    ret i32 0
+//
+//
+// CHECK-LABEL: define dso_local void @_Z11test_nestedv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z11test_nestedv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z11test_nestedv.omp_outlined, ptr [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z11test_nestedv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK:    call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP3]], ptr @.gomp_critical_user_.var)
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]]
+// CHECK:    store i32 [[ADD]], ptr [[SUM:%.*]], align 4
+// CHECK:    call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP3]], ptr @.gomp_critical_user_.var)
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_10:%.*]], ptr [[AGG_CAPTURED:%.*]], i32 0, i32 0
+// CHECK:    store ptr [[TMP1]], ptr [[TMP6]], align 8
+// CHECK:    [[TMP7:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i64 40, i64 8, ptr @.omp_task_entry..10)
+// CHECK:    [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES_11:%.*]], ptr [[TMP7]], i32 0, i32 0
+// CHECK:    [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP8]], i32 0, i32 0
+// CHECK:    [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP10]], ptr align 8 [[AGG_CAPTURED]], i64 8, i1 false)
+// CHECK:    [[TMP11:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP3]], ptr [[TMP7]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal noundef i32 @.omp_task_entry..10(
+// CHECK-SAME: i32 noundef [[TMP0:%.*]], ptr noalias noundef [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store i32 [[TMP0]], ptr [[DOTADDR:%.*]], align 4
+// CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES_11:%.*]], ptr [[TMP3]], i32 0, i32 0
+// CHECK:    [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0
+// CHECK:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META63:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META66:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META68:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META70:![0-9]+]])
+// CHECK:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I:%.*]], align 4, !noalias [[META72:![0-9]+]]
+// CHECK:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I:%.*]], align 8, !noalias [[META72]]
+// CHECK:    store ptr null, ptr [[DOTPRIVATES__ADDR_I:%.*]], align 8, !noalias [[META72]]
+// CHECK:    store ptr null, ptr [[DOTCOPY_FN__ADDR_I:%.*]], align 8, !noalias [[META72]]
+// CHECK:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I:%.*]], align 8, !noalias [[META72]]
+// CHECK:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I:%.*]], align 8, !noalias [[META72]]
+// CHECK:    [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META72]]
+// CHECK:    [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
+// CHECK:    [[TMP11:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[Y_I:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP11]], i32 0, i32 1
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[Y_I]], align 4
+// CHECK:    [[MUL_I:%.*]] = mul nsw i32 [[TMP10]], [[TMP12]]
+// CHECK:    store i32 [[MUL_I]], ptr [[PRODUCT_I:%.*]], align 4, !noalias [[META72]]
+// CHECK:    ret i32 0
+//
+//
+// CHECK-LABEL: define dso_local void @_Z22test_reference_bindingv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z22test_reference_bindingv.p, i64 8, i1 false)
+// CHECK:    store ptr [[P]], ptr [[TMP0:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z22test_reference_bindingv.omp_outlined, ptr [[TMP1]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z22test_reference_bindingv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store ptr [[TMP1]], ptr [[TMP:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP4]], i32 0, i32 1
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP5]]
+// CHECK:    store i32 [[ADD]], ptr [[SUM:%.*]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z18test_const_bindingv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z18test_const_bindingv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 @__const._Z18test_const_bindingv., i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z18test_const_bindingv.omp_outlined, ptr [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z18test_const_bindingv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
+// CHECK:    store i32 [[ADD]], ptr [[SUM:%.*]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z22test_multiple_bindingsv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P1:%.*]], ptr align 4 @__const._Z22test_multiple_bindingsv.p1, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P2:%.*]], ptr align 4 @__const._Z22test_multiple_bindingsv.p2, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P1]], i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1:%.*]], ptr align 4 [[P2]], i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @_Z22test_multiple_bindingsv.omp_outlined, ptr [[TMP0]], ptr [[TMP1]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z22test_multiple_bindingsv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP1:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP2]], i32 0, i32 1
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]]
+// CHECK:    [[X2:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP3]], i32 0, i32 0
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[X2]], align 4
+// CHECK:    [[ADD3:%.*]] = add nsw i32 [[ADD]], [[TMP6]]
+// CHECK:    [[Y4:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP3]], i32 0, i32 1
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[Y4]], align 4
+// CHECK:    [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[TMP7]]
+// CHECK:    store i32 [[ADD5]], ptr [[SUM:%.*]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z32test_multiple_bindings_mixed_dsav(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P1:%.*]], ptr align 4 @__const._Z32test_multiple_bindings_mixed_dsav.p1, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P2:%.*]], ptr align 4 @__const._Z32test_multiple_bindings_mixed_dsav.p2, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P1]], i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1:%.*]], ptr align 4 [[P2]], i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @_Z32test_multiple_bindings_mixed_dsav.omp_outlined, ptr [[TMP0]], ptr [[TMP1]], ptr [[P1]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z32test_multiple_bindings_mixed_dsav.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP1:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[P1:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
+// CHECK:    store ptr [[P1]], ptr [[P1_ADDR:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP4:%.*]] = load ptr, ptr [[P1_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P12:%.*]], ptr align 4 [[TMP4]], i64 8, i1 false)
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP2]], i32 0, i32 1
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]]
+// CHECK:    [[X3:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP3]], i32 0, i32 0
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[X3]], align 4
+// CHECK:    [[ADD4:%.*]] = add nsw i32 [[ADD]], [[TMP7]]
+// CHECK:    [[Y5:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP3]], i32 0, i32 1
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[Y5]], align 4
+// CHECK:    [[ADD6:%.*]] = add nsw i32 [[ADD4]], [[TMP8]]
+// CHECK:    store i32 [[ADD6]], ptr [[RESULT:%.*]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z21test_array_3_elementsv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*]]:
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARR:%.*]], ptr align 4 @__const._Z21test_array_3_elementsv.arr, i64 12, i1 false)
+// CHECK:    [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP0:%.*]], i64 0, i64 0
+// CHECK:    br label %[[ARRAYINIT_BODY:.*]]
+// CHECK:       [[ARRAYINIT_BODY]]:
+// CHECK:    [[ARRAYINIT_INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[ARRAYINIT_NEXT:%.*]], %[[ARRAYINIT_BODY]] ]
+// CHECK:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[ARRAYINIT_BEGIN]], i64 [[ARRAYINIT_INDEX]]
+// CHECK:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [3 x i32], ptr [[ARR]], i64 0, i64 [[ARRAYINIT_INDEX]]
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// CHECK:    store i32 [[TMP2]], ptr [[TMP1]], align 4
+// CHECK:    [[ARRAYINIT_NEXT]] = add nuw i64 [[ARRAYINIT_INDEX]], 1
+// CHECK:    [[ARRAYINIT_DONE:%.*]] = icmp eq i64 [[ARRAYINIT_NEXT]], 3
+// CHECK:    br i1 [[ARRAYINIT_DONE]], label %[[ARRAYINIT_END:.*]], label %[[ARRAYINIT_BODY]]
+// CHECK:       [[ARRAYINIT_END]]:
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z21test_array_3_elementsv.omp_outlined, ptr [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z21test_array_3_elementsv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i64 0, i64 0
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i64 0, i64 1
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP5]]
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i64 0, i64 2
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
+// CHECK:    [[ADD1:%.*]] = add nsw i32 [[ADD]], [[TMP7]]
+// CHECK:    store i32 [[ADD1]], ptr [[SUM:%.*]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z11test_singlev(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z11test_singlev.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z11test_singlev.omp_outlined, ptr [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z11test_singlev.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK:    [[TMP4:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK:    [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
+// CHECK:    br i1 [[TMP5]], label %[[OMP_IF_THEN:.*]], label %[[OMP_IF_END:.*]]
+// CHECK:       [[OMP_IF_THEN]]:
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
+// CHECK:    store i32 [[ADD]], ptr [[SUM:%.*]], align 4
+// CHECK:    call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK:    br label %[[OMP_IF_END]]
+// CHECK:       [[OMP_IF_END]]:
+// CHECK:    call void @__kmpc_barrier(ptr @[[GLOB5:[0-9]+]], i32 [[TMP3]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z13test_sectionsv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z13test_sectionsv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z13test_sectionsv.omp_outlined, ptr [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z13test_sectionsv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store i32 0, ptr [[DOTOMP_SECTIONS_LB_:%.*]], align 4
+// CHECK:    store i32 1, ptr [[DOTOMP_SECTIONS_UB_:%.*]], align 4
+// CHECK:    store i32 1, ptr [[DOTOMP_SECTIONS_ST_:%.*]], align 4
+// CHECK:    store i32 0, ptr [[DOTOMP_SECTIONS_IL_:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB6:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4
+// CHECK:    [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1
+// CHECK:    [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 1
+// CHECK:    store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_UB_]], align 4
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4
+// CHECK:    store i32 [[TMP7]], ptr [[DOTOMP_SECTIONS_IV_:%.*]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
+// CHECK:       [[OMP_INNER_FOR_COND]]:
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4
+// CHECK:    [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
+// CHECK:    br i1 [[CMP]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
+// CHECK:       [[OMP_INNER_FOR_BODY]]:
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4
+// CHECK:    switch i32 [[TMP10]], [[DOTOMP_SECTIONS_EXIT:label %.*]] [
+// CHECK:      i32 0, [[DOTOMP_SECTIONS_CASE:label %.*]]
+// CHECK:      i32 1, [[DOTOMP_SECTIONS_CASE1:label %.*]]
+// CHECK:    ]
+// CHECK:       [[_OMP_SECTIONS_CASE:.*:]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
+// CHECK:    store i32 [[ADD]], ptr [[SUM:%.*]], align 4
+// CHECK:    br [[DOTOMP_SECTIONS_EXIT]]
+// CHECK:       [[_OMP_SECTIONS_CASE1:.*:]]
+// CHECK:    [[X2:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP13:%.*]] = load i32, ptr [[X2]], align 4
+// CHECK:    [[Y3:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP14:%.*]] = load i32, ptr [[Y3]], align 4
+// CHECK:    [[SUB:%.*]] = sub nsw i32 [[TMP13]], [[TMP14]]
+// CHECK:    store i32 [[SUB]], ptr [[DIFF:%.*]], align 4
+// CHECK:    br [[DOTOMP_SECTIONS_EXIT]]
+// CHECK:       [[_OMP_SECTIONS_EXIT:.*:]]
+// CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
+// CHECK:       [[OMP_INNER_FOR_INC]]:
+// CHECK:    [[TMP15:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4
+// CHECK:    [[INC:%.*]] = add nsw i32 [[TMP15]], 1
+// CHECK:    store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND]]
+// CHECK:       [[OMP_INNER_FOR_END]]:
+// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB6]], i32 [[TMP3]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z20test_nested_parallelv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z20test_nested_parallelv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z20test_nested_parallelv.omp_outlined, ptr [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z20test_nested_parallelv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z20test_nested_parallelv.omp_outlined.omp_outlined, ptr [[TMP1]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z20test_nested_parallelv.omp_outlined.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
+// CHECK:    store i32 [[ADD]], ptr [[SUM:%.*]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z10test_simd_v(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z10test_simd_v.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    store i32 0, ptr [[DOTOMP_IV:%.*]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
+// CHECK:       [[OMP_INNER_FOR_COND]]:
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73:![0-9]+]]
+// CHECK:    [[CMP:%.*]] = icmp slt i32 [[TMP1]], 10
+// CHECK:    br i1 [[CMP]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
+// CHECK:       [[OMP_INNER_FOR_BODY]]:
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]]
+// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP2]], 1
+// CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4, !llvm.access.group [[ACC_GRP73]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP73]]
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 1
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP73]]
+// CHECK:    [[ADD1:%.*]] = add nsw i32 [[TMP3]], [[TMP4]]
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP73]]
+// CHECK:    [[ADD2:%.*]] = add nsw i32 [[ADD1]], [[TMP5]]
+// CHECK:    store i32 [[ADD2]], ptr [[RESULT:%.*]], align 4, !llvm.access.group [[ACC_GRP73]]
+// CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
+// CHECK:       [[OMP_BODY_CONTINUE]]:
+// CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
+// CHECK:       [[OMP_INNER_FOR_INC]]:
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]]
+// CHECK:    [[ADD3:%.*]] = add nsw i32 [[TMP6]], 1
+// CHECK:    store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]]
+// CHECK:    br label %[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP74:![0-9]+]]
+// CHECK:       [[OMP_INNER_FOR_END]]:
+// CHECK:    store i32 10, ptr [[I]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local noundef i32 @main(
+// CHECK-SAME: ) #[[ATTR7:[0-9]+]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store i32 0, ptr [[RETVAL:%.*]], align 4
+// CHECK:    call void @_Z24test_target_explicit_mapv()
+// CHECK:    call void @_Z24test_target_implicit_mapv()
+// CHECK:    call void @_Z20test_target_parallelv()
+// CHECK:    call void @_Z24test_target_parallel_forv()
+// CHECK:    call void @_Z21test_firstprivate_dsav()
+// CHECK:    call void @_Z15test_shared_dsav()
+// CHECK:    call void @_Z18test_reduction_dsav()
+// CHECK:    call void @_Z18test_parallel_for_v()
+// CHECK:    call void @_Z23test_parallel_for_simd_v()
+// CHECK:    call void @_Z28test_target_teams_distributev()
+// CHECK:    call void @_Z9test_taskv()
+// CHECK:    call void @_Z16test_task_dependv()
+// CHECK:    call void @_Z14test_taskloop_v()
+// CHECK:    call void @_Z21instantiate_templatesv()
+// CHECK:    call void @_Z19test_static_bindingv()
+// CHECK:    call void @_Z26test_static_binding_sharedv()
+// CHECK:    call void @_Z17test_array_targetv()
+// CHECK:    call void @_Z15test_array_taskv()
+// CHECK:    call void @_Z11test_nestedv()
+// CHECK:    call void @_Z22test_multiple_bindingsv()
+// CHECK:    call void @_Z32test_multiple_bindings_mixed_dsav()
+// CHECK:    ret i32 0
+//
diff --git a/clang/test/OpenMP/structured-bindings-messages.cpp b/clang/test/OpenMP/structured-bindings-messages.cpp
index 522405c3f8855..ad3103654c94f 100644
--- a/clang/test/OpenMP/structured-bindings-messages.cpp
+++ b/clang/test/OpenMP/structured-bindings-messages.cpp
@@ -119,30 +119,41 @@ namespace std {
 void use(int);
 
 void test_pair() {
-  auto [a, b] = std::make_pair(1, 2); // expected-note{{'a' declared here}}
+  auto [a, b] = std::make_pair(1, 2);
+  // expected-note@-1{{'a' declared here}}
+  // expected-note@-2{{'b' declared here}}
 #pragma omp parallel
   {
-    // expected-error@+1{{capturing tuple-like structured binding 'a' is not yet supported in OpenMP}}
     use(a + b);
+    // expected-error@-1{{capturing tuple-like structured binding 'a' is not yet supported in OpenMP}}
+    // expected-error@-2{{capturing tuple-like structured binding 'b' is not yet supported in OpenMP}}
   }
 }
 
 void test_tuple() {
   std::tuple<int, int, int> t = {1, 2, 3};
-  auto [x, y, z] = t; // expected-note{{'x' declared here}}
+  auto [x, y, z] = t;
+  // expected-note@-1{{'x' declared here}}
+  // expected-note@-2{{'y' declared here}}
+  // expected-note@-3{{'z' declared here}}
 #pragma omp parallel
   {
-    // expected-error@+1{{capturing tuple-like structured binding 'x' is not yet supported in OpenMP}}
     use(x + y + z);
+    // expected-error@-1{{capturing tuple-like structured binding 'x' is not yet supported in OpenMP}}
+    // expected-error@-2{{capturing tuple-like structured binding 'y' is not yet supported in OpenMP}}
+    // expected-error@-3{{capturing tuple-like structured binding 'z' is not yet supported in OpenMP}}
   }
 }
 
 void test_array() {
   std::array<int, 2> arr = {1, 2};
-  auto [p, q] = arr; // expected-note{{'p' declared here}}
+  auto [p, q] = arr;
+  // expected-note@-1{{'p' declared here}}
+  // expected-note@-2{{'q' declared here}}
 #pragma omp parallel
   {
-    // expected-error@+1{{capturing tuple-like structured binding 'p' is not yet supported in OpenMP}}
     use(p + q);
+    // expected-error@-1{{capturing tuple-like structured binding 'p' is not yet supported in OpenMP}}
+    // expected-error@-2{{capturing tuple-like structured binding 'q' is not yet supported in OpenMP}}
   }
 }

>From 62f537f86a58ca849f43a1b55b1c0487a2d1adfc Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Fri, 15 May 2026 09:12:45 -0700
Subject: [PATCH 22/45] Fix format

---
 clang/lib/Sema/SemaOpenMP.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 49868d853cba1..848b0ea0071cf 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -2426,7 +2426,7 @@ VarDecl *SemaOpenMP::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo,
   assert(getLangOpts().OpenMP && "OpenMP is not allowed");
   D = getCanonicalDecl(D);
 
-   if (auto *BD = dyn_cast<BindingDecl>(D)) {
+  if (auto *BD = dyn_cast<BindingDecl>(D)) {
     if (!BD->getHoldingVar())
       D = cast<VarDecl>(BD->getDecomposedDecl());
   }

>From d63c2a74533d974325d1b50bc08ebb373344d4cb Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Tue, 19 May 2026 14:11:56 -0700
Subject: [PATCH 23/45] Addressed review comments

---
 clang/lib/CodeGen/CGExpr.cpp                  |  10 ++
 clang/lib/CodeGen/CGStmtOpenMP.cpp            |  46 +++++-
 clang/lib/CodeGen/CodeGenFunction.h           |  16 ++-
 clang/lib/Sema/SemaOpenMP.cpp                 |  44 +++---
 .../OpenMP/structured-bindings-codegen.cpp    | 131 ++++++++++++++++++
 5 files changed, 219 insertions(+), 28 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 692b1744cc684..a887be575898b 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3818,10 +3818,20 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
   // an enclosing scope.
   if (const auto *BD = dyn_cast<BindingDecl>(ND)) {
     if (E->refersToEnclosingVariableOrCapture()) {
+      // Try direct lookup first.
+      auto It = LocalDeclMap.find(BD->getCanonicalDecl());
+      if (It != LocalDeclMap.end()) {
+        return MakeAddrLValue(It->second, E->getType(), AlignmentSource::Decl);
+      }
       // OpenMP case: binding was captured via its decomposed decl.
       if (CapturedStmtInfo &&
           CapturedStmtInfo->getKind() == CapturedRegionKind::CR_OpenMP &&
           CGM.getLangOpts().OpenMP) {
+        auto NameIt = OMPPrivatizedBindingsByName.find(BD->getName());
+        if (NameIt != OMPPrivatizedBindingsByName.end()) {
+          return MakeAddrLValue(NameIt->second, E->getType(),
+                                AlignmentSource::Decl);
+        }
         return EmitOMPCapturedBindingLValue(BD);
       }
       // Non-OpenMP case: lambda capture.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 82307d3a064c6..b658f875f346a 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1152,11 +1152,35 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
     const auto *IRef = C->varlist_begin();
     const auto *InitsRef = C->inits().begin();
     for (const Expr *IInit : C->private_copies()) {
-      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
+      const auto *OrigDecl = cast<DeclRefExpr>(*IRef)->getDecl();
+      const VarDecl *OrigVD = dyn_cast<VarDecl>(OrigDecl);
+      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
+
+      if (const auto *BD = dyn_cast<BindingDecl>(OrigDecl)) {
+        Address PrivateAddr = CreateMemTemp(VD->getType(), VD->getName());
+        DeclRefExpr DRE(getContext(), const_cast<BindingDecl *>(BD),
+                        /*RefersToEnclosingVariableOrCapture=*/true,
+                        BD->getType(), VK_LValue, (*IRef)->getExprLoc());
+        LValue OriginalLVal = EmitLValue(&DRE);
+        RValue OrigValue =
+            EmitLoadOfLValue(OriginalLVal, (*IRef)->getExprLoc());
+        EmitStoreThroughLValue(OrigValue,
+                               MakeAddrLValue(PrivateAddr, VD->getType()));
+        LocalDeclMap.try_emplace(VD, PrivateAddr);
+        bool IsRegistered = PrivateScope.addPrivate(BD, PrivateAddr);
+        assert(IsRegistered &&
+               "firstprivate var already registered as firstprivate");
+        (void)IsRegistered;
+        ++IRef;
+        ++InitsRef;
+        continue;
+      }
+
+      // Original VarDecl logic.
+      assert(OrigVD && "Expected VarDecl for non-BindingDecl firstprivate");
       bool ThisFirstprivateIsLastprivate =
-          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
+          OrigVD && Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
       const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
-      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
       if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
           !FD->getType()->isReferenceType() &&
           (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
@@ -1279,13 +1303,19 @@ void CodeGenFunction::EmitOMPPrivateClause(
   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
     auto IRef = C->varlist_begin();
     for (const Expr *IInit : C->private_copies()) {
-      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
-      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
+      const auto *OrigDecl = cast<DeclRefExpr>(*IRef)->getDecl();
+      bool ShouldEmit = true;
+      if (const auto *VD = dyn_cast<VarDecl>(OrigDecl)) {
+        if (!EmittedAsPrivate.insert(VD->getCanonicalDecl()).second) {
+          ShouldEmit = false; // Already emitted.
+        }
+      }
+      if (ShouldEmit) {
         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
         EmitDecl(*VD);
         // Emit private VarDecl with copy init.
         bool IsRegistered =
-            PrivateScope.addPrivate(OrigVD, GetAddrOfLocalVar(VD));
+            PrivateScope.addPrivate(OrigDecl, GetAddrOfLocalVar(VD));
         assert(IsRegistered && "private var already registered as private");
         // Silence the warning about unused variable.
         (void)IsRegistered;
@@ -1862,7 +1892,9 @@ checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
       if (!DRE)
         continue;
-      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
+      // Only track VarDecl, not BindingDecl.
+      if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl()))
+        PrivateDecls.insert(VD);
     }
   }
   CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 81f167e2aa0a5..08c2f1762d127 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -1152,9 +1152,14 @@ class CodeGenFunction : public CodeGenTypeCache {
     /// Sets the address of the variable \p LocalVD to be \p TempAddr in
     /// function \p CGF.
     /// \return true if at least one variable was set already, false otherwise.
-    bool setVarAddr(CodeGenFunction &CGF, const VarDecl *LocalVD,
+    bool setVarAddr(CodeGenFunction &CGF, const ValueDecl *LocalVD,
                     Address TempAddr) {
-      LocalVD = LocalVD->getCanonicalDecl();
+      LocalVD = cast<ValueDecl>(LocalVD->getCanonicalDecl());
+      // For BindingDecls, also store by name for remapped lookup
+      if (const auto *BD = dyn_cast<BindingDecl>(LocalVD)) {
+        CGF.OMPPrivatizedBindingsByName.insert({BD->getName(), TempAddr});
+      }
+
       // Only save it once.
       if (SavedLocals.count(LocalVD))
         return false;
@@ -1225,7 +1230,7 @@ class CodeGenFunction : public CodeGenTypeCache {
     /// PrivateGen is the address of the generated private variable.
     /// \return true if the variable is registered as private, false if it has
     /// been privatized already.
-    bool addPrivate(const VarDecl *LocalVD, Address Addr) {
+    bool addPrivate(const ValueDecl *LocalVD, Address Addr) {
       assert(PerformCleanup && "adding private to dead scope");
       return MappedVars.setVarAddr(CGF, LocalVD, Addr);
     }
@@ -1548,6 +1553,11 @@ class CodeGenFunction : public CodeGenTypeCache {
   /// decls.
   DeclMapTy LocalDeclMap;
 
+  /// Name-based lookup map for privatized BindingDecls.
+  /// Used when BindingDecls are remapped during OpenMP outlining, since the
+  /// remapped BindingDecl has a different pointer than the original.
+  llvm::StringMap<Address> OMPPrivatizedBindingsByName;
+
   // Keep track of the cleanups for callee-destructed parameters pushed to the
   // cleanup stack so that they can be deactivated later.
   llvm::DenseMap<const ParmVarDecl *, EHScopeStack::stable_iterator>
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 848b0ea0071cf..e7cce8105625a 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -5444,7 +5444,8 @@ getPrivateItem(Sema &S, Expr *&RefExpr, SourceLocation &ELoc,
   RefExpr = RefExpr->IgnoreParenImpCasts();
   auto *DE = dyn_cast_or_null<DeclRefExpr>(RefExpr);
   auto *ME = dyn_cast_or_null<MemberExpr>(RefExpr);
-  if ((!DE || !isa<VarDecl>(DE->getDecl())) &&
+  if ((!DE ||
+       (!isa<VarDecl>(DE->getDecl()) && !isa<BindingDecl>(DE->getDecl()))) &&
       (S.getCurrentThisType().isNull() || !ME ||
        !isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts()) ||
        !isa<FieldDecl>(ME->getMemberDecl()))) {
@@ -19481,7 +19482,8 @@ OMPClause *SemaOpenMP::ActOnOpenMPPrivateClause(ArrayRef<Expr *> VarList,
         SemaRef, VDPrivate, RefExpr->getType().getUnqualifiedType(), ELoc);
 
     DeclRefExpr *Ref = nullptr;
-    if (!VD && !SemaRef.CurContext->isDependentContext()) {
+    bool IsBindingDecl = isa<BindingDecl>(D);
+    if (!VD && !IsBindingDecl && !SemaRef.CurContext->isDependentContext()) {
       auto *FD = dyn_cast<FieldDecl>(D);
       VarDecl *VD = FD ? DSAStack->getImplicitFDCapExprDecl(FD) : nullptr;
       if (VD)
@@ -19492,9 +19494,10 @@ OMPClause *SemaOpenMP::ActOnOpenMPPrivateClause(ArrayRef<Expr *> VarList,
     }
     if (!IsImplicitClause)
       DSAStack->addDSA(D, RefExpr->IgnoreParens(), OMPC_private, Ref);
-    Vars.push_back((VD || SemaRef.CurContext->isDependentContext())
-                       ? RefExpr->IgnoreParens()
-                       : Ref);
+    Vars.push_back(
+        (VD || IsBindingDecl || SemaRef.CurContext->isDependentContext())
+            ? RefExpr->IgnoreParens()
+            : Ref);
     PrivateCopies.push_back(VDPrivateRefExpr);
   }
 
@@ -19768,23 +19771,28 @@ OMPClause *SemaOpenMP::ActOnOpenMPFirstprivateClause(ArrayRef<Expr *> VarList,
       if (TopDVar.CKind == OMPC_lastprivate) {
         Ref = TopDVar.PrivateCopy;
       } else {
-        auto *FD = dyn_cast<FieldDecl>(D);
-        VarDecl *VD = FD ? DSAStack->getImplicitFDCapExprDecl(FD) : nullptr;
-        if (VD)
-          Ref =
-              buildDeclRefExpr(SemaRef, VD, VD->getType().getNonReferenceType(),
-                               RefExpr->getExprLoc());
-        else
-          Ref = buildCapture(SemaRef, D, SimpleRefExpr, /*WithInit=*/true);
-        if (VD || !isOpenMPCapturedDecl(D))
-          ExprCaptures.push_back(Ref->getDecl());
+        bool IsBindingDecl = isa<BindingDecl>(D);
+        if (!IsBindingDecl) {
+          auto *FD = dyn_cast<FieldDecl>(D);
+          VarDecl *VD = FD ? DSAStack->getImplicitFDCapExprDecl(FD) : nullptr;
+          if (VD)
+            Ref = buildDeclRefExpr(SemaRef, VD,
+                                   VD->getType().getNonReferenceType(),
+                                   RefExpr->getExprLoc());
+          else
+            Ref = buildCapture(SemaRef, D, SimpleRefExpr, /*WithInit=*/true);
+          if (VD || !isOpenMPCapturedDecl(D))
+            ExprCaptures.push_back(Ref->getDecl());
+        }
       }
     }
     if (!IsImplicitClause)
       DSAStack->addDSA(D, RefExpr->IgnoreParens(), OMPC_firstprivate, Ref);
-    Vars.push_back((VD || SemaRef.CurContext->isDependentContext())
-                       ? RefExpr->IgnoreParens()
-                       : Ref);
+    bool IsBindingDecl = isa<BindingDecl>(D);
+    Vars.push_back(
+        (VD || IsBindingDecl || SemaRef.CurContext->isDependentContext())
+            ? RefExpr->IgnoreParens()
+            : Ref);
     PrivateCopies.push_back(VDPrivateRefExpr);
     Inits.push_back(VDInitRefExpr);
   }
diff --git a/clang/test/OpenMP/structured-bindings-codegen.cpp b/clang/test/OpenMP/structured-bindings-codegen.cpp
index 3b89ac3f43c9d..bcc5e23664388 100644
--- a/clang/test/OpenMP/structured-bindings-codegen.cpp
+++ b/clang/test/OpenMP/structured-bindings-codegen.cpp
@@ -364,6 +364,41 @@ void test_simd_() {
   }
 }
 
+void test_private_individual_bindings() {
+  Point p{1, 2};
+  auto [a, b] = p;
+#pragma omp parallel private(a)
+  {
+    a = 2;
+  }
+}
+
+void test_firstprivate_individual_bindings() {
+  Point p{1, 2};
+  auto [a, b] = p;
+#pragma omp parallel firstprivate(b)
+  {
+    b = b + 10;
+  }
+}
+
+void test_mixed_dsa() {
+  Point p{1, 2};
+  auto [a, b] = p;
+#pragma omp parallel firstprivate(a) shared(b)
+  {
+    int result = a + b;
+  }
+}
+
+void test_static_bindings() {
+  static auto [a, b] = Point{1, 2};
+#pragma omp parallel
+  {
+    (void)(a + b);
+  }
+}
+
 int main() {
   test_target_explicit_map();
   test_target_implicit_map();
@@ -386,6 +421,10 @@ int main() {
   test_nested();
   test_multiple_bindings();
   test_multiple_bindings_mixed_dsa();
+  test_private_individual_bindings();
+  test_firstprivate_individual_bindings();
+  test_mixed_dsa();
+  test_static_bindings();
   return 0;
 }
 
@@ -2000,6 +2039,94 @@ int main() {
 // CHECK:    ret void
 //
 //
+// CHECK-LABEL: define dso_local void @_Z32test_private_individual_bindingsv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z32test_private_individual_bindingsv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @_Z32test_private_individual_bindingsv.omp_outlined)
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z32test_private_individual_bindingsv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store i32 2, ptr [[A:%.*]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z37test_firstprivate_individual_bindingsv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z37test_firstprivate_individual_bindingsv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z37test_firstprivate_individual_bindingsv.omp_outlined, ptr [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z37test_firstprivate_individual_bindingsv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    store i32 [[TMP2]], ptr [[B:%.*]], align 4
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[B]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 10
+// CHECK:    store i32 [[ADD]], ptr [[B]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z14test_mixed_dsav(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z14test_mixed_dsav.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z14test_mixed_dsav.omp_outlined, ptr [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z14test_mixed_dsav.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    store i32 [[TMP2]], ptr [[A:%.*]], align 4
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[A]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP4]]
+// CHECK:    store i32 [[ADD]], ptr [[RESULT:%.*]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z20test_static_bindingsv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @_Z20test_static_bindingsv.omp_outlined)
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z20test_static_bindingsv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    [[TMP0:%.*]] = load i32, ptr @_ZZ20test_static_bindingsvEDC1a1bE, align 4
+// CHECK:    [[TMP1:%.*]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @_ZZ20test_static_bindingsvEDC1a1bE, i64 4), align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]]
+// CHECK:    ret void
+//
+//
 // CHECK-LABEL: define dso_local noundef i32 @main(
 // CHECK-SAME: ) #[[ATTR7:[0-9]+]] {
 // CHECK:  [[ENTRY:.*:]]
@@ -2025,5 +2152,9 @@ int main() {
 // CHECK:    call void @_Z11test_nestedv()
 // CHECK:    call void @_Z22test_multiple_bindingsv()
 // CHECK:    call void @_Z32test_multiple_bindings_mixed_dsav()
+// CHECK:    call void @_Z32test_private_individual_bindingsv()
+// CHECK:    call void @_Z37test_firstprivate_individual_bindingsv()
+// CHECK:    call void @_Z14test_mixed_dsav()
+// CHECK:    call void @_Z20test_static_bindingsv()
 // CHECK:    ret i32 0
 //

>From b925b2c3fa0f15b20898bd6456329eddfdb027ea Mon Sep 17 00:00:00 2001
From: Zahira Ammarguellat <zahira.ammarguellat at intel.com>
Date: Wed, 20 May 2026 05:48:13 -0700
Subject: [PATCH 24/45] [OpenMP] Prevent parser infinite loop on unimplemented
 clauses

---
 clang/lib/Basic/OpenMPKinds.cpp               |  1 +
 clang/lib/Parse/ParseOpenMP.cpp               |  9 ++
 clang/lib/Sema/SemaOpenMP.cpp                 |  4 +
 .../OpenMP/unimplemented_clause_messages.cpp  | 93 +++++++++++++++++++
 4 files changed, 107 insertions(+)
 create mode 100644 clang/test/OpenMP/unimplemented_clause_messages.cpp

diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp
index 287eb217ba458..675d86349c933 100644
--- a/clang/lib/Basic/OpenMPKinds.cpp
+++ b/clang/lib/Basic/OpenMPKinds.cpp
@@ -965,6 +965,7 @@ void clang::getOpenMPCaptureRegions(
     case OMPD_simd:
     case OMPD_single:
     case OMPD_target_data:
+    case OMPD_taskgraph:
     case OMPD_taskgroup:
     case OMPD_stripe:
       // These directives (when standalone) use OMPD_unknown as the region,
diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index 45a47ec797f01..ba3d3113700ff 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -2388,6 +2388,8 @@ StmtResult Parser::ParseOpenMPExecutableDirective(
     ImplicitClauseAllowed = false;
     Actions.OpenMP().StartOpenMPClause(CKind);
     HasImplicitClause = false;
+    SourceLocation ClauseLoc = Tok.getLocation();
+
     OMPClause *Clause =
         ParseOpenMPClause(DKind, CKind, !SeenClauses[unsigned(CKind)]);
     SeenClauses[unsigned(CKind)] = true;
@@ -2398,6 +2400,13 @@ StmtResult Parser::ParseOpenMPExecutableDirective(
     if (Tok.is(tok::comma))
       ConsumeToken();
     Actions.OpenMP().EndOpenMPClause();
+
+    // If ParseOpenMPClause returned without consuming any tokens, skip
+    // to end to avoid an infinite loop.
+    if (Tok.getLocation() == ClauseLoc) {
+      skipUntilPragmaOpenMPEnd(DKind);
+      break;
+    }
   }
   // End location of the directive.
   EndLoc = Tok.getLocation();
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index d6f6bc919a31b..76b40a5039180 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -6778,6 +6778,10 @@ StmtResult SemaOpenMP::ActOnOpenMPExecutableDirective(
   case OMPD_begin_declare_variant:
   case OMPD_end_declare_variant:
     llvm_unreachable("OpenMP Directive is not allowed");
+  case OMPD_taskgraph:
+    Diag(StartLoc, diag::err_omp_unexpected_directive)
+        << 1 << getOpenMPDirectiveName(OMPD_taskgraph);
+    return StmtError();
   case OMPD_unknown:
   default:
     llvm_unreachable("Unknown OpenMP directive");
diff --git a/clang/test/OpenMP/unimplemented_clause_messages.cpp b/clang/test/OpenMP/unimplemented_clause_messages.cpp
new file mode 100644
index 0000000000000..172203ea5d040
--- /dev/null
+++ b/clang/test/OpenMP/unimplemented_clause_messages.cpp
@@ -0,0 +1,93 @@
+// RUN: %clang_cc1 -verify=expected,omp60 -fopenmp -fopenmp-version=60 %s
+// RUN: %clang_cc1 -verify=expected,omp51 -fopenmp -fopenmp-version=51 %s
+// RUN: %clang_cc1 -verify=expected,omp60 -fopenmp-simd -fopenmp-version=60 %s
+// RUN: %clang_cc1 -verify=expected,omp51 -fopenmp-simd -fopenmp-version=51 %s
+
+
+void test_induction_basic() {
+  int i;
+  // omp60-warning@+4{{extra tokens at the end of '#pragma omp parallel for' are ignored}}
+  // omp60-error@+3{{unexpected OpenMP clause 'induction' in directive '#pragma omp parallel for'}}
+  // omp51-warning@+2{{extra tokens at the end of '#pragma omp parallel for' are ignored}}
+  // omp51-error@+1{{unexpected OpenMP clause 'induction' in directive '#pragma omp parallel for'}}
+#pragma omp parallel for induction(i)
+  for (i = 0; i < 10; ++i)
+    ;
+}
+
+void test_apply() {
+  // omp60-warning@+4{{extra tokens at the end of '#pragma omp tile' are ignored}}
+  // omp60-error@+3{{unexpected OpenMP clause 'apply' in directive '#pragma omp tile'}}
+  // omp51-error@+2{{unexpected OpenMP clause 'apply' in directive '#pragma omp tile'}}
+  // omp51-warning@+1{{extra tokens at the end of '#pragma omp tile' are ignored}}
+#pragma omp tile sizes(10) apply(intratile: unroll)
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
+void test_empty_apply() {
+  // omp60-warning@+4{{extra tokens at the end of '#pragma omp tile' are ignored}}
+  // omp60-error@+3{{unexpected OpenMP clause 'apply' in directive '#pragma omp tile'}}
+  // omp51-error@+2{{unexpected OpenMP clause 'apply' in directive '#pragma omp tile'}}
+  // omp51-warning@+1{{extra tokens at the end of '#pragma omp tile' are ignored}}
+#pragma omp tile sizes(10) apply()
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
+void test_nested_apply()
+{
+  // omp60-error@+5{{unexpected OpenMP clause 'apply' in directive '#pragma omp tile'}}
+  // omp60-warning@+4{{extra tokens at the end of '#pragma omp tile' are ignored}}
+  // omp51-error@+3{{unexpected OpenMP clause 'apply' in directive '#pragma omp tile'}}
+  // omp51-warning@+2{{extra tokens at the end of '#pragma omp tile' are ignored}}
+#pragma omp tile sizes(10) \
+            apply(intratile: unroll partial(2) apply(reverse))
+  for (int i = 0; i < 100; ++i)
+    ;
+}
+
+void test_induction_with_following_clause() {
+  int i;
+  // omp60-warning@+4{{extra tokens at the end of '#pragma omp parallel for' are ignored}}
+  // omp60-error@+3{{unexpected OpenMP clause 'induction' in directive '#pragma omp parallel for'}}
+  // omp51-error@+2{{unexpected OpenMP clause 'induction' in directive '#pragma omp parallel for'}}
+  // omp51-warning@+1{{extra tokens at the end of '#pragma omp parallel for' are ignored}}
+#pragma omp parallel for induction(i) num_threads(4)
+  for (i = 0; i < 10; ++i)
+    ;
+}
+
+class Point {
+  float x, y, m;
+  char color;
+
+};
+
+void processPointsInLine() {
+  float separation;
+  // omp60-error@+4{{unexpected OpenMP clause 'induction' in directive '#pragma omp parallel for'}}
+  // omp60-warning@+3{{extra tokens at the end of '#pragma omp parallel for' are ignored}}
+  // omp51-error@+2{{unexpected OpenMP clause 'induction' in directive '#pragma omp parallel for'}}
+  // omp51-warning@+1{{extra tokens at the end of '#pragma omp parallel for' are ignored}}
+#pragma omp parallel for induction(step(Separation))
+  for (int i = 0; i < 10; ++i) {
+    ;
+  }
+}
+
+// Make sure test doesn't crash.
+void test_tasgraph()
+{
+  // omp60-error@+2{{unexpected OpenMP directive '#pragma omp taskgraph'}}
+  // omp51-error@+1{{unexpected OpenMP directive '#pragma omp taskgraph'}}
+#pragma omp taskgraph
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
+void test_implemented_clause() {
+#pragma omp tile sizes(10)
+  for (int i = 0; i < 10; ++i)
+    ;
+}

>From fd395476bcd164f874bc3e5349e18aedc695c2dd Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Thu, 21 May 2026 09:59:21 -0700
Subject: [PATCH 25/45] Added test to show that there can be no name collision

---
 .../OpenMP/structured-bindings-codegen.cpp    | 47 ++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/clang/test/OpenMP/structured-bindings-codegen.cpp b/clang/test/OpenMP/structured-bindings-codegen.cpp
index bcc5e23664388..3a7aa08a141d7 100644
--- a/clang/test/OpenMP/structured-bindings-codegen.cpp
+++ b/clang/test/OpenMP/structured-bindings-codegen.cpp
@@ -399,6 +399,22 @@ void test_static_bindings() {
   }
 }
 
+void use(int a);
+
+void test_shadowing() {
+  auto [a, b] = Point{1, 2};
+#pragma omp parallel private(a)
+  {
+  use(a);
+  {
+    auto [a, b] = Point{10, 20};
+    use(a);
+  }
+  use(a);
+  }
+  use(a);
+}
+
 int main() {
   test_target_explicit_map();
   test_target_implicit_map();
@@ -425,6 +441,7 @@ int main() {
   test_firstprivate_individual_bindings();
   test_mixed_dsa();
   test_static_bindings();
+  test_shadowing();
   return 0;
 }
 
@@ -2127,8 +2144,35 @@ int main() {
 // CHECK:    ret void
 //
 //
+// CHECK-LABEL: define dso_local void @_Z14test_shadowingv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 @__const._Z14test_shadowingv., i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @_Z14test_shadowingv.omp_outlined)
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP1]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z14test_shadowingv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[A:%.*]], align 4
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP1]])
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 @"__const.<captured>.", i64 8, i1 false)
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP2]])
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[A]], align 4
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP3]])
+// CHECK:    ret void
+//
+//
 // CHECK-LABEL: define dso_local noundef i32 @main(
-// CHECK-SAME: ) #[[ATTR7:[0-9]+]] {
+// CHECK-SAME: ) #[[ATTR8:[0-9]+]] {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    store i32 0, ptr [[RETVAL:%.*]], align 4
 // CHECK:    call void @_Z24test_target_explicit_mapv()
@@ -2156,5 +2200,6 @@ int main() {
 // CHECK:    call void @_Z37test_firstprivate_individual_bindingsv()
 // CHECK:    call void @_Z14test_mixed_dsav()
 // CHECK:    call void @_Z20test_static_bindingsv()
+// CHECK:    call void @_Z14test_shadowingv()
 // CHECK:    ret i32 0
 //

>From fcb5bff472f6eefa0d23a04abb8f8ecd7dd4cdd4 Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Thu, 21 May 2026 10:14:47 -0700
Subject: [PATCH 26/45] Fixed the crash and added the test

---
 clang/lib/CodeGen/CGStmtOpenMP.cpp            |  6 +-
 .../OpenMP/structured-bindings-codegen.cpp    | 68 +++++++++++++++++--
 2 files changed, 68 insertions(+), 6 deletions(-)

diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index df2c275113372..57fad1fc42645 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -174,8 +174,10 @@ class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
       // Mark private vars as undefs.
       for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
         for (const Expr *IRef : C->varlist()) {
-          const auto *OrigVD =
-              cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
+          const auto *OrigDecl = cast<DeclRefExpr>(IRef)->getDecl();
+          const auto *OrigVD = dyn_cast<VarDecl>(OrigDecl);
+          if (!OrigVD)
+            continue;
           if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
             QualType OrigVDTy = OrigVD->getType().getNonReferenceType();
             (void)PreCondVars.setVarAddr(
diff --git a/clang/test/OpenMP/structured-bindings-codegen.cpp b/clang/test/OpenMP/structured-bindings-codegen.cpp
index 3a7aa08a141d7..1014d0719be76 100644
--- a/clang/test/OpenMP/structured-bindings-codegen.cpp
+++ b/clang/test/OpenMP/structured-bindings-codegen.cpp
@@ -405,14 +405,25 @@ void test_shadowing() {
   auto [a, b] = Point{1, 2};
 #pragma omp parallel private(a)
   {
-  use(a);
-  {
-    auto [a, b] = Point{10, 20};
+    use(a);
+    {
+      auto [a, b] = Point{10, 20};
+      use(a);
+    }
     use(a);
   }
   use(a);
+}
+
+void test_simd_private_then_parallel() {
+  Point p{1,2};
+  auto [a,b] = p;
+#pragma omp simd private(a)
+  for (int i=0;i<10;++i) a += i;
+#pragma omp parallel
+  {
+    use(a);
   }
-  use(a);
 }
 
 int main() {
@@ -442,6 +453,7 @@ int main() {
   test_mixed_dsa();
   test_static_bindings();
   test_shadowing();
+  test_simd_private_then_parallel();
   return 0;
 }
 
@@ -2171,6 +2183,53 @@ int main() {
 // CHECK:    ret void
 //
 //
+// CHECK-LABEL: define dso_local void @_Z31test_simd_private_then_parallelv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z31test_simd_private_then_parallelv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    store i32 0, ptr [[DOTOMP_IV:%.*]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
+// CHECK:       [[OMP_INNER_FOR_COND]]:
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP76:![0-9]+]]
+// CHECK:    [[CMP:%.*]] = icmp slt i32 [[TMP1]], 10
+// CHECK:    br i1 [[CMP]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
+// CHECK:       [[OMP_INNER_FOR_BODY]]:
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP76]]
+// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP2]], 1
+// CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4, !llvm.access.group [[ACC_GRP76]]
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP76]]
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[A:%.*]], align 4, !llvm.access.group [[ACC_GRP76]]
+// CHECK:    [[ADD1:%.*]] = add nsw i32 [[TMP4]], [[TMP3]]
+// CHECK:    store i32 [[ADD1]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP76]]
+// CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
+// CHECK:       [[OMP_BODY_CONTINUE]]:
+// CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
+// CHECK:       [[OMP_INNER_FOR_INC]]:
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP76]]
+// CHECK:    [[ADD2:%.*]] = add nsw i32 [[TMP5]], 1
+// CHECK:    store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP76]]
+// CHECK:    br label %[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP77:![0-9]+]]
+// CHECK:       [[OMP_INNER_FOR_END]]:
+// CHECK:    store i32 10, ptr [[I]], align 4
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z31test_simd_private_then_parallelv.omp_outlined, ptr [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z31test_simd_private_then_parallelv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP2]])
+// CHECK:    ret void
+//
+//
 // CHECK-LABEL: define dso_local noundef i32 @main(
 // CHECK-SAME: ) #[[ATTR8:[0-9]+]] {
 // CHECK:  [[ENTRY:.*:]]
@@ -2201,5 +2260,6 @@ int main() {
 // CHECK:    call void @_Z14test_mixed_dsav()
 // CHECK:    call void @_Z20test_static_bindingsv()
 // CHECK:    call void @_Z14test_shadowingv()
+// CHECK:    call void @_Z31test_simd_private_then_parallelv()
 // CHECK:    ret i32 0
 //

>From 5dcf65d612193005385d65fdc530b600e798f0b2 Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Thu, 21 May 2026 11:54:29 -0700
Subject: [PATCH 27/45] Added linear, reductions and lastprivates clauses

---
 clang/lib/CodeGen/CGStmtOpenMP.cpp            |  59 +-
 clang/lib/Sema/SemaOpenMP.cpp                 |  19 +-
 .../OpenMP/structured-bindings-codegen.cpp    | 874 ++++++++++++++++++
 3 files changed, 924 insertions(+), 28 deletions(-)

diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 57fad1fc42645..2e22905cdc84c 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1879,7 +1879,8 @@ checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
       if (!DRE)
         continue;
-      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
+      if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl()))
+        PrivateDecls.insert(VD);
       CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
     }
   }
@@ -2555,17 +2556,21 @@ bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
       if (const auto *Ref =
               dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
-        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
-        const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
-        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
-                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
-                        VD->getInit()->getType(), VK_LValue,
-                        VD->getInit()->getExprLoc());
-        EmitExprAsInit(
-            &DRE, VD,
-            MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()),
-            /*capturedByInit=*/false);
-        EmitAutoVarCleanups(Emission);
+        if (isa<BindingDecl>(Ref->getDecl())) {
+          EmitVarDecl(*VD);
+        } else {
+          AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
+          const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
+          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
+                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
+                          VD->getInit()->getType(), VK_LValue,
+                          VD->getInit()->getExprLoc());
+          EmitExprAsInit(
+              &DRE, VD,
+              MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()),
+              /*capturedByInit=*/false);
+          EmitAutoVarCleanups(Emission);
+        }
       } else {
         EmitVarDecl(*VD);
       }
@@ -2602,13 +2607,21 @@ void CodeGenFunction::EmitOMPLinearClauseFinal(
           EmitBlock(ThenBB);
         }
       }
-      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
-      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
-                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
-                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
-      Address OrigAddr = EmitLValue(&DRE).getAddress();
-      CodeGenFunction::OMPPrivateScope VarScope(*this);
-      VarScope.addPrivate(OrigVD, OrigAddr);
+      const auto *OrigDecl = cast<DeclRefExpr>(*IC)->getDecl();
+      Address OrigAddr = [&]() -> Address {
+        // BindingDecl: use the original expression directly.
+        if (isa<BindingDecl>(OrigDecl))
+          return EmitLValue(*IC).getAddress();
+        // VarDecl: emit through a fresh DeclRefExpr that is flagged as a
+        // capture when CapturedStmtInfo has an entry for the variable.
+        const auto *OrigVD = cast<VarDecl>(OrigDecl);
+        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
+                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
+                        (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
+        return EmitLValue(&DRE).getAddress();
+      }();
+      CodeGenFunction::OMPPrivateScope VarScope(*this);
+      VarScope.addPrivate(OrigDecl, OrigAddr);
       (void)VarScope.Privatize();
       EmitIgnoredExpr(F);
       ++IC;
@@ -2751,10 +2764,14 @@ void CodeGenFunction::EmitOMPLinearClause(
   for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
     auto CurPrivate = C->privates().begin();
     for (const Expr *E : C->varlist()) {
-      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+      const auto *VD = cast<DeclRefExpr>(E)->getDecl();
       const auto *PrivateVD =
           cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
-      if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
+      bool IsSIMDLCV = false;
+      if (const auto *VarD = dyn_cast<VarDecl>(VD)) {
+        IsSIMDLCV = SIMDLCVs.count(VarD->getCanonicalDecl());
+      }
+      if (!IsSIMDLCV) {
         // Emit private VarDecl with copy init.
         EmitVarDecl(*PrivateVD);
         bool IsRegistered =
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index e7cce8105625a..989630057a517 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -21459,7 +21459,8 @@ OMPClause *SemaOpenMP::ActOnOpenMPLinearClause(
     VarDecl *Init = buildVarDecl(SemaRef, ELoc, Type, ".linear.start");
     Expr *InitExpr;
     DeclRefExpr *Ref = nullptr;
-    if (!VD && !SemaRef.CurContext->isDependentContext()) {
+    bool IsBindingDecl = isa<BindingDecl>(D);
+    if (!VD && !IsBindingDecl && !SemaRef.CurContext->isDependentContext()) {
       Ref = buildCapture(SemaRef, D, SimpleRefExpr, /*WithInit=*/false);
       if (!isOpenMPCapturedDecl(D)) {
         ExprCaptures.push_back(Ref->getDecl());
@@ -21480,16 +21481,17 @@ OMPClause *SemaOpenMP::ActOnOpenMPLinearClause(
     if (LinKind == OMPC_LINEAR_uval)
       InitExpr = VD ? VD->getInit() : SimpleRefExpr;
     else
-      InitExpr = VD ? SimpleRefExpr : Ref;
+      InitExpr = (VD || IsBindingDecl) ? SimpleRefExpr : Ref;
     SemaRef.AddInitializerToDecl(
         Init, SemaRef.DefaultLvalueConversion(InitExpr).get(),
         /*DirectInit=*/false);
     DeclRefExpr *InitRef = buildDeclRefExpr(SemaRef, Init, Type, ELoc);
 
     DSAStack->addDSA(D, RefExpr->IgnoreParens(), OMPC_linear, Ref);
-    Vars.push_back((VD || SemaRef.CurContext->isDependentContext())
-                       ? RefExpr->IgnoreParens()
-                       : Ref);
+    Vars.push_back(
+        (VD || IsBindingDecl || SemaRef.CurContext->isDependentContext())
+            ? RefExpr->IgnoreParens()
+            : Ref);
     Privates.push_back(PrivateRef);
     Inits.push_back(InitRef);
   }
@@ -21588,13 +21590,16 @@ static bool FinishOpenMPLinearClause(OMPLinearClause &Clause, DeclRefExpr *IV,
     // Build privatized reference to the current linear var.
     auto *DE = cast<DeclRefExpr>(SimpleRefExpr);
     Expr *CapturedRef;
-    if (LinKind == OMPC_LINEAR_uval)
+    if (isa<BindingDecl>(DE->getDecl())) {
+      CapturedRef = SimpleRefExpr; // Bindings are referenced directly.
+    } else if (LinKind == OMPC_LINEAR_uval) {
       CapturedRef = cast<VarDecl>(DE->getDecl())->getInit();
-    else
+    } else {
       CapturedRef =
           buildDeclRefExpr(SemaRef, cast<VarDecl>(DE->getDecl()),
                            DE->getType().getUnqualifiedType(), DE->getExprLoc(),
                            /*RefersToCapture=*/true);
+    }
 
     // Build update: Var = InitExpr + IV * Step
     ExprResult Update;
diff --git a/clang/test/OpenMP/structured-bindings-codegen.cpp b/clang/test/OpenMP/structured-bindings-codegen.cpp
index 1014d0719be76..c2a1366e01a9e 100644
--- a/clang/test/OpenMP/structured-bindings-codegen.cpp
+++ b/clang/test/OpenMP/structured-bindings-codegen.cpp
@@ -426,6 +426,89 @@ void test_simd_private_then_parallel() {
   }
 }
 
+void test_linear_binding() {
+  Point p{1, 2};
+  auto [a, b] = p;
+
+#pragma omp simd linear(a:1)
+  for (int i = 0; i < 10; ++i) {
+    a += 1;
+    use(a);
+  }
+}
+
+void test_reduction_binding_sum() {
+  Point p{0, 0};
+  auto [a, b] = p;
+
+#pragma omp parallel for reduction(+:a)
+  for (int i = 0; i < 100; ++i) {
+    a += i;
+  }
+  use(a);
+}
+
+void test_reduction_binding_operators() {
+  Point p{1, 100};
+  auto [a, b] = p;
+
+#pragma omp parallel for reduction(*:a) reduction(min:b)
+  for (int i = 1; i <= 10; ++i) {
+    a *= 2;
+    if (i < b) b = i;
+  }
+  use(a);
+  use(b);
+}
+
+void test_lastprivate_binding() {
+  Point p{1, 2};
+  auto [a, b] = p;
+
+#pragma omp parallel for lastprivate(a)
+  for (int i = 0; i < 10; ++i) {
+    a = i * 10;
+  }
+  use(a);
+}
+
+void test_mixed_linear_private() {
+  Point p{1, 2};
+  auto [a, b] = p;
+
+#pragma omp simd linear(a:2) private(b)
+  for (int i = 0; i < 10; ++i) {
+    a += 2;
+    b = i;
+    use(a + b);
+  }
+}
+
+void test_lastprivate_conditional() {
+  Point p{0, 0};
+  auto [a, b] = p;
+
+#pragma omp parallel for lastprivate(conditional: a)
+  for (int i = 0; i < 100; ++i) {
+    if (i % 7 == 0)
+      a = i;
+  }
+  use(a);
+}
+
+void test_reduction_binding_max() {
+  Point p{-100, -100};
+  auto [a, b] = p;
+
+#pragma omp parallel for reduction(max:a,b)
+  for (int i = 0; i < 100; ++i) {
+    if (i > a) a = i;
+    if (i > b) b = i;
+  }
+  use(a);
+  use(b);
+}
+
 int main() {
   test_target_explicit_map();
   test_target_implicit_map();
@@ -454,6 +537,13 @@ int main() {
   test_static_bindings();
   test_shadowing();
   test_simd_private_then_parallel();
+  test_linear_binding();
+  test_reduction_binding_sum();
+  test_reduction_binding_operators();
+  test_lastprivate_binding();
+  test_mixed_linear_private();
+  test_lastprivate_conditional();
+  test_reduction_binding_max();
   return 0;
 }
 
@@ -2230,6 +2320,783 @@ int main() {
 // CHECK:    ret void
 //
 //
+// CHECK-LABEL: define dso_local void @_Z19test_linear_bindingv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z19test_linear_bindingv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    store i32 0, ptr [[DOTOMP_IV:%.*]], align 4
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    store i32 [[TMP1]], ptr [[DOTLINEAR_START:%.*]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
+// CHECK:       [[OMP_INNER_FOR_COND]]:
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP79:![0-9]+]]
+// CHECK:    [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10
+// CHECK:    br i1 [[CMP]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
+// CHECK:       [[OMP_INNER_FOR_BODY]]:
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP79]]
+// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
+// CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4, !llvm.access.group [[ACC_GRP79]]
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP79]]
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP79]]
+// CHECK:    [[MUL1:%.*]] = mul nsw i32 [[TMP5]], 1
+// CHECK:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[MUL1]]
+// CHECK:    store i32 [[ADD2]], ptr [[A:%.*]], align 4, !llvm.access.group [[ACC_GRP79]]
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP79]]
+// CHECK:    [[ADD3:%.*]] = add nsw i32 [[TMP6]], 1
+// CHECK:    store i32 [[ADD3]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP79]]
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP79]]
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP7]]), !llvm.access.group [[ACC_GRP79]]
+// CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
+// CHECK:       [[OMP_BODY_CONTINUE]]:
+// CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
+// CHECK:       [[OMP_INNER_FOR_INC]]:
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP79]]
+// CHECK:    [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK:    store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP79]]
+// CHECK:    br label %[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP80:![0-9]+]]
+// CHECK:       [[OMP_INNER_FOR_END]]:
+// CHECK:    store i32 10, ptr [[I]], align 4
+// CHECK:    [[X5:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[A]], align 4
+// CHECK:    [[X6:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    store i32 [[TMP9]], ptr [[X6]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z26test_reduction_binding_sumv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memset.p0.i64(ptr align 4 [[P:%.*]], i8 0, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    store ptr [[X]], ptr [[A:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[A]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @_Z26test_reduction_binding_sumv.omp_outlined, ptr [[TMP0]], ptr [[TMP1]])
+// CHECK:    [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[X1]], align 4
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP2]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z26test_reduction_binding_sumv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[A]], ptr [[A_ADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store ptr [[TMP2]], ptr [[TMP:%.*]], align 8
+// CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
+// CHECK:    store i32 99, ptr [[DOTOMP_UB:%.*]], align 4
+// CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
+// CHECK:    store i32 0, ptr [[DOTOMP_IS_LAST:%.*]], align 4
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store i32 0, ptr [[A2:%.*]], align 4
+// CHECK:    store ptr [[A2]], ptr [[_TMP3:%.*]], align 8
+// CHECK:    [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99
+// CHECK:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK:       [[COND_TRUE]]:
+// CHECK:    br label %[[COND_END:.*]]
+// CHECK:       [[COND_FALSE]]:
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    br label %[[COND_END]]
+// CHECK:       [[COND_END]]:
+// CHECK:    [[COND:%.*]] = phi i32 [ 99, %[[COND_TRUE]] ], [ [[TMP7]], %[[COND_FALSE]] ]
+// CHECK:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK:    store i32 [[TMP8]], ptr [[DOTOMP_IV:%.*]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
+// CHECK:       [[OMP_INNER_FOR_COND]]:
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP4:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]]
+// CHECK:    br i1 [[CMP4]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
+// CHECK:       [[OMP_INNER_FOR_BODY]]:
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1
+// CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[I]], align 4
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP13:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP12]]
+// CHECK:    store i32 [[ADD5]], ptr [[X]], align 4
+// CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
+// CHECK:       [[OMP_BODY_CONTINUE]]:
+// CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
+// CHECK:       [[OMP_INNER_FOR_INC]]:
+// CHECK:    [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[ADD6:%.*]] = add nsw i32 [[TMP14]], 1
+// CHECK:    store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND]]
+// CHECK:       [[OMP_INNER_FOR_END]]:
+// CHECK:    br label %[[OMP_LOOP_EXIT:.*]]
+// CHECK:       [[OMP_LOOP_EXIT]]:
+// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]])
+// CHECK:    [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST:%.*]], i64 0, i64 0
+// CHECK:    store ptr [[A2]], ptr [[TMP15]], align 8
+// CHECK:    [[TMP16:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP5]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z26test_reduction_binding_sumv.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK:    switch i32 [[TMP16]], [[DOTOMP_REDUCTION_DEFAULT:label %.*]] [
+// CHECK:      i32 1, [[DOTOMP_REDUCTION_CASE1:label %.*]]
+// CHECK:      i32 2, [[DOTOMP_REDUCTION_CASE2:label %.*]]
+// CHECK:    ]
+// CHECK:       [[_OMP_REDUCTION_CASE1:.*:]]
+// CHECK:    [[TMP17:%.*]] = load i32, ptr [[TMP3]], align 4
+// CHECK:    [[TMP18:%.*]] = load i32, ptr [[A2]], align 4
+// CHECK:    [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
+// CHECK:    store i32 [[ADD7]], ptr [[TMP3]], align 4
+// CHECK:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK:       [[_OMP_REDUCTION_CASE2:.*:]]
+// CHECK:    [[TMP19:%.*]] = load i32, ptr [[A2]], align 4
+// CHECK:    [[TMP20:%.*]] = atomicrmw add ptr [[TMP3]], i32 [[TMP19]] monotonic, align 4
+// CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK:       [[_OMP_REDUCTION_DEFAULT:.*:]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z26test_reduction_binding_sumv.omp_outlined.omp.reduction.reduction_func(
+// CHECK-SAME: ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
+// CHECK:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK:    store i32 [[ADD]], ptr [[TMP7]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z32test_reduction_binding_operatorsv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z32test_reduction_binding_operatorsv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    store ptr [[X]], ptr [[A:%.*]], align 8
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 1
+// CHECK:    store ptr [[Y]], ptr [[B:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[A]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[B]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @_Z32test_reduction_binding_operatorsv.omp_outlined, ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]])
+// CHECK:    [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[X1]], align 4
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP3]])
+// CHECK:    [[Y2:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 1
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[Y2]], align 4
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP4]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z32test_reduction_binding_operatorsv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[A]], ptr [[A_ADDR:%.*]], align 8
+// CHECK:    store ptr [[B]], ptr [[B_ADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store ptr [[TMP2]], ptr [[TMP:%.*]], align 8
+// CHECK:    store ptr [[TMP3]], ptr [[_TMP1:%.*]], align 8
+// CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
+// CHECK:    store i32 9, ptr [[DOTOMP_UB:%.*]], align 4
+// CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
+// CHECK:    store i32 0, ptr [[DOTOMP_IS_LAST:%.*]], align 4
+// CHECK:    [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store i32 1, ptr [[A3:%.*]], align 4
+// CHECK:    store ptr [[A3]], ptr [[_TMP4:%.*]], align 8
+// CHECK:    [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store i32 2147483647, ptr [[B5:%.*]], align 4
+// CHECK:    store ptr [[B5]], ptr [[_TMP6:%.*]], align 8
+// CHECK:    [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
+// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 9
+// CHECK:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK:       [[COND_TRUE]]:
+// CHECK:    br label %[[COND_END:.*]]
+// CHECK:       [[COND_FALSE]]:
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    br label %[[COND_END]]
+// CHECK:       [[COND_END]]:
+// CHECK:    [[COND:%.*]] = phi i32 [ 9, %[[COND_TRUE]] ], [ [[TMP9]], %[[COND_FALSE]] ]
+// CHECK:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK:    store i32 [[TMP10]], ptr [[DOTOMP_IV:%.*]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
+// CHECK:       [[OMP_INNER_FOR_COND]]:
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP7:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
+// CHECK:    br i1 [[CMP7]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
+// CHECK:       [[OMP_INNER_FOR_BODY]]:
+// CHECK:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1
+// CHECK:    [[ADD:%.*]] = add nsw i32 1, [[MUL]]
+// CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP14:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[MUL8:%.*]] = mul nsw i32 [[TMP14]], 2
+// CHECK:    store i32 [[MUL8]], ptr [[X]], align 4
+// CHECK:    [[TMP15:%.*]] = load i32, ptr [[I]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP16:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[CMP9:%.*]] = icmp slt i32 [[TMP15]], [[TMP16]]
+// CHECK:    br i1 [[CMP9]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK:       [[IF_THEN]]:
+// CHECK:    [[TMP17:%.*]] = load i32, ptr [[I]], align 4
+// CHECK:    [[Y10:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    store i32 [[TMP17]], ptr [[Y10]], align 4
+// CHECK:    br label %[[IF_END]]
+// CHECK:       [[IF_END]]:
+// CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
+// CHECK:       [[OMP_BODY_CONTINUE]]:
+// CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
+// CHECK:       [[OMP_INNER_FOR_INC]]:
+// CHECK:    [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[ADD11:%.*]] = add nsw i32 [[TMP18]], 1
+// CHECK:    store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND]]
+// CHECK:       [[OMP_INNER_FOR_END]]:
+// CHECK:    br label %[[OMP_LOOP_EXIT:.*]]
+// CHECK:       [[OMP_LOOP_EXIT]]:
+// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP7]])
+// CHECK:    [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST:%.*]], i64 0, i64 0
+// CHECK:    store ptr [[A3]], ptr [[TMP19]], align 8
+// CHECK:    [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
+// CHECK:    store ptr [[B5]], ptr [[TMP20]], align 8
+// CHECK:    [[TMP21:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP7]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z32test_reduction_binding_operatorsv.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK:    switch i32 [[TMP21]], [[DOTOMP_REDUCTION_DEFAULT:label %.*]] [
+// CHECK:      i32 1, [[DOTOMP_REDUCTION_CASE1:label %.*]]
+// CHECK:      i32 2, label %[[DOTOMP_REDUCTION_CASE2:.*]]
+// CHECK:    ]
+// CHECK:       [[_OMP_REDUCTION_CASE1:.*:]]
+// CHECK:    [[TMP22:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK:    [[TMP23:%.*]] = load i32, ptr [[A3]], align 4
+// CHECK:    [[MUL12:%.*]] = mul nsw i32 [[TMP22]], [[TMP23]]
+// CHECK:    store i32 [[MUL12]], ptr [[TMP4]], align 4
+// CHECK:    [[TMP24:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK:    [[TMP25:%.*]] = load i32, ptr [[B5]], align 4
+// CHECK:    [[CMP13:%.*]] = icmp slt i32 [[TMP24]], [[TMP25]]
+// CHECK:    br i1 [[CMP13]], label %[[COND_TRUE14:.*]], label %[[COND_FALSE15:.*]]
+// CHECK:       [[COND_TRUE14]]:
+// CHECK:    [[TMP26:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK:    br label %[[COND_END16:.*]]
+// CHECK:       [[COND_FALSE15]]:
+// CHECK:    [[TMP27:%.*]] = load i32, ptr [[B5]], align 4
+// CHECK:    br label %[[COND_END16]]
+// CHECK:       [[COND_END16]]:
+// CHECK:    [[COND17:%.*]] = phi i32 [ [[TMP26]], %[[COND_TRUE14]] ], [ [[TMP27]], %[[COND_FALSE15]] ]
+// CHECK:    store i32 [[COND17]], ptr [[TMP5]], align 4
+// CHECK:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK:       [[_OMP_REDUCTION_CASE2:.*:]]
+// CHECK:    [[TMP28:%.*]] = load i32, ptr [[A3]], align 4
+// CHECK:    [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[TMP4]] monotonic, align 4
+// CHECK:    br label %[[ATOMIC_CONT:.*]]
+// CHECK:       [[ATOMIC_CONT]]:
+// CHECK:    [[TMP29:%.*]] = phi i32 [ [[ATOMIC_LOAD]], %[[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP34:%.*]], %[[ATOMIC_CONT]] ]
+// CHECK:    store i32 [[TMP29]], ptr [[_TMP18:%.*]], align 4
+// CHECK:    [[TMP30:%.*]] = load i32, ptr [[_TMP18]], align 4
+// CHECK:    [[TMP31:%.*]] = load i32, ptr [[A3]], align 4
+// CHECK:    [[MUL19:%.*]] = mul nsw i32 [[TMP30]], [[TMP31]]
+// CHECK:    store i32 [[MUL19]], ptr [[ATOMIC_TEMP:%.*]], align 4
+// CHECK:    [[TMP32:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4
+// CHECK:    [[TMP33:%.*]] = cmpxchg ptr [[TMP4]], i32 [[TMP29]], i32 [[TMP32]] monotonic monotonic, align 4
+// CHECK:    [[TMP34]] = extractvalue { i32, i1 } [[TMP33]], 0
+// CHECK:    [[TMP35:%.*]] = extractvalue { i32, i1 } [[TMP33]], 1
+// CHECK:    br i1 [[TMP35]], label %[[ATOMIC_EXIT:.*]], label %[[ATOMIC_CONT]]
+// CHECK:       [[ATOMIC_EXIT]]:
+// CHECK:    [[TMP36:%.*]] = load i32, ptr [[B5]], align 4
+// CHECK:    [[TMP37:%.*]] = atomicrmw min ptr [[TMP5]], i32 [[TMP36]] monotonic, align 4
+// CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK:       [[_OMP_REDUCTION_DEFAULT:.*:]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z32test_reduction_binding_operatorsv.omp_outlined.omp.reduction.reduction_func(
+// CHECK-SAME: ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0
+// CHECK:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK:    [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1
+// CHECK:    [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
+// CHECK:    [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 1
+// CHECK:    [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK:    [[TMP13:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]]
+// CHECK:    store i32 [[MUL]], ptr [[TMP7]], align 4
+// CHECK:    [[TMP14:%.*]] = load i32, ptr [[TMP11]], align 4
+// CHECK:    [[TMP15:%.*]] = load i32, ptr [[TMP9]], align 4
+// CHECK:    [[CMP:%.*]] = icmp slt i32 [[TMP14]], [[TMP15]]
+// CHECK:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK:       [[COND_TRUE]]:
+// CHECK:    [[TMP16:%.*]] = load i32, ptr [[TMP11]], align 4
+// CHECK:    br label %[[COND_END:.*]]
+// CHECK:       [[COND_FALSE]]:
+// CHECK:    [[TMP17:%.*]] = load i32, ptr [[TMP9]], align 4
+// CHECK:    br label %[[COND_END]]
+// CHECK:       [[COND_END]]:
+// CHECK:    [[COND:%.*]] = phi i32 [ [[TMP16]], %[[COND_TRUE]] ], [ [[TMP17]], %[[COND_FALSE]] ]
+// CHECK:    store i32 [[COND]], ptr [[TMP11]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z24test_lastprivate_bindingv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z24test_lastprivate_bindingv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    store ptr [[X]], ptr [[A:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[A]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @_Z24test_lastprivate_bindingv.omp_outlined, ptr [[TMP0]], ptr [[TMP1]])
+// CHECK:    [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[X1]], align 4
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP2]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z24test_lastprivate_bindingv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[A]], ptr [[A_ADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store ptr [[TMP2]], ptr [[TMP:%.*]], align 8
+// CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
+// CHECK:    store i32 9, ptr [[DOTOMP_UB:%.*]], align 4
+// CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
+// CHECK:    store i32 0, ptr [[DOTOMP_IS_LAST:%.*]], align 4
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store ptr [[A2:%.*]], ptr [[_TMP3:%.*]], align 8
+// CHECK:    [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9
+// CHECK:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK:       [[COND_TRUE]]:
+// CHECK:    br label %[[COND_END:.*]]
+// CHECK:       [[COND_FALSE]]:
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    br label %[[COND_END]]
+// CHECK:       [[COND_END]]:
+// CHECK:    [[COND:%.*]] = phi i32 [ 9, %[[COND_TRUE]] ], [ [[TMP7]], %[[COND_FALSE]] ]
+// CHECK:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK:    store i32 [[TMP8]], ptr [[DOTOMP_IV:%.*]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
+// CHECK:       [[OMP_INNER_FOR_COND]]:
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP4:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]]
+// CHECK:    br i1 [[CMP4]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
+// CHECK:       [[OMP_INNER_FOR_BODY]]:
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1
+// CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[I]], align 4
+// CHECK:    [[MUL5:%.*]] = mul nsw i32 [[TMP12]], 10
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    store i32 [[MUL5]], ptr [[X]], align 4
+// CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
+// CHECK:       [[OMP_BODY_CONTINUE]]:
+// CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
+// CHECK:       [[OMP_INNER_FOR_INC]]:
+// CHECK:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1
+// CHECK:    store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND]]
+// CHECK:       [[OMP_INNER_FOR_END]]:
+// CHECK:    br label %[[OMP_LOOP_EXIT:.*]]
+// CHECK:       [[OMP_LOOP_EXIT]]:
+// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]])
+// CHECK:    [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK:    [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK:    br i1 [[TMP15]], [[DOTOMP_LASTPRIVATE_THEN:label %.*]], [[DOTOMP_LASTPRIVATE_DONE:label %.*]]
+// CHECK:       [[_OMP_LASTPRIVATE_THEN:.*:]]
+// CHECK:    [[TMP16:%.*]] = load ptr, ptr [[_TMP3]], align 8
+// CHECK:    [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
+// CHECK:    store i32 [[TMP17]], ptr [[TMP3]], align 4
+// CHECK:    br [[DOTOMP_LASTPRIVATE_DONE]]
+// CHECK:       [[_OMP_LASTPRIVATE_DONE:.*:]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z25test_mixed_linear_privatev(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z25test_mixed_linear_privatev.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    store i32 0, ptr [[DOTOMP_IV:%.*]], align 4
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    store i32 [[TMP1]], ptr [[DOTLINEAR_START:%.*]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
+// CHECK:       [[OMP_INNER_FOR_COND]]:
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP82:![0-9]+]]
+// CHECK:    [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10
+// CHECK:    br i1 [[CMP]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
+// CHECK:       [[OMP_INNER_FOR_BODY]]:
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP82]]
+// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
+// CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4, !llvm.access.group [[ACC_GRP82]]
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP82]]
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP82]]
+// CHECK:    [[MUL1:%.*]] = mul nsw i32 [[TMP5]], 2
+// CHECK:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[MUL1]]
+// CHECK:    store i32 [[ADD2]], ptr [[A:%.*]], align 4, !llvm.access.group [[ACC_GRP82]]
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP82]]
+// CHECK:    [[ADD3:%.*]] = add nsw i32 [[TMP6]], 2
+// CHECK:    store i32 [[ADD3]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP82]]
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP82]]
+// CHECK:    store i32 [[TMP7]], ptr [[B:%.*]], align 4, !llvm.access.group [[ACC_GRP82]]
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP82]]
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP82]]
+// CHECK:    [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK:    call void @_Z3usei(i32 noundef [[ADD4]]), !llvm.access.group [[ACC_GRP82]]
+// CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
+// CHECK:       [[OMP_BODY_CONTINUE]]:
+// CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
+// CHECK:       [[OMP_INNER_FOR_INC]]:
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP82]]
+// CHECK:    [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1
+// CHECK:    store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP82]]
+// CHECK:    br label %[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP83:![0-9]+]]
+// CHECK:       [[OMP_INNER_FOR_END]]:
+// CHECK:    store i32 10, ptr [[I]], align 4
+// CHECK:    [[X6:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[A]], align 4
+// CHECK:    [[X7:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    store i32 [[TMP11]], ptr [[X7]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z28test_lastprivate_conditionalv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memset.p0.i64(ptr align 4 [[P:%.*]], i8 0, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    store ptr [[X]], ptr [[A:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[A]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @_Z28test_lastprivate_conditionalv.omp_outlined, ptr [[TMP0]], ptr [[TMP1]])
+// CHECK:    [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[X1]], align 4
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP2]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z28test_lastprivate_conditionalv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[A]], ptr [[A_ADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store ptr [[TMP2]], ptr [[TMP:%.*]], align 8
+// CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
+// CHECK:    store i32 99, ptr [[DOTOMP_UB:%.*]], align 4
+// CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
+// CHECK:    store i32 0, ptr [[DOTOMP_IS_LAST:%.*]], align 4
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_LASPRIVATE_CONDITIONAL:%.*]], ptr [[A2:%.*]], i32 0, i32 1
+// CHECK:    store i8 0, ptr [[TMP4]], align 4
+// CHECK:    [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_LASPRIVATE_CONDITIONAL]], ptr [[A2]], i32 0, i32 0
+// CHECK:    store ptr [[TMP5]], ptr [[_TMP3:%.*]], align 8
+// CHECK:    [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
+// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 99
+// CHECK:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK:       [[COND_TRUE]]:
+// CHECK:    br label %[[COND_END:.*]]
+// CHECK:       [[COND_FALSE]]:
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    br label %[[COND_END]]
+// CHECK:       [[COND_END]]:
+// CHECK:    [[COND:%.*]] = phi i32 [ 99, %[[COND_TRUE]] ], [ [[TMP9]], %[[COND_FALSE]] ]
+// CHECK:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK:    store i32 [[TMP10]], ptr [[DOTOMP_IV:%.*]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
+// CHECK:       [[OMP_INNER_FOR_COND]]:
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP4:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
+// CHECK:    br i1 [[CMP4]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
+// CHECK:       [[OMP_INNER_FOR_BODY]]:
+// CHECK:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1
+// CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4
+// CHECK:    [[TMP14:%.*]] = load i32, ptr [[I]], align 4
+// CHECK:    [[REM:%.*]] = srem i32 [[TMP14]], 7
+// CHECK:    [[CMP5:%.*]] = icmp eq i32 [[REM]], 0
+// CHECK:    br i1 [[CMP5]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK:       [[IF_THEN]]:
+// CHECK:    [[TMP15:%.*]] = load i32, ptr [[I]], align 4
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    store i32 [[TMP15]], ptr [[X]], align 4
+// CHECK:    br label %[[IF_END]]
+// CHECK:       [[IF_END]]:
+// CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
+// CHECK:       [[OMP_BODY_CONTINUE]]:
+// CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
+// CHECK:       [[OMP_INNER_FOR_INC]]:
+// CHECK:    [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1
+// CHECK:    store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND]]
+// CHECK:       [[OMP_INNER_FOR_END]]:
+// CHECK:    br label %[[OMP_LOOP_EXIT:.*]]
+// CHECK:       [[OMP_LOOP_EXIT]]:
+// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP7]])
+// CHECK:    [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK:    [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
+// CHECK:    call void @__kmpc_barrier(ptr @[[GLOB7:[0-9]+]], i32 [[TMP7]])
+// CHECK:    br i1 [[TMP18]], [[DOTOMP_LASTPRIVATE_THEN:label %.*]], [[DOTOMP_LASTPRIVATE_DONE:label %.*]]
+// CHECK:       [[_OMP_LASTPRIVATE_THEN:.*:]]
+// CHECK:    [[TMP19:%.*]] = load ptr, ptr [[_TMP3]], align 8
+// CHECK:    [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
+// CHECK:    store i32 [[TMP20]], ptr [[TMP3]], align 4
+// CHECK:    br [[DOTOMP_LASTPRIVATE_DONE]]
+// CHECK:       [[_OMP_LASTPRIVATE_DONE:.*:]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z26test_reduction_binding_maxv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z26test_reduction_binding_maxv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    store ptr [[X]], ptr [[A:%.*]], align 8
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 1
+// CHECK:    store ptr [[Y]], ptr [[B:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[A]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[B]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @_Z26test_reduction_binding_maxv.omp_outlined, ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]])
+// CHECK:    [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[X1]], align 4
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP3]])
+// CHECK:    [[Y2:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 1
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[Y2]], align 4
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP4]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z26test_reduction_binding_maxv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[A]], ptr [[A_ADDR:%.*]], align 8
+// CHECK:    store ptr [[B]], ptr [[B_ADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store ptr [[TMP2]], ptr [[TMP:%.*]], align 8
+// CHECK:    store ptr [[TMP3]], ptr [[_TMP1:%.*]], align 8
+// CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
+// CHECK:    store i32 99, ptr [[DOTOMP_UB:%.*]], align 4
+// CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
+// CHECK:    store i32 0, ptr [[DOTOMP_IS_LAST:%.*]], align 4
+// CHECK:    [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store i32 -2147483648, ptr [[A3:%.*]], align 4
+// CHECK:    store ptr [[A3]], ptr [[_TMP4:%.*]], align 8
+// CHECK:    [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store i32 -2147483648, ptr [[B5:%.*]], align 4
+// CHECK:    store ptr [[B5]], ptr [[_TMP6:%.*]], align 8
+// CHECK:    [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
+// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 99
+// CHECK:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK:       [[COND_TRUE]]:
+// CHECK:    br label %[[COND_END:.*]]
+// CHECK:       [[COND_FALSE]]:
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    br label %[[COND_END]]
+// CHECK:       [[COND_END]]:
+// CHECK:    [[COND:%.*]] = phi i32 [ 99, %[[COND_TRUE]] ], [ [[TMP9]], %[[COND_FALSE]] ]
+// CHECK:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK:    store i32 [[TMP10]], ptr [[DOTOMP_IV:%.*]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
+// CHECK:       [[OMP_INNER_FOR_COND]]:
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP7:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
+// CHECK:    br i1 [[CMP7]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
+// CHECK:       [[OMP_INNER_FOR_BODY]]:
+// CHECK:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1
+// CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4
+// CHECK:    [[TMP14:%.*]] = load i32, ptr [[I]], align 4
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP15:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[CMP8:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
+// CHECK:    br i1 [[CMP8]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK:       [[IF_THEN]]:
+// CHECK:    [[TMP16:%.*]] = load i32, ptr [[I]], align 4
+// CHECK:    [[X9:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    store i32 [[TMP16]], ptr [[X9]], align 4
+// CHECK:    br label %[[IF_END]]
+// CHECK:       [[IF_END]]:
+// CHECK:    [[TMP17:%.*]] = load i32, ptr [[I]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP18:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[CMP10:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
+// CHECK:    br i1 [[CMP10]], label %[[IF_THEN11:.*]], label %[[IF_END13:.*]]
+// CHECK:       [[IF_THEN11]]:
+// CHECK:    [[TMP19:%.*]] = load i32, ptr [[I]], align 4
+// CHECK:    [[Y12:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    store i32 [[TMP19]], ptr [[Y12]], align 4
+// CHECK:    br label %[[IF_END13]]
+// CHECK:       [[IF_END13]]:
+// CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
+// CHECK:       [[OMP_BODY_CONTINUE]]:
+// CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
+// CHECK:       [[OMP_INNER_FOR_INC]]:
+// CHECK:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[ADD14:%.*]] = add nsw i32 [[TMP20]], 1
+// CHECK:    store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND]]
+// CHECK:       [[OMP_INNER_FOR_END]]:
+// CHECK:    br label %[[OMP_LOOP_EXIT:.*]]
+// CHECK:       [[OMP_LOOP_EXIT]]:
+// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP7]])
+// CHECK:    [[TMP21:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST:%.*]], i64 0, i64 0
+// CHECK:    store ptr [[A3]], ptr [[TMP21]], align 8
+// CHECK:    [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
+// CHECK:    store ptr [[B5]], ptr [[TMP22]], align 8
+// CHECK:    [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP7]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z26test_reduction_binding_maxv.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK:    switch i32 [[TMP23]], [[DOTOMP_REDUCTION_DEFAULT:label %.*]] [
+// CHECK:      i32 1, [[DOTOMP_REDUCTION_CASE1:label %.*]]
+// CHECK:      i32 2, [[DOTOMP_REDUCTION_CASE2:label %.*]]
+// CHECK:    ]
+// CHECK:       [[_OMP_REDUCTION_CASE1:.*:]]
+// CHECK:    [[TMP24:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK:    [[TMP25:%.*]] = load i32, ptr [[A3]], align 4
+// CHECK:    [[CMP15:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]]
+// CHECK:    br i1 [[CMP15]], label %[[COND_TRUE16:.*]], label %[[COND_FALSE17:.*]]
+// CHECK:       [[COND_TRUE16]]:
+// CHECK:    [[TMP26:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK:    br label %[[COND_END18:.*]]
+// CHECK:       [[COND_FALSE17]]:
+// CHECK:    [[TMP27:%.*]] = load i32, ptr [[A3]], align 4
+// CHECK:    br label %[[COND_END18]]
+// CHECK:       [[COND_END18]]:
+// CHECK:    [[COND19:%.*]] = phi i32 [ [[TMP26]], %[[COND_TRUE16]] ], [ [[TMP27]], %[[COND_FALSE17]] ]
+// CHECK:    store i32 [[COND19]], ptr [[TMP4]], align 4
+// CHECK:    [[TMP28:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK:    [[TMP29:%.*]] = load i32, ptr [[B5]], align 4
+// CHECK:    [[CMP20:%.*]] = icmp sgt i32 [[TMP28]], [[TMP29]]
+// CHECK:    br i1 [[CMP20]], label %[[COND_TRUE21:.*]], label %[[COND_FALSE22:.*]]
+// CHECK:       [[COND_TRUE21]]:
+// CHECK:    [[TMP30:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK:    br label %[[COND_END23:.*]]
+// CHECK:       [[COND_FALSE22]]:
+// CHECK:    [[TMP31:%.*]] = load i32, ptr [[B5]], align 4
+// CHECK:    br label %[[COND_END23]]
+// CHECK:       [[COND_END23]]:
+// CHECK:    [[COND24:%.*]] = phi i32 [ [[TMP30]], %[[COND_TRUE21]] ], [ [[TMP31]], %[[COND_FALSE22]] ]
+// CHECK:    store i32 [[COND24]], ptr [[TMP5]], align 4
+// CHECK:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK:       [[_OMP_REDUCTION_CASE2:.*:]]
+// CHECK:    [[TMP32:%.*]] = load i32, ptr [[A3]], align 4
+// CHECK:    [[TMP33:%.*]] = atomicrmw max ptr [[TMP4]], i32 [[TMP32]] monotonic, align 4
+// CHECK:    [[TMP34:%.*]] = load i32, ptr [[B5]], align 4
+// CHECK:    [[TMP35:%.*]] = atomicrmw max ptr [[TMP5]], i32 [[TMP34]] monotonic, align 4
+// CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK:       [[_OMP_REDUCTION_DEFAULT:.*:]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z26test_reduction_binding_maxv.omp_outlined.omp.reduction.reduction_func(
+// CHECK-SAME: ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0
+// CHECK:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK:    [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1
+// CHECK:    [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
+// CHECK:    [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 1
+// CHECK:    [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK:    [[TMP13:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]]
+// CHECK:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK:       [[COND_TRUE]]:
+// CHECK:    [[TMP14:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK:    br label %[[COND_END:.*]]
+// CHECK:       [[COND_FALSE]]:
+// CHECK:    [[TMP15:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK:    br label %[[COND_END]]
+// CHECK:       [[COND_END]]:
+// CHECK:    [[COND:%.*]] = phi i32 [ [[TMP14]], %[[COND_TRUE]] ], [ [[TMP15]], %[[COND_FALSE]] ]
+// CHECK:    store i32 [[COND]], ptr [[TMP7]], align 4
+// CHECK:    [[TMP16:%.*]] = load i32, ptr [[TMP11]], align 4
+// CHECK:    [[TMP17:%.*]] = load i32, ptr [[TMP9]], align 4
+// CHECK:    [[CMP2:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]]
+// CHECK:    br i1 [[CMP2]], label %[[COND_TRUE3:.*]], label %[[COND_FALSE4:.*]]
+// CHECK:       [[COND_TRUE3]]:
+// CHECK:    [[TMP18:%.*]] = load i32, ptr [[TMP11]], align 4
+// CHECK:    br label %[[COND_END5:.*]]
+// CHECK:       [[COND_FALSE4]]:
+// CHECK:    [[TMP19:%.*]] = load i32, ptr [[TMP9]], align 4
+// CHECK:    br label %[[COND_END5]]
+// CHECK:       [[COND_END5]]:
+// CHECK:    [[COND6:%.*]] = phi i32 [ [[TMP18]], %[[COND_TRUE3]] ], [ [[TMP19]], %[[COND_FALSE4]] ]
+// CHECK:    store i32 [[COND6]], ptr [[TMP11]], align 4
+// CHECK:    ret void
+//
+//
 // CHECK-LABEL: define dso_local noundef i32 @main(
 // CHECK-SAME: ) #[[ATTR8:[0-9]+]] {
 // CHECK:  [[ENTRY:.*:]]
@@ -2261,5 +3128,12 @@ int main() {
 // CHECK:    call void @_Z20test_static_bindingsv()
 // CHECK:    call void @_Z14test_shadowingv()
 // CHECK:    call void @_Z31test_simd_private_then_parallelv()
+// CHECK:    call void @_Z19test_linear_bindingv()
+// CHECK:    call void @_Z26test_reduction_binding_sumv()
+// CHECK:    call void @_Z32test_reduction_binding_operatorsv()
+// CHECK:    call void @_Z24test_lastprivate_bindingv()
+// CHECK:    call void @_Z25test_mixed_linear_privatev()
+// CHECK:    call void @_Z28test_lastprivate_conditionalv()
+// CHECK:    call void @_Z26test_reduction_binding_maxv()
 // CHECK:    ret i32 0
 //

>From 39d5aff000af3082f6cb2a1e752f5c8981345a7c Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Thu, 21 May 2026 12:11:50 -0700
Subject: [PATCH 28/45] Fixed format and build error

---
 clang/lib/CodeGen/CGStmtOpenMP.cpp | 2 +-
 clang/lib/Sema/SemaOpenMP.cpp      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 2e22905cdc84c..43232194cb759 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -2620,7 +2620,7 @@ void CodeGenFunction::EmitOMPLinearClauseFinal(
           return EmitLValue(&DRE).getAddress();
         }
       }();
-     CodeGenFunction::OMPPrivateScope VarScope(*this);
+      CodeGenFunction::OMPPrivateScope VarScope(*this);
       VarScope.addPrivate(OrigDecl, OrigAddr);
       (void)VarScope.Privatize();
       EmitIgnoredExpr(F);
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 989630057a517..87dcfbd8941f7 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -21590,7 +21590,7 @@ static bool FinishOpenMPLinearClause(OMPLinearClause &Clause, DeclRefExpr *IV,
     // Build privatized reference to the current linear var.
     auto *DE = cast<DeclRefExpr>(SimpleRefExpr);
     Expr *CapturedRef;
-    if (auto *BD = dyn_cast<BindingDecl>(DE->getDecl())) {
+    if (dyn_cast<BindingDecl>(DE->getDecl())) {
       CapturedRef = SimpleRefExpr;
     } else if (LinKind == OMPC_LINEAR_uval) {
       CapturedRef = cast<VarDecl>(DE->getDecl())->getInit();

>From ebd89c3252d00b25ded6ed8e55d4057ec545d08e Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Thu, 21 May 2026 12:28:50 -0700
Subject: [PATCH 29/45] Fixed another build error

---
 clang/lib/CodeGen/CGStmtOpenMP.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 43232194cb759..055497bb485e5 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -2609,7 +2609,7 @@ void CodeGenFunction::EmitOMPLinearClauseFinal(
       }
       const auto *OrigDecl = cast<DeclRefExpr>(*IC)->getDecl();
       Address OrigAddr = [&]() -> Address {
-        if (const auto *BD = dyn_cast<BindingDecl>(OrigDecl)) {
+        if (dyn_cast<BindingDecl>(OrigDecl)) {
           // BindingDecl: use the original expression directly.
           return EmitLValue(*IC).getAddress();
         } else {

>From 6d5ae5d85d9983012d9d76b02fbd0806046c6967 Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Fri, 5 Jun 2026 07:05:33 -0700
Subject: [PATCH 30/45] Addressed review comments

---
 clang/lib/CodeGen/CGExpr.cpp                  |    8 +-
 clang/lib/CodeGen/CGStmtOpenMP.cpp            |   17 +-
 clang/lib/CodeGen/CodeGenFunction.h           |   17 +-
 clang/lib/Sema/SemaExpr.cpp                   |    4 +
 clang/lib/Sema/SemaLambda.cpp                 |    7 +
 clang/lib/Sema/SemaOpenMP.cpp                 |    3 +-
 clang/lib/Sema/SemaStmt.cpp                   |    9 +
 .../OpenMP/structured-bindings-codegen.cpp    | 1032 +++++++++++++++--
 8 files changed, 1005 insertions(+), 92 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 67caecfd1e4e5..c5cd0f9e46810 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3618,7 +3618,9 @@ LValue CodeGenFunction::EmitOMPCapturedBindingLValue(const BindingDecl *BD) {
       Addr = Addr.withElementType(ExpectedTy);
 
     Expr::EvalResult Result;
-    ASE->getIdx()->EvaluateAsInt(Result, getContext());
+    [[maybe_unused]] bool Success =
+        ASE->getIdx()->EvaluateAsInt(Result, getContext());
+    assert(Success && "Expected constant integer index for array subscript");
     uint64_t Idx = Result.Val.getInt().getZExtValue();
     Address EltAddr = Builder.CreateConstArrayGEP(Addr, Idx);
     return MakeAddrLValue(EltAddr, BD->getType(), BaseLVal.getBaseInfo(),
@@ -3832,7 +3834,9 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
           CGM.getLangOpts().OpenMP) {
         auto NameIt = OMPPrivatizedBindingsByName.find(BD->getName());
         if (NameIt != OMPPrivatizedBindingsByName.end()) {
-          return MakeAddrLValue(NameIt->second, E->getType(),
+          assert(NameIt->second.has_value() &&
+                 "Expected valid binding address");
+          return MakeAddrLValue(*NameIt->second, E->getType(),
                                 AlignmentSource::Decl);
         }
         return EmitOMPCapturedBindingLValue(BD);
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 055497bb485e5..764b8df1b59bb 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1159,17 +1159,20 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
 
       if (const auto *BD = dyn_cast<BindingDecl>(OrigDecl)) {
-        Address PrivateAddr = CreateMemTemp(VD->getType(), VD->getName());
+        const auto *VDInit =
+            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
         DeclRefExpr DRE(getContext(), const_cast<BindingDecl *>(BD),
                         /*RefersToEnclosingVariableOrCapture=*/true,
                         BD->getType(), VK_LValue, (*IRef)->getExprLoc());
         LValue OriginalLVal = EmitLValue(&DRE);
-        RValue OrigValue =
-            EmitLoadOfLValue(OriginalLVal, (*IRef)->getExprLoc());
-        EmitStoreThroughLValue(OrigValue,
-                               MakeAddrLValue(PrivateAddr, VD->getType()));
-        LocalDeclMap.try_emplace(VD, PrivateAddr);
-        bool IsRegistered = PrivateScope.addPrivate(BD, PrivateAddr);
+        Address OriginalAddr = OriginalLVal.getAddress();
+        // Emit private VarDecl with copy init. Remap VDInit to point to the
+        // original binding so EmitDecl properly initializes VD.
+        setAddrOfLocalVar(VDInit, OriginalAddr);
+        EmitDecl(*VD);
+        LocalDeclMap.erase(VDInit);
+        Address VDAddr = GetAddrOfLocalVar(VD);
+        bool IsRegistered = PrivateScope.addPrivate(BD, VDAddr);
         assert(IsRegistered &&
                "firstprivate var already registered as firstprivate");
         (void)IsRegistered;
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 97d194a84d799..2958a4c552cbc 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -1155,10 +1155,11 @@ class CodeGenFunction : public CodeGenTypeCache {
     bool setVarAddr(CodeGenFunction &CGF, const ValueDecl *LocalVD,
                     Address TempAddr) {
       LocalVD = cast<ValueDecl>(LocalVD->getCanonicalDecl());
-      // For BindingDecls, also store by name for remapped lookup
-      if (const auto *BD = dyn_cast<BindingDecl>(LocalVD)) {
-        CGF.OMPPrivatizedBindingsByName.insert({BD->getName(), TempAddr});
-      }
+
+      // For BindingDecls, also store by name for remapped lookup.
+      if (const auto *BD = dyn_cast<BindingDecl>(LocalVD))
+        CGF.OMPPrivatizedBindingsByName.insert_or_assign(BD->getName(),
+                                                         TempAddr);
 
       // Only save it once.
       if (SavedLocals.count(LocalVD))
@@ -1220,10 +1221,13 @@ class CodeGenFunction : public CodeGenTypeCache {
     OMPMapVars MappedVars;
     OMPPrivateScope(const OMPPrivateScope &) = delete;
     void operator=(const OMPPrivateScope &) = delete;
+    llvm::StringMap<std::optional<Address>> SavedBindingsByName;
 
   public:
     /// Enter a new OpenMP private scope.
-    explicit OMPPrivateScope(CodeGenFunction &CGF) : RunCleanupsScope(CGF) {}
+    explicit OMPPrivateScope(CodeGenFunction &CGF) : RunCleanupsScope(CGF) {
+      SavedBindingsByName = CGF.OMPPrivatizedBindingsByName;
+    }
 
     /// Registers \p LocalVD variable as a private with \p Addr as the address
     /// of the corresponding private variable. \p
@@ -1254,6 +1258,7 @@ class CodeGenFunction : public CodeGenTypeCache {
     ~OMPPrivateScope() {
       if (PerformCleanup)
         ForceCleanup();
+      CGF.OMPPrivatizedBindingsByName = std::move(SavedBindingsByName);
     }
 
     /// Checks if the global variable is captured in current function.
@@ -1556,7 +1561,7 @@ class CodeGenFunction : public CodeGenTypeCache {
   /// Name-based lookup map for privatized BindingDecls.
   /// Used when BindingDecls are remapped during OpenMP outlining, since the
   /// remapped BindingDecl has a different pointer than the original.
-  llvm::StringMap<Address> OMPPrivatizedBindingsByName;
+  llvm::StringMap<std::optional<Address>> OMPPrivatizedBindingsByName;
 
   // Keep track of the cleanups for callee-destructed parameters pushed to the
   // cleanup stack so that they can be deactivated later.
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 2d1f4b6b463c5..812d164e97be3 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -19510,6 +19510,10 @@ static bool captureInCapturedRegion(
     if (VarDecl *VD = S.OpenMP().isOpenMPCapturedDecl(Var)) {
       Var = VD; // Capture the DecompositionDecl.
       bool HasConst = DeclRefType.isConstQualified();
+      // Note: DeclRefType should remain the BindingDecl's type (e.g., int),
+      // not the DecompositionDecl's type (e.g., Point). The variable being
+      // captured is the DecompositionDecl, but expressions still reference
+      // the individual binding's type.
       DeclRefType = DeclRefType.getUnqualifiedType();
       // Don't lose diagnostics about assignments to const.
       if (HasConst)
diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp
index 59e5a7ae9d5c3..5a746dd31de85 100644
--- a/clang/lib/Sema/SemaLambda.cpp
+++ b/clang/lib/Sema/SemaLambda.cpp
@@ -1974,6 +1974,13 @@ ExprResult Sema::BuildCaptureInit(const Capture &Cap,
     // binding.
     auto *BD = dyn_cast<BindingDecl>(Var);
     if (IsOpenMPMapping && BD)
+      // When capturing a BindingDecl in an OpenMP mapping context, we need to
+      // capture the DecompositionDecl instead. BindingDecls are references to
+      // storage owned by the DecompositionDecl.
+      // Example:
+      //   auto [a, b] = p;
+      //   auto lambda = [a]() { return a; };  // In OpenMP context.
+      // This is reached during lambda capture for OpenMP mappings.
       Var = cast<BindingDecl>(Var)->getDecomposedDecl();
     Name = Var->getIdentifier();
     Init = BuildDeclarationNameExpr(
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 87dcfbd8941f7..322c136f9226e 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -5444,8 +5444,7 @@ getPrivateItem(Sema &S, Expr *&RefExpr, SourceLocation &ELoc,
   RefExpr = RefExpr->IgnoreParenImpCasts();
   auto *DE = dyn_cast_or_null<DeclRefExpr>(RefExpr);
   auto *ME = dyn_cast_or_null<MemberExpr>(RefExpr);
-  if ((!DE ||
-       (!isa<VarDecl>(DE->getDecl()) && !isa<BindingDecl>(DE->getDecl()))) &&
+  if ((!DE || (!isa<VarDecl, BindingDecl>(DE->getDecl()))) &&
       (S.getCurrentThisType().isNull() || !ME ||
        !isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts()) ||
        !isa<FieldDecl>(ME->getMemberDecl()))) {
diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp
index 34bfc4ad7c505..a52b040640633 100644
--- a/clang/lib/Sema/SemaStmt.cpp
+++ b/clang/lib/Sema/SemaStmt.cpp
@@ -4682,6 +4682,15 @@ buildCapturedStmtCaptureList(Sema &S, CapturedRegionScopeInfo *RSI,
     if (Cap.isVariableCapture()) {
       CapVar = Cap.getVariable();
       if (auto *BD = dyn_cast<BindingDecl>(CapVar)) {
+        // Diagnose tuple-like structured bindings in OpenMP captured regions.
+        // Only a BindingDecl with a holding variable (e.g., 'a' from
+        // 'auto [a, b] = t' for a tuple-like 't') is rejected here; capturing
+        // such bindings in an OpenMP region is not currently supported.
+        // This is reached during capture list construction when processing the
+        // OpenMP region, before expression evaluation in SemaExpr.cpp.
+        // Note: The reset to DecompositionDecl in SemaExpr.cpp happens during
+        // expression evaluation (a later phase). This code runs during capture
+        // list construction (earlier phase).
         if (RSI->CapRegionKind == CR_OpenMP && BD->getHoldingVar()) {
           S.Diag(Cap.getLocation(), diag::err_capture_tuple_binding_openmp)
               << CapVar;
diff --git a/clang/test/OpenMP/structured-bindings-codegen.cpp b/clang/test/OpenMP/structured-bindings-codegen.cpp
index c2a1366e01a9e..c35b4d2be9cae 100644
--- a/clang/test/OpenMP/structured-bindings-codegen.cpp
+++ b/clang/test/OpenMP/structured-bindings-codegen.cpp
@@ -509,44 +509,188 @@ void test_reduction_binding_max() {
   use(b);
 }
 
-int main() {
-  test_target_explicit_map();
-  test_target_implicit_map();
-  test_target_parallel();
-  test_target_parallel_for();
-  test_firstprivate_dsa();
-  test_shared_dsa();
-  test_reduction_dsa();
-  test_parallel_for_();
-  test_parallel_for_simd_();
-  test_target_teams_distribute();
-  test_task();
-  test_task_depend();
-  test_taskloop_();
-  instantiate_templates();
-  test_static_binding();
-  test_static_binding_shared();
-  test_array_target();
-  test_array_task();
-  test_nested();
-  test_multiple_bindings();
-  test_multiple_bindings_mixed_dsa();
-  test_private_individual_bindings();
-  test_firstprivate_individual_bindings();
-  test_mixed_dsa();
-  test_static_bindings();
-  test_shadowing();
-  test_simd_private_then_parallel();
-  test_linear_binding();
-  test_reduction_binding_sum();
-  test_reduction_binding_operators();
-  test_lastprivate_binding();
-  test_mixed_linear_private();
-  test_lastprivate_conditional();
-  test_reduction_binding_max();
-  return 0;
+struct NonTrivialCopy {
+  int value;
+  int copy_count;
+
+  NonTrivialCopy(int v) : value(v), copy_count(0) {}
+  NonTrivialCopy() : value(0), copy_count(0) {}
+  NonTrivialCopy(const NonTrivialCopy &other)
+    : value(other.value), copy_count(other.copy_count + 1) {}
+
+  ~NonTrivialCopy() {}
+};
+
+struct Pair {
+  NonTrivialCopy x;
+  NonTrivialCopy y;
+};
+
+void test_firstprivate_nontrivial_sb() {
+  Pair p{NonTrivialCopy(10), NonTrivialCopy(20)};
+  auto [a, b] = p;
+
+#pragma omp parallel firstprivate(a)
+  {
+    (void)a.value;
+  }
+}
+
+void test_firstprivate_ref_binding_sb() {
+  Pair p{NonTrivialCopy(10), NonTrivialCopy(20)};
+  auto& [a, b] = p;
+
+#pragma omp parallel firstprivate(a)
+  {
+    (void)a.value;
+  }
+}
+
+void test_firstprivate_const_ref_binding_sb() {
+  Pair p{NonTrivialCopy(10), NonTrivialCopy(20)};
+  const auto& [a, b] = p;
+
+#pragma omp parallel firstprivate(a)
+  {
+    (void)a.value;
+  }
+}
+
+void test_firstprivate_multiple_bindings_sb() {
+  Pair p{NonTrivialCopy(10), NonTrivialCopy(20)};
+  auto [a, b] = p;
+
+#pragma omp parallel firstprivate(a, b)
+  {
+    (void)(a.value + b.value);
+  }
+}
+
+struct WithDtor {
+  int *ptr;
+  WithDtor(int v) : ptr(new int(v)) {}
+  WithDtor(const WithDtor &other) : ptr(new int(*other.ptr)) {}
+  ~WithDtor() { delete ptr; }
+};
+
+struct PairWithDtor {
+  WithDtor x;
+  WithDtor y;
+};
+
+void test_firstprivate_with_destructor_sb() {
+  PairWithDtor p{WithDtor(100), WithDtor(200)};
+  auto [a, b] = p;
+
+#pragma omp parallel firstprivate(a)
+  {
+    (void)(*a.ptr);
+  }
+}
+
+void test_firstprivate_array_bindings_sb() {
+  NonTrivialCopy arr[3] = {NonTrivialCopy(1), NonTrivialCopy(2), NonTrivialCopy(3)};
+  auto [a, b, c] = arr;
+
+
+#pragma omp parallel firstprivate(a, b, c)
+  {
+    (void)(a.value + b.value + c.value);
+  }
+}
+
+void test_firstprivate_mixed_with_shared_sb() {
+  Pair p{NonTrivialCopy(10), NonTrivialCopy(20)};
+  auto [a, b] = p;
+
+#pragma omp parallel firstprivate(a) shared(b)
+  {
+    (void)a.value;
+    (void)b.value;
+  }
+}
+
+struct Inner {
+  int val;
+  Inner(int v) : val(v) {}
+  Inner(const Inner &o) : val(o.val * 2) {}
+  ~Inner() {}
+};
+
+struct Outer {
+  Inner i1;
+  Inner i2;
+};
+
+void test_firstprivate_nested_struct_sb() {
+  Outer o{Inner(5), Inner(10)};
+  auto [x, y] = o;
+
+#pragma omp parallel firstprivate(x)
+  {
+    (void)x.val;
+  }
+}
+
+void test_firstprivate_ref_binding_both_bindings_sb() {
+  Pair p{NonTrivialCopy(10), NonTrivialCopy(20)};
+  auto& [a, b] = p;
+
+#pragma omp parallel firstprivate(a, b)
+  {
+    (void)(a.value + b.value);
+  }
+}
+
+
+void test_firstprivate_const_ref_binding_both_bindings_sb() {
+  Pair p{NonTrivialCopy(10), NonTrivialCopy(20)};
+  const auto& [a, b] = p;
+
+#pragma omp parallel firstprivate(a, b)
+  {
+    (void)(a.value + b.value);
+  }
+}
+
+void test_lambda_capture_binding_by_value() {
+  Point p{10, 20};
+  auto [a, b] = p;
+  auto lambda = [a]() {
+    return a + a;
+  };
+  int result = lambda();
+}
+
+
+void test_lambda_capture_binding_by_ref() {
+  Point p{10, 20};
+  auto [a, b] = p;
+  auto lambda = [&a]() {
+    a = 100;
+  };
+  lambda();
+}
+
+
+void test_lambda_capture_multiple_bindings() {
+  Point p{10, 20};
+  auto [a, b] = p;
+  auto lambda = [a, &b]() {
+    b = a + 50;
+  };
+  lambda();
 }
 
+
+void test_lambda_implicit_capture() {
+  Point p{10, 20};
+  auto [a, b] = p;
+  auto lambda = [=]() {
+    return a + b;
+  };
+  lambda();
+}
 #endif
 // CHECK-LABEL: define dso_local void @_Z24test_target_explicit_mapv(
 // CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
@@ -3097,43 +3241,781 @@ int main() {
 // CHECK:    ret void
 //
 //
-// CHECK-LABEL: define dso_local noundef i32 @main(
-// CHECK-SAME: ) #[[ATTR8:[0-9]+]] {
-// CHECK:  [[ENTRY:.*:]]
-// CHECK:    store i32 0, ptr [[RETVAL:%.*]], align 4
-// CHECK:    call void @_Z24test_target_explicit_mapv()
-// CHECK:    call void @_Z24test_target_implicit_mapv()
-// CHECK:    call void @_Z20test_target_parallelv()
-// CHECK:    call void @_Z24test_target_parallel_forv()
-// CHECK:    call void @_Z21test_firstprivate_dsav()
-// CHECK:    call void @_Z15test_shared_dsav()
-// CHECK:    call void @_Z18test_reduction_dsav()
-// CHECK:    call void @_Z18test_parallel_for_v()
-// CHECK:    call void @_Z23test_parallel_for_simd_v()
-// CHECK:    call void @_Z28test_target_teams_distributev()
-// CHECK:    call void @_Z9test_taskv()
-// CHECK:    call void @_Z16test_task_dependv()
-// CHECK:    call void @_Z14test_taskloop_v()
-// CHECK:    call void @_Z21instantiate_templatesv()
-// CHECK:    call void @_Z19test_static_bindingv()
-// CHECK:    call void @_Z26test_static_binding_sharedv()
-// CHECK:    call void @_Z17test_array_targetv()
-// CHECK:    call void @_Z15test_array_taskv()
-// CHECK:    call void @_Z11test_nestedv()
-// CHECK:    call void @_Z22test_multiple_bindingsv()
-// CHECK:    call void @_Z32test_multiple_bindings_mixed_dsav()
-// CHECK:    call void @_Z32test_private_individual_bindingsv()
-// CHECK:    call void @_Z37test_firstprivate_individual_bindingsv()
-// CHECK:    call void @_Z14test_mixed_dsav()
-// CHECK:    call void @_Z20test_static_bindingsv()
-// CHECK:    call void @_Z14test_shadowingv()
-// CHECK:    call void @_Z31test_simd_private_then_parallelv()
-// CHECK:    call void @_Z19test_linear_bindingv()
-// CHECK:    call void @_Z26test_reduction_binding_sumv()
-// CHECK:    call void @_Z32test_reduction_binding_operatorsv()
-// CHECK:    call void @_Z24test_lastprivate_bindingv()
-// CHECK:    call void @_Z25test_mixed_linear_privatev()
-// CHECK:    call void @_Z28test_lastprivate_conditionalv()
-// CHECK:    call void @_Z26test_reduction_binding_maxv()
-// CHECK:    ret i32 0
+// CHECK-LABEL: define dso_local void @_Z31test_firstprivate_nontrivial_sbv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[P:%.*]], i32 0, i32 0
+// CHECK:    call void @_ZN14NonTrivialCopyC1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[X]], i32 noundef 10)
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[P]], i32 0, i32 1
+// CHECK:    call void @_ZN14NonTrivialCopyC1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[Y]], i32 noundef 20)
+// CHECK:    call void @_ZN4PairC1ERKS_(ptr noundef nonnull align 4 dereferenceable(16) [[TMP0:%.*]], ptr noundef nonnull align 4 dereferenceable(16) [[P]])
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z31test_firstprivate_nontrivial_sbv.omp_outlined, ptr [[TMP0]])
+// CHECK:    call void @_ZN4PairD1Ev(ptr noundef nonnull align 4 dead_on_return(16) dereferenceable(16) [[TMP0]]) #[[ATTR3]]
+// CHECK:    call void @_ZN4PairD1Ev(ptr noundef nonnull align 4 dead_on_return(16) dereferenceable(16) [[P]]) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define linkonce_odr void @_ZN14NonTrivialCopyC1Ei(
+// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[V:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    store i32 [[V]], ptr [[V_ADDR:%.*]], align 4
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    [[TMP0:%.*]] = load i32, ptr [[V_ADDR]], align 4
+// CHECK:    call void @_ZN14NonTrivialCopyC2Ei(ptr noundef nonnull align 4 dereferenceable(8) [[THIS1]], i32 noundef [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define linkonce_odr void @_ZN4PairC1ERKS_(
+// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(16) [[TMP0:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8
+// CHECK:    call void @_ZN4PairC2ERKS_(ptr noundef nonnull align 4 dereferenceable(16) [[THIS1]], ptr noundef nonnull align 4 dereferenceable(16) [[TMP1]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z31test_firstprivate_nontrivial_sbv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[X]])
+// CHECK:    [[VALUE:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY:%.*]], ptr [[A]], i32 0, i32 0
+// CHECK:    call void @_ZN14NonTrivialCopyD1Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[A]]) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define linkonce_odr void @_ZN14NonTrivialCopyC1ERKS_(
+// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[OTHER:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    store ptr [[OTHER]], ptr [[OTHER_ADDR:%.*]], align 8
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    [[TMP0:%.*]] = load ptr, ptr [[OTHER_ADDR]], align 8
+// CHECK:    call void @_ZN14NonTrivialCopyC2ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[THIS1]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define linkonce_odr void @_ZN14NonTrivialCopyD1Ev(
+// CHECK-SAME: ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    call void @_ZN14NonTrivialCopyD2Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[THIS1]]) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define linkonce_odr void @_ZN4PairD1Ev(
+// CHECK-SAME: ptr noundef nonnull align 4 dead_on_return(16) dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    call void @_ZN4PairD2Ev(ptr noundef nonnull align 4 dead_on_return(16) dereferenceable(16) [[THIS1]]) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define linkonce_odr void @_ZN14NonTrivialCopyC2Ei(
+// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[V:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    store i32 [[V]], ptr [[V_ADDR:%.*]], align 4
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    [[VALUE:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY:%.*]], ptr [[THIS1]], i32 0, i32 0
+// CHECK:    [[TMP0:%.*]] = load i32, ptr [[V_ADDR]], align 4
+// CHECK:    store i32 [[TMP0]], ptr [[VALUE]], align 4
+// CHECK:    [[COPY_COUNT:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY]], ptr [[THIS1]], i32 0, i32 1
+// CHECK:    store i32 0, ptr [[COPY_COUNT]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define linkonce_odr void @_ZN4PairC2ERKS_(
+// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(16) [[TMP0:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[THIS1]], i32 0, i32 0
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[X2:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[X]], ptr noundef nonnull align 4 dereferenceable(8) [[X2]])
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[THIS1]], i32 0, i32 1
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[Y3:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[TMP2]], i32 0, i32 1
+// CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[Y]], ptr noundef nonnull align 4 dereferenceable(8) [[Y3]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define linkonce_odr void @_ZN14NonTrivialCopyC2ERKS_(
+// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[OTHER:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    store ptr [[OTHER]], ptr [[OTHER_ADDR:%.*]], align 8
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    [[VALUE:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY:%.*]], ptr [[THIS1]], i32 0, i32 0
+// CHECK:    [[TMP0:%.*]] = load ptr, ptr [[OTHER_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[VALUE2:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[VALUE2]], align 4
+// CHECK:    store i32 [[TMP1]], ptr [[VALUE]], align 4
+// CHECK:    [[COPY_COUNT:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY]], ptr [[THIS1]], i32 0, i32 1
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[OTHER_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[COPY_COUNT3:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY]], ptr [[TMP2]], i32 0, i32 1
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[COPY_COUNT3]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// CHECK:    store i32 [[ADD]], ptr [[COPY_COUNT]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define linkonce_odr void @_ZN14NonTrivialCopyD2Ev(
+// CHECK-SAME: ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define linkonce_odr void @_ZN4PairD2Ev(
+// CHECK-SAME: ptr noundef nonnull align 4 dead_on_return(16) dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[THIS1]], i32 0, i32 1
+// CHECK:    call void @_ZN14NonTrivialCopyD1Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[Y]]) #[[ATTR3]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[THIS1]], i32 0, i32 0
+// CHECK:    call void @_ZN14NonTrivialCopyD1Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[X]]) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z32test_firstprivate_ref_binding_sbv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[P:%.*]], i32 0, i32 0
+// CHECK:    call void @_ZN14NonTrivialCopyC1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[X]], i32 noundef 10)
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[P]], i32 0, i32 1
+// CHECK:    call void @_ZN14NonTrivialCopyC1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[Y]], i32 noundef 20)
+// CHECK:    store ptr [[P]], ptr [[TMP0:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z32test_firstprivate_ref_binding_sbv.omp_outlined, ptr [[TMP1]])
+// CHECK:    call void @_ZN4PairD1Ev(ptr noundef nonnull align 4 dead_on_return(16) dereferenceable(16) [[P]]) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z32test_firstprivate_ref_binding_sbv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store ptr [[TMP1]], ptr [[TMP:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP2]], i32 0, i32 0
+// CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[X]])
+// CHECK:    [[VALUE:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY:%.*]], ptr [[A]], i32 0, i32 0
+// CHECK:    call void @_ZN14NonTrivialCopyD1Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[A]]) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z38test_firstprivate_const_ref_binding_sbv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[P:%.*]], i32 0, i32 0
+// CHECK:    call void @_ZN14NonTrivialCopyC1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[X]], i32 noundef 10)
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[P]], i32 0, i32 1
+// CHECK:    call void @_ZN14NonTrivialCopyC1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[Y]], i32 noundef 20)
+// CHECK:    store ptr [[P]], ptr [[TMP0:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z38test_firstprivate_const_ref_binding_sbv.omp_outlined, ptr [[TMP1]])
+// CHECK:    call void @_ZN4PairD1Ev(ptr noundef nonnull align 4 dead_on_return(16) dereferenceable(16) [[P]]) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z38test_firstprivate_const_ref_binding_sbv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store ptr [[TMP1]], ptr [[TMP:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP2]], i32 0, i32 0
+// CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[X]])
+// CHECK:    [[VALUE:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY:%.*]], ptr [[A]], i32 0, i32 0
+// CHECK:    call void @_ZN14NonTrivialCopyD1Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[A]]) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z38test_firstprivate_multiple_bindings_sbv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[P:%.*]], i32 0, i32 0
+// CHECK:    call void @_ZN14NonTrivialCopyC1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[X]], i32 noundef 10)
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[P]], i32 0, i32 1
+// CHECK:    call void @_ZN14NonTrivialCopyC1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[Y]], i32 noundef 20)
+// CHECK:    call void @_ZN4PairC1ERKS_(ptr noundef nonnull align 4 dereferenceable(16) [[TMP0:%.*]], ptr noundef nonnull align 4 dereferenceable(16) [[P]])
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z38test_firstprivate_multiple_bindings_sbv.omp_outlined, ptr [[TMP0]])
+// CHECK:    call void @_ZN4PairD1Ev(ptr noundef nonnull align 4 dead_on_return(16) dereferenceable(16) [[TMP0]]) #[[ATTR3]]
+// CHECK:    call void @_ZN4PairD1Ev(ptr noundef nonnull align 4 dead_on_return(16) dereferenceable(16) [[P]]) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z38test_firstprivate_multiple_bindings_sbv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[X]])
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[Y]])
+// CHECK:    [[VALUE:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY:%.*]], ptr [[A]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[VALUE]], align 4
+// CHECK:    [[VALUE1:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY]], ptr [[B]], i32 0, i32 0
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[VALUE1]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
+// CHECK:    call void @_ZN14NonTrivialCopyD1Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[B]]) #[[ATTR3]]
+// CHECK:    call void @_ZN14NonTrivialCopyD1Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[A]]) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z36test_firstprivate_with_destructor_sbv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIRWITHDTOR:%.*]], ptr [[P:%.*]], i32 0, i32 0
+// CHECK:    call void @_ZN8WithDtorC1Ei(ptr noundef nonnull align 8 dereferenceable(8) [[X]], i32 noundef 100)
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIRWITHDTOR]], ptr [[P]], i32 0, i32 1
+// CHECK:    call void @_ZN8WithDtorC1Ei(ptr noundef nonnull align 8 dereferenceable(8) [[Y]], i32 noundef 200)
+// CHECK:    call void @_ZN12PairWithDtorC1ERKS_(ptr noundef nonnull align 8 dereferenceable(16) [[TMP0:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[P]])
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z36test_firstprivate_with_destructor_sbv.omp_outlined, ptr [[TMP0]])
+// CHECK:    call void @_ZN12PairWithDtorD1Ev(ptr noundef nonnull align 8 dead_on_return(16) dereferenceable(16) [[TMP0]]) #[[ATTR3]]
+// CHECK:    call void @_ZN12PairWithDtorD1Ev(ptr noundef nonnull align 8 dead_on_return(16) dereferenceable(16) [[P]]) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define linkonce_odr void @_ZN8WithDtorC1Ei(
+// CHECK-SAME: ptr noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef [[V:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    store i32 [[V]], ptr [[V_ADDR:%.*]], align 4
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    [[TMP0:%.*]] = load i32, ptr [[V_ADDR]], align 4
+// CHECK:    call void @_ZN8WithDtorC2Ei(ptr noundef nonnull align 8 dereferenceable(8) [[THIS1]], i32 noundef [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define linkonce_odr void @_ZN12PairWithDtorC1ERKS_(
+// CHECK-SAME: ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[TMP0:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8
+// CHECK:    call void @_ZN12PairWithDtorC2ERKS_(ptr noundef nonnull align 8 dereferenceable(16) [[THIS1]], ptr noundef nonnull align 8 dereferenceable(16) [[TMP1]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z36test_firstprivate_with_destructor_sbv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META85:![0-9]+]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIRWITHDTOR:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    call void @_ZN8WithDtorC1ERKS_(ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[X]])
+// CHECK:    [[PTR:%.*]] = getelementptr inbounds nuw [[STRUCT_WITHDTOR:%.*]], ptr [[A]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[PTR]], align 8
+// CHECK:    call void @_ZN8WithDtorD1Ev(ptr noundef nonnull align 8 dead_on_return(8) dereferenceable(8) [[A]]) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define linkonce_odr void @_ZN8WithDtorC1ERKS_(
+// CHECK-SAME: ptr noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[OTHER:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    store ptr [[OTHER]], ptr [[OTHER_ADDR:%.*]], align 8
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    [[TMP0:%.*]] = load ptr, ptr [[OTHER_ADDR]], align 8
+// CHECK:    call void @_ZN8WithDtorC2ERKS_(ptr noundef nonnull align 8 dereferenceable(8) [[THIS1]], ptr noundef nonnull align 8 dereferenceable(8) [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define linkonce_odr void @_ZN8WithDtorD1Ev(
+// CHECK-SAME: ptr noundef nonnull align 8 dead_on_return(8) dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    call void @_ZN8WithDtorD2Ev(ptr noundef nonnull align 8 dead_on_return(8) dereferenceable(8) [[THIS1]]) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define linkonce_odr void @_ZN12PairWithDtorD1Ev(
+// CHECK-SAME: ptr noundef nonnull align 8 dead_on_return(16) dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    call void @_ZN12PairWithDtorD2Ev(ptr noundef nonnull align 8 dead_on_return(16) dereferenceable(16) [[THIS1]]) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define linkonce_odr void @_ZN8WithDtorC2Ei(
+// CHECK-SAME: ptr noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef [[V:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    store i32 [[V]], ptr [[V_ADDR:%.*]], align 4
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    [[PTR:%.*]] = getelementptr inbounds nuw [[STRUCT_WITHDTOR:%.*]], ptr [[THIS1]], i32 0, i32 0
+// CHECK:    [[CALL:%.*]] = call noalias noundef nonnull ptr @_Znwm(i64 noundef 4) #[[ATTR11:[0-9]+]]
+// CHECK:    [[TMP0:%.*]] = load i32, ptr [[V_ADDR]], align 4
+// CHECK:    store i32 [[TMP0]], ptr [[CALL]], align 4
+// CHECK:    store ptr [[CALL]], ptr [[PTR]], align 8
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define linkonce_odr void @_ZN12PairWithDtorC2ERKS_(
+// CHECK-SAME: ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[TMP0:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIRWITHDTOR:%.*]], ptr [[THIS1]], i32 0, i32 0
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META85]]
+// CHECK:    [[X2:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIRWITHDTOR]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    call void @_ZN8WithDtorC1ERKS_(ptr noundef nonnull align 8 dereferenceable(8) [[X]], ptr noundef nonnull align 8 dereferenceable(8) [[X2]])
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIRWITHDTOR]], ptr [[THIS1]], i32 0, i32 1
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META85]]
+// CHECK:    [[Y3:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIRWITHDTOR]], ptr [[TMP2]], i32 0, i32 1
+// CHECK:    call void @_ZN8WithDtorC1ERKS_(ptr noundef nonnull align 8 dereferenceable(8) [[Y]], ptr noundef nonnull align 8 dereferenceable(8) [[Y3]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define linkonce_odr void @_ZN8WithDtorC2ERKS_(
+// CHECK-SAME: ptr noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[OTHER:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    store ptr [[OTHER]], ptr [[OTHER_ADDR:%.*]], align 8
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    [[PTR:%.*]] = getelementptr inbounds nuw [[STRUCT_WITHDTOR:%.*]], ptr [[THIS1]], i32 0, i32 0
+// CHECK:    [[CALL:%.*]] = call noalias noundef nonnull ptr @_Znwm(i64 noundef 4) #[[ATTR11]]
+// CHECK:    [[TMP0:%.*]] = load ptr, ptr [[OTHER_ADDR]], align 8, !nonnull [[META2]], !align [[META85]]
+// CHECK:    [[PTR2:%.*]] = getelementptr inbounds nuw [[STRUCT_WITHDTOR]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[PTR2]], align 8
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
+// CHECK:    store i32 [[TMP2]], ptr [[CALL]], align 4
+// CHECK:    store ptr [[CALL]], ptr [[PTR]], align 8
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define linkonce_odr void @_ZN8WithDtorD2Ev(
+// CHECK-SAME: ptr noundef nonnull align 8 dead_on_return(8) dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    [[PTR:%.*]] = getelementptr inbounds nuw [[STRUCT_WITHDTOR:%.*]], ptr [[THIS1]], i32 0, i32 0
+// CHECK:    [[TMP0:%.*]] = load ptr, ptr [[PTR]], align 8
+// CHECK:    [[ISNULL:%.*]] = icmp eq ptr [[TMP0]], null
+// CHECK:    br i1 [[ISNULL]], label %[[DELETE_END:.*]], label %[[DELETE_NOTNULL:.*]]
+// CHECK:       [[DELETE_NOTNULL]]:
+// CHECK:    call void @_ZdlPvm(ptr noundef [[TMP0]], i64 noundef 4) #[[ATTR12:[0-9]+]]
+// CHECK:    br label %[[DELETE_END]]
+// CHECK:       [[DELETE_END]]:
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define linkonce_odr void @_ZN12PairWithDtorD2Ev(
+// CHECK-SAME: ptr noundef nonnull align 8 dead_on_return(16) dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIRWITHDTOR:%.*]], ptr [[THIS1]], i32 0, i32 1
+// CHECK:    call void @_ZN8WithDtorD1Ev(ptr noundef nonnull align 8 dead_on_return(8) dereferenceable(8) [[Y]]) #[[ATTR3]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIRWITHDTOR]], ptr [[THIS1]], i32 0, i32 0
+// CHECK:    call void @_ZN8WithDtorD1Ev(ptr noundef nonnull align 8 dead_on_return(8) dereferenceable(8) [[X]]) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z35test_firstprivate_array_bindings_sbv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*]]:
+// CHECK:    call void @_ZN14NonTrivialCopyC1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[ARR:%.*]], i32 noundef 1)
+// CHECK:    [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_NONTRIVIALCOPY:%.*]], ptr [[ARR]], i64 1
+// CHECK:    call void @_ZN14NonTrivialCopyC1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[ARRAYINIT_ELEMENT]], i32 noundef 2)
+// CHECK:    [[ARRAYINIT_ELEMENT1:%.*]] = getelementptr inbounds [[STRUCT_NONTRIVIALCOPY]], ptr [[ARR]], i64 2
+// CHECK:    call void @_ZN14NonTrivialCopyC1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[ARRAYINIT_ELEMENT1]], i32 noundef 3)
+// CHECK:    [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [3 x [[STRUCT_NONTRIVIALCOPY]]], ptr [[TMP0:%.*]], i64 0, i64 0
+// CHECK:    br label %[[ARRAYINIT_BODY:.*]]
+// CHECK:       [[ARRAYINIT_BODY]]:
+// CHECK:    [[ARRAYINIT_INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[ARRAYINIT_NEXT:%.*]], %[[ARRAYINIT_BODY]] ]
+// CHECK:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_NONTRIVIALCOPY]], ptr [[ARRAYINIT_BEGIN]], i64 [[ARRAYINIT_INDEX]]
+// CHECK:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [3 x [[STRUCT_NONTRIVIALCOPY]]], ptr [[ARR]], i64 0, i64 [[ARRAYINIT_INDEX]]
+// CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[TMP1]], ptr noundef nonnull align 4 dereferenceable(8) [[ARRAYIDX]])
+// CHECK:    [[ARRAYINIT_NEXT]] = add nuw i64 [[ARRAYINIT_INDEX]], 1
+// CHECK:    [[ARRAYINIT_DONE:%.*]] = icmp eq i64 [[ARRAYINIT_NEXT]], 3
+// CHECK:    br i1 [[ARRAYINIT_DONE]], label %[[ARRAYINIT_END:.*]], label %[[ARRAYINIT_BODY]]
+// CHECK:       [[ARRAYINIT_END]]:
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z35test_firstprivate_array_bindings_sbv.omp_outlined, ptr [[TMP0]])
+// CHECK:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [3 x [[STRUCT_NONTRIVIALCOPY]]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_NONTRIVIALCOPY]], ptr [[ARRAY_BEGIN]], i64 3
+// CHECK:    br label %[[ARRAYDESTROY_BODY:.*]]
+// CHECK:       [[ARRAYDESTROY_BODY]]:
+// CHECK:    [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], %[[ARRAYINIT_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], %[[ARRAYDESTROY_BODY]] ]
+// CHECK:    [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_NONTRIVIALCOPY]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
+// CHECK:    call void @_ZN14NonTrivialCopyD1Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
+// CHECK:    [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
+// CHECK:    br i1 [[ARRAYDESTROY_DONE]], label %[[ARRAYDESTROY_DONE2:.*]], label %[[ARRAYDESTROY_BODY]]
+// CHECK:       [[ARRAYDESTROY_DONE2]]:
+// CHECK:    [[ARRAY_BEGIN3:%.*]] = getelementptr inbounds [3 x [[STRUCT_NONTRIVIALCOPY]]], ptr [[ARR]], i32 0, i32 0
+// CHECK:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_NONTRIVIALCOPY]], ptr [[ARRAY_BEGIN3]], i64 3
+// CHECK:    br label %[[ARRAYDESTROY_BODY4:.*]]
+// CHECK:       [[ARRAYDESTROY_BODY4]]:
+// CHECK:    [[ARRAYDESTROY_ELEMENTPAST5:%.*]] = phi ptr [ [[TMP3]], %[[ARRAYDESTROY_DONE2]] ], [ [[ARRAYDESTROY_ELEMENT6:%.*]], %[[ARRAYDESTROY_BODY4]] ]
+// CHECK:    [[ARRAYDESTROY_ELEMENT6]] = getelementptr inbounds [[STRUCT_NONTRIVIALCOPY]], ptr [[ARRAYDESTROY_ELEMENTPAST5]], i64 -1
+// CHECK:    call void @_ZN14NonTrivialCopyD1Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[ARRAYDESTROY_ELEMENT6]]) #[[ATTR3]]
+// CHECK:    [[ARRAYDESTROY_DONE7:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT6]], [[ARRAY_BEGIN3]]
+// CHECK:    br i1 [[ARRAYDESTROY_DONE7]], label %[[ARRAYDESTROY_DONE8:.*]], label %[[ARRAYDESTROY_BODY4]]
+// CHECK:       [[ARRAYDESTROY_DONE8]]:
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z35test_firstprivate_array_bindings_sbv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = getelementptr inbounds [3 x [[STRUCT_NONTRIVIALCOPY:%.*]]], ptr [[TMP1]], i64 0, i64 0
+// CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP2]])
+// CHECK:    [[TMP3:%.*]] = getelementptr inbounds [3 x [[STRUCT_NONTRIVIALCOPY]]], ptr [[TMP1]], i64 0, i64 1
+// CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP3]])
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds [3 x [[STRUCT_NONTRIVIALCOPY]]], ptr [[TMP1]], i64 0, i64 2
+// CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP4]])
+// CHECK:    [[VALUE:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY]], ptr [[A]], i32 0, i32 0
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[VALUE]], align 4
+// CHECK:    [[VALUE1:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY]], ptr [[B]], i32 0, i32 0
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[VALUE1]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]]
+// CHECK:    [[VALUE2:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY]], ptr [[C]], i32 0, i32 0
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[VALUE2]], align 4
+// CHECK:    [[ADD3:%.*]] = add nsw i32 [[ADD]], [[TMP7]]
+// CHECK:    call void @_ZN14NonTrivialCopyD1Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[C]]) #[[ATTR3]]
+// CHECK:    call void @_ZN14NonTrivialCopyD1Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[B]]) #[[ATTR3]]
+// CHECK:    call void @_ZN14NonTrivialCopyD1Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[A]]) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z38test_firstprivate_mixed_with_shared_sbv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[P:%.*]], i32 0, i32 0
+// CHECK:    call void @_ZN14NonTrivialCopyC1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[X]], i32 noundef 10)
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[P]], i32 0, i32 1
+// CHECK:    call void @_ZN14NonTrivialCopyC1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[Y]], i32 noundef 20)
+// CHECK:    call void @_ZN4PairC1ERKS_(ptr noundef nonnull align 4 dereferenceable(16) [[TMP0:%.*]], ptr noundef nonnull align 4 dereferenceable(16) [[P]])
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z38test_firstprivate_mixed_with_shared_sbv.omp_outlined, ptr [[TMP0]])
+// CHECK:    call void @_ZN4PairD1Ev(ptr noundef nonnull align 4 dead_on_return(16) dereferenceable(16) [[TMP0]]) #[[ATTR3]]
+// CHECK:    call void @_ZN4PairD1Ev(ptr noundef nonnull align 4 dead_on_return(16) dereferenceable(16) [[P]]) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z38test_firstprivate_mixed_with_shared_sbv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[X]])
+// CHECK:    [[VALUE:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY:%.*]], ptr [[A]], i32 0, i32 0
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[VALUE1:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY]], ptr [[Y]], i32 0, i32 0
+// CHECK:    call void @_ZN14NonTrivialCopyD1Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[A]]) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z34test_firstprivate_nested_struct_sbv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    [[I1:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER:%.*]], ptr [[O:%.*]], i32 0, i32 0
+// CHECK:    call void @_ZN5InnerC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[I1]], i32 noundef 5)
+// CHECK:    [[I2:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[O]], i32 0, i32 1
+// CHECK:    call void @_ZN5InnerC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[I2]], i32 noundef 10)
+// CHECK:    call void @_ZN5OuterC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[O]])
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z34test_firstprivate_nested_struct_sbv.omp_outlined, ptr [[TMP0]])
+// CHECK:    call void @_ZN5OuterD1Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[TMP0]]) #[[ATTR3]]
+// CHECK:    call void @_ZN5OuterD1Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[O]]) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define linkonce_odr void @_ZN5InnerC1Ei(
+// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[V:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    store i32 [[V]], ptr [[V_ADDR:%.*]], align 4
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    [[TMP0:%.*]] = load i32, ptr [[V_ADDR]], align 4
+// CHECK:    call void @_ZN5InnerC2Ei(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define linkonce_odr void @_ZN5OuterC1ERKS_(
+// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8
+// CHECK:    call void @_ZN5OuterC2ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[THIS1]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP1]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z34test_firstprivate_nested_struct_sbv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[I1:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    call void @_ZN5InnerC1ERKS_(ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I1]])
+// CHECK:    [[VAL:%.*]] = getelementptr inbounds nuw [[STRUCT_INNER:%.*]], ptr [[X]], i32 0, i32 0
+// CHECK:    call void @_ZN5InnerD1Ev(ptr noundef nonnull align 4 dead_on_return(4) dereferenceable(4) [[X]]) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define linkonce_odr void @_ZN5InnerC1ERKS_(
+// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[O:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    store ptr [[O]], ptr [[O_ADDR:%.*]], align 8
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8
+// CHECK:    call void @_ZN5InnerC2ERKS_(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define linkonce_odr void @_ZN5InnerD1Ev(
+// CHECK-SAME: ptr noundef nonnull align 4 dead_on_return(4) dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    call void @_ZN5InnerD2Ev(ptr noundef nonnull align 4 dead_on_return(4) dereferenceable(4) [[THIS1]]) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define linkonce_odr void @_ZN5OuterD1Ev(
+// CHECK-SAME: ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    call void @_ZN5OuterD2Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[THIS1]]) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define linkonce_odr void @_ZN5InnerC2Ei(
+// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[V:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    store i32 [[V]], ptr [[V_ADDR:%.*]], align 4
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    [[VAL:%.*]] = getelementptr inbounds nuw [[STRUCT_INNER:%.*]], ptr [[THIS1]], i32 0, i32 0
+// CHECK:    [[TMP0:%.*]] = load i32, ptr [[V_ADDR]], align 4
+// CHECK:    store i32 [[TMP0]], ptr [[VAL]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define linkonce_odr void @_ZN5OuterC2ERKS_(
+// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    [[I1:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER:%.*]], ptr [[THIS1]], i32 0, i32 0
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[I12:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    call void @_ZN5InnerC1ERKS_(ptr noundef nonnull align 4 dereferenceable(4) [[I1]], ptr noundef nonnull align 4 dereferenceable(4) [[I12]])
+// CHECK:    [[I2:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[THIS1]], i32 0, i32 1
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[I23:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[TMP2]], i32 0, i32 1
+// CHECK:    call void @_ZN5InnerC1ERKS_(ptr noundef nonnull align 4 dereferenceable(4) [[I2]], ptr noundef nonnull align 4 dereferenceable(4) [[I23]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define linkonce_odr void @_ZN5InnerC2ERKS_(
+// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[O:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    store ptr [[O]], ptr [[O_ADDR:%.*]], align 8
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    [[VAL:%.*]] = getelementptr inbounds nuw [[STRUCT_INNER:%.*]], ptr [[THIS1]], i32 0, i32 0
+// CHECK:    [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[VAL2:%.*]] = getelementptr inbounds nuw [[STRUCT_INNER]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[VAL2]], align 4
+// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP1]], 2
+// CHECK:    store i32 [[MUL]], ptr [[VAL]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define linkonce_odr void @_ZN5InnerD2Ev(
+// CHECK-SAME: ptr noundef nonnull align 4 dead_on_return(4) dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define linkonce_odr void @_ZN5OuterD2Ev(
+// CHECK-SAME: ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    [[I2:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER:%.*]], ptr [[THIS1]], i32 0, i32 1
+// CHECK:    call void @_ZN5InnerD1Ev(ptr noundef nonnull align 4 dead_on_return(4) dereferenceable(4) [[I2]]) #[[ATTR3]]
+// CHECK:    [[I1:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[THIS1]], i32 0, i32 0
+// CHECK:    call void @_ZN5InnerD1Ev(ptr noundef nonnull align 4 dead_on_return(4) dereferenceable(4) [[I1]]) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z46test_firstprivate_ref_binding_both_bindings_sbv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[P:%.*]], i32 0, i32 0
+// CHECK:    call void @_ZN14NonTrivialCopyC1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[X]], i32 noundef 10)
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[P]], i32 0, i32 1
+// CHECK:    call void @_ZN14NonTrivialCopyC1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[Y]], i32 noundef 20)
+// CHECK:    store ptr [[P]], ptr [[TMP0:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z46test_firstprivate_ref_binding_both_bindings_sbv.omp_outlined, ptr [[TMP1]])
+// CHECK:    call void @_ZN4PairD1Ev(ptr noundef nonnull align 4 dead_on_return(16) dereferenceable(16) [[P]]) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z46test_firstprivate_ref_binding_both_bindings_sbv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store ptr [[TMP1]], ptr [[TMP:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP2]], i32 0, i32 0
+// CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[X]])
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[TMP3]], i32 0, i32 1
+// CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[Y]])
+// CHECK:    [[VALUE:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY:%.*]], ptr [[A]], i32 0, i32 0
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[VALUE]], align 4
+// CHECK:    [[VALUE1:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY]], ptr [[B]], i32 0, i32 0
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[VALUE1]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]]
+// CHECK:    call void @_ZN14NonTrivialCopyD1Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[B]]) #[[ATTR3]]
+// CHECK:    call void @_ZN14NonTrivialCopyD1Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[A]]) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z52test_firstprivate_const_ref_binding_both_bindings_sbv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[P:%.*]], i32 0, i32 0
+// CHECK:    call void @_ZN14NonTrivialCopyC1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[X]], i32 noundef 10)
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[P]], i32 0, i32 1
+// CHECK:    call void @_ZN14NonTrivialCopyC1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[Y]], i32 noundef 20)
+// CHECK:    store ptr [[P]], ptr [[TMP0:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z52test_firstprivate_const_ref_binding_both_bindings_sbv.omp_outlined, ptr [[TMP1]])
+// CHECK:    call void @_ZN4PairD1Ev(ptr noundef nonnull align 4 dead_on_return(16) dereferenceable(16) [[P]]) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z52test_firstprivate_const_ref_binding_both_bindings_sbv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store ptr [[TMP1]], ptr [[TMP:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP2]], i32 0, i32 0
+// CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[X]])
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[TMP3]], i32 0, i32 1
+// CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[Y]])
+// CHECK:    [[VALUE:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY:%.*]], ptr [[A]], i32 0, i32 0
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[VALUE]], align 4
+// CHECK:    [[VALUE1:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY]], ptr [[B]], i32 0, i32 0
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[VALUE1]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]]
+// CHECK:    call void @_ZN14NonTrivialCopyD1Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[B]]) #[[ATTR3]]
+// CHECK:    call void @_ZN14NonTrivialCopyD1Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[A]]) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z36test_lambda_capture_binding_by_valuev(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z36test_lambda_capture_binding_by_valuev.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    [[TMP1:%.*]] = getelementptr inbounds nuw [[CLASS_ANON:%.*]], ptr [[LAMBDA:%.*]], i32 0, i32 0
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    store i32 [[TMP2]], ptr [[TMP1]], align 4
+// CHECK:    [[CALL:%.*]] = call noundef i32 @"_ZZ36test_lambda_capture_binding_by_valuevENK3$_0clEv"(ptr noundef nonnull align 4 dereferenceable(4) [[LAMBDA]])
+// CHECK:    store i32 [[CALL]], ptr [[RESULT:%.*]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z34test_lambda_capture_binding_by_refv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z34test_lambda_capture_binding_by_refv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    [[TMP1:%.*]] = getelementptr inbounds nuw [[CLASS_ANON_12:%.*]], ptr [[LAMBDA:%.*]], i32 0, i32 0
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    store ptr [[X]], ptr [[TMP1]], align 8
+// CHECK:    call void @"_ZZ34test_lambda_capture_binding_by_refvENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[LAMBDA]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z37test_lambda_capture_multiple_bindingsv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z37test_lambda_capture_multiple_bindingsv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    [[TMP1:%.*]] = getelementptr inbounds nuw [[CLASS_ANON_13:%.*]], ptr [[LAMBDA:%.*]], i32 0, i32 0
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    store i32 [[TMP2]], ptr [[TMP1]], align 8
+// CHECK:    [[TMP3:%.*]] = getelementptr inbounds nuw [[CLASS_ANON_13]], ptr [[LAMBDA]], i32 0, i32 1
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 1
+// CHECK:    store ptr [[Y]], ptr [[TMP3]], align 8
+// CHECK:    call void @"_ZZ37test_lambda_capture_multiple_bindingsvENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(16) [[LAMBDA]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z28test_lambda_implicit_capturev(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z28test_lambda_implicit_capturev.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    [[TMP1:%.*]] = getelementptr inbounds nuw [[CLASS_ANON_14:%.*]], ptr [[LAMBDA:%.*]], i32 0, i32 0
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    store i32 [[TMP2]], ptr [[TMP1]], align 4
+// CHECK:    [[TMP3:%.*]] = getelementptr inbounds nuw [[CLASS_ANON_14]], ptr [[LAMBDA]], i32 0, i32 1
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 1
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    store i32 [[TMP4]], ptr [[TMP3]], align 4
+// CHECK:    [[CALL:%.*]] = call noundef i32 @"_ZZ28test_lambda_implicit_capturevENK3$_0clEv"(ptr noundef nonnull align 4 dereferenceable(8) [[LAMBDA]])
+// CHECK:    ret void
 //

>From 9ca046531ebffe7f31668b516272c5baf4e970f6 Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Mon, 8 Jun 2026 07:40:13 -0700
Subject: [PATCH 31/45] Addressed review comments

---
 clang/lib/CodeGen/CGExpr.cpp                  |  12 +-
 clang/lib/CodeGen/CGStmtOpenMP.cpp            |  16 ++
 clang/lib/CodeGen/CodeGenFunction.h           |  11 +-
 .../OpenMP/structured-bindings-codegen.cpp    | 199 ++++++++++++------
 ...ctured-bindings-template-instantiation.cpp | 171 ++++++++++++---
 5 files changed, 302 insertions(+), 107 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 4e995533b262a..18362560670d8 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3606,6 +3606,9 @@ LValue CodeGenFunction::EmitOMPCapturedBindingLValue(const BindingDecl *BD) {
          "Expected OpenMP captured region");
   assert(CGM.getLangOpts().OpenMP && "Expected OpenMP to be enabled");
 
+  if (auto It = LocalDeclMap.find(BD); It != LocalDeclMap.end()) {
+    return MakeAddrLValue(It->second, BD->getType());
+  }
   auto *DD = cast<VarDecl>(BD->getDecomposedDecl());
   Expr *BindingExpr = BD->getBinding()->IgnoreImplicit();
   QualType DREType = DD->getType().getNonReferenceType();
@@ -3837,13 +3840,10 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
       if (CapturedStmtInfo &&
           CapturedStmtInfo->getKind() == CapturedRegionKind::CR_OpenMP &&
           CGM.getLangOpts().OpenMP) {
-        auto NameIt = OMPPrivatizedBindingsByName.find(BD->getName());
-        if (NameIt != OMPPrivatizedBindingsByName.end()) {
-          assert(NameIt->second.has_value() &&
-                 "Expected valid binding address");
-          return MakeAddrLValue(*NameIt->second, E->getType(),
+        auto NameIt = OMPPrivatizedBindings.find(BD);
+        if (NameIt != OMPPrivatizedBindings.end())
+          return MakeAddrLValue(NameIt->second, E->getType(),
                                 AlignmentSource::Decl);
-        }
         return EmitOMPCapturedBindingLValue(BD);
       }
       // Non-OpenMP case: lambda capture.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 764b8df1b59bb..147bf4e687c89 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1613,6 +1613,22 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
     // Silence the warning about unused variable.
     (void)IsRegistered;
 
+    if (const auto *DRE = dyn_cast<DeclRefExpr>(IRef->IgnoreParenImpCasts())) {
+      const ValueDecl *VD = DRE->getDecl();
+      if (const auto *OCED = dyn_cast<OMPCapturedExprDecl>(VD)) {
+        if (const Expr *Init = OCED->getInit()) {
+          if (const auto *InnerDRE =
+                  dyn_cast<DeclRefExpr>(Init->IgnoreParenImpCasts())) {
+            if (const auto *BD = dyn_cast<BindingDecl>(InnerDRE->getDecl())) {
+              IsRegistered = PrivateScope.addPrivate(BD, BaseAddr);
+              assert(IsRegistered &&
+                     "private binding already registered as private");
+              (void)IsRegistered;
+            }
+          }
+        }
+      }
+    }
     const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
     const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
     QualType Type = PrivateVD->getType();
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index bfd3251ca4d7d..cc2bd396423dc 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -1160,8 +1160,7 @@ class CodeGenFunction : public CodeGenTypeCache {
 
       // For BindingDecls, also store by name for remapped lookup.
       if (const auto *BD = dyn_cast<BindingDecl>(LocalVD))
-        CGF.OMPPrivatizedBindingsByName.insert_or_assign(BD->getName(),
-                                                         TempAddr);
+        CGF.OMPPrivatizedBindings.insert_or_assign(BD, TempAddr);
 
       // Only save it once.
       if (SavedLocals.count(LocalVD))
@@ -1223,12 +1222,12 @@ class CodeGenFunction : public CodeGenTypeCache {
     OMPMapVars MappedVars;
     OMPPrivateScope(const OMPPrivateScope &) = delete;
     void operator=(const OMPPrivateScope &) = delete;
-    llvm::StringMap<std::optional<Address>> SavedBindingsByName;
+    llvm::DenseMap<const BindingDecl*, Address> SavedBindings;
 
   public:
     /// Enter a new OpenMP private scope.
     explicit OMPPrivateScope(CodeGenFunction &CGF) : RunCleanupsScope(CGF) {
-      SavedBindingsByName = CGF.OMPPrivatizedBindingsByName;
+      SavedBindings = CGF.OMPPrivatizedBindings;
     }
 
     /// Registers \p LocalVD variable as a private with \p Addr as the address
@@ -1260,7 +1259,7 @@ class CodeGenFunction : public CodeGenTypeCache {
     ~OMPPrivateScope() {
       if (PerformCleanup)
         ForceCleanup();
-      CGF.OMPPrivatizedBindingsByName = std::move(SavedBindingsByName);
+      CGF.OMPPrivatizedBindings = std::move(SavedBindings);
     }
 
     /// Checks if the global variable is captured in current function.
@@ -1563,7 +1562,7 @@ class CodeGenFunction : public CodeGenTypeCache {
   /// Name-based lookup map for privatized BindingDecls.
   /// Used when BindingDecls are remapped during OpenMP outlining, since the
   /// remapped BindingDecl has a different pointer than the original.
-  llvm::StringMap<std::optional<Address>> OMPPrivatizedBindingsByName;
+  llvm::DenseMap<const BindingDecl *, Address> OMPPrivatizedBindings;
 
   // Keep track of the cleanups for callee-destructed parameters pushed to the
   // cleanup stack so that they can be deactivated later.
diff --git a/clang/test/OpenMP/structured-bindings-codegen.cpp b/clang/test/OpenMP/structured-bindings-codegen.cpp
index c35b4d2be9cae..f3c45fd6d8d81 100644
--- a/clang/test/OpenMP/structured-bindings-codegen.cpp
+++ b/clang/test/OpenMP/structured-bindings-codegen.cpp
@@ -509,6 +509,19 @@ void test_reduction_binding_max() {
   use(b);
 }
 
+void test_binding_name_collision() {
+  Point p1{1, 2};
+  auto [a, b] = p1;
+
+#pragma omp parallel reduction(+:a)
+  {
+    Point p2{10, 20};
+    auto [a, b] = p2;
+    a += 1;
+    use(a);
+  }
+}
+
 struct NonTrivialCopy {
   int value;
   int copy_count;
@@ -2570,10 +2583,9 @@ void test_lambda_implicit_capture() {
 // CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4
 // CHECK:    [[TMP12:%.*]] = load i32, ptr [[I]], align 4
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
-// CHECK:    [[TMP13:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[TMP13:%.*]] = load i32, ptr [[A2]], align 4
 // CHECK:    [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP12]]
-// CHECK:    store i32 [[ADD5]], ptr [[X]], align 4
+// CHECK:    store i32 [[ADD5]], ptr [[A2]], align 4
 // CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
 // CHECK:       [[OMP_BODY_CONTINUE]]:
 // CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
@@ -2697,19 +2709,16 @@ void test_lambda_implicit_capture() {
 // CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1
 // CHECK:    [[ADD:%.*]] = add nsw i32 1, [[MUL]]
 // CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
-// CHECK:    [[TMP14:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[TMP14:%.*]] = load i32, ptr [[A3]], align 4
 // CHECK:    [[MUL8:%.*]] = mul nsw i32 [[TMP14]], 2
-// CHECK:    store i32 [[MUL8]], ptr [[X]], align 4
+// CHECK:    store i32 [[MUL8]], ptr [[A3]], align 4
 // CHECK:    [[TMP15:%.*]] = load i32, ptr [[I]], align 4
-// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
-// CHECK:    [[TMP16:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[TMP16:%.*]] = load i32, ptr [[B5]], align 4
 // CHECK:    [[CMP9:%.*]] = icmp slt i32 [[TMP15]], [[TMP16]]
 // CHECK:    br i1 [[CMP9]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
 // CHECK:       [[IF_THEN]]:
 // CHECK:    [[TMP17:%.*]] = load i32, ptr [[I]], align 4
-// CHECK:    [[Y10:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
-// CHECK:    store i32 [[TMP17]], ptr [[Y10]], align 4
+// CHECK:    store i32 [[TMP17]], ptr [[B5]], align 4
 // CHECK:    br label %[[IF_END]]
 // CHECK:       [[IF_END]]:
 // CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
@@ -2717,8 +2726,8 @@ void test_lambda_implicit_capture() {
 // CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
 // CHECK:       [[OMP_INNER_FOR_INC]]:
 // CHECK:    [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[ADD11:%.*]] = add nsw i32 [[TMP18]], 1
-// CHECK:    store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[ADD10:%.*]] = add nsw i32 [[TMP18]], 1
+// CHECK:    store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4
 // CHECK:    br label %[[OMP_INNER_FOR_COND]]
 // CHECK:       [[OMP_INNER_FOR_END]]:
 // CHECK:    br label %[[OMP_LOOP_EXIT:.*]]
@@ -2736,21 +2745,21 @@ void test_lambda_implicit_capture() {
 // CHECK:       [[_OMP_REDUCTION_CASE1:.*:]]
 // CHECK:    [[TMP22:%.*]] = load i32, ptr [[TMP4]], align 4
 // CHECK:    [[TMP23:%.*]] = load i32, ptr [[A3]], align 4
-// CHECK:    [[MUL12:%.*]] = mul nsw i32 [[TMP22]], [[TMP23]]
-// CHECK:    store i32 [[MUL12]], ptr [[TMP4]], align 4
+// CHECK:    [[MUL11:%.*]] = mul nsw i32 [[TMP22]], [[TMP23]]
+// CHECK:    store i32 [[MUL11]], ptr [[TMP4]], align 4
 // CHECK:    [[TMP24:%.*]] = load i32, ptr [[TMP5]], align 4
 // CHECK:    [[TMP25:%.*]] = load i32, ptr [[B5]], align 4
-// CHECK:    [[CMP13:%.*]] = icmp slt i32 [[TMP24]], [[TMP25]]
-// CHECK:    br i1 [[CMP13]], label %[[COND_TRUE14:.*]], label %[[COND_FALSE15:.*]]
-// CHECK:       [[COND_TRUE14]]:
+// CHECK:    [[CMP12:%.*]] = icmp slt i32 [[TMP24]], [[TMP25]]
+// CHECK:    br i1 [[CMP12]], label %[[COND_TRUE13:.*]], label %[[COND_FALSE14:.*]]
+// CHECK:       [[COND_TRUE13]]:
 // CHECK:    [[TMP26:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK:    br label %[[COND_END16:.*]]
-// CHECK:       [[COND_FALSE15]]:
+// CHECK:    br label %[[COND_END15:.*]]
+// CHECK:       [[COND_FALSE14]]:
 // CHECK:    [[TMP27:%.*]] = load i32, ptr [[B5]], align 4
-// CHECK:    br label %[[COND_END16]]
-// CHECK:       [[COND_END16]]:
-// CHECK:    [[COND17:%.*]] = phi i32 [ [[TMP26]], %[[COND_TRUE14]] ], [ [[TMP27]], %[[COND_FALSE15]] ]
-// CHECK:    store i32 [[COND17]], ptr [[TMP5]], align 4
+// CHECK:    br label %[[COND_END15]]
+// CHECK:       [[COND_END15]]:
+// CHECK:    [[COND16:%.*]] = phi i32 [ [[TMP26]], %[[COND_TRUE13]] ], [ [[TMP27]], %[[COND_FALSE14]] ]
+// CHECK:    store i32 [[COND16]], ptr [[TMP5]], align 4
 // CHECK:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var)
 // CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK:       [[_OMP_REDUCTION_CASE2:.*:]]
@@ -2759,11 +2768,11 @@ void test_lambda_implicit_capture() {
 // CHECK:    br label %[[ATOMIC_CONT:.*]]
 // CHECK:       [[ATOMIC_CONT]]:
 // CHECK:    [[TMP29:%.*]] = phi i32 [ [[ATOMIC_LOAD]], %[[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP34:%.*]], %[[ATOMIC_CONT]] ]
-// CHECK:    store i32 [[TMP29]], ptr [[_TMP18:%.*]], align 4
-// CHECK:    [[TMP30:%.*]] = load i32, ptr [[_TMP18]], align 4
+// CHECK:    store i32 [[TMP29]], ptr [[_TMP17:%.*]], align 4
+// CHECK:    [[TMP30:%.*]] = load i32, ptr [[_TMP17]], align 4
 // CHECK:    [[TMP31:%.*]] = load i32, ptr [[A3]], align 4
-// CHECK:    [[MUL19:%.*]] = mul nsw i32 [[TMP30]], [[TMP31]]
-// CHECK:    store i32 [[MUL19]], ptr [[ATOMIC_TEMP:%.*]], align 4
+// CHECK:    [[MUL18:%.*]] = mul nsw i32 [[TMP30]], [[TMP31]]
+// CHECK:    store i32 [[MUL18]], ptr [[ATOMIC_TEMP:%.*]], align 4
 // CHECK:    [[TMP32:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4
 // CHECK:    [[TMP33:%.*]] = cmpxchg ptr [[TMP4]], i32 [[TMP29]], i32 [[TMP32]] monotonic monotonic, align 4
 // CHECK:    [[TMP34]] = extractvalue { i32, i1 } [[TMP33]], 0
@@ -3116,34 +3125,30 @@ void test_lambda_implicit_capture() {
 // CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4
 // CHECK:    [[TMP14:%.*]] = load i32, ptr [[I]], align 4
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
-// CHECK:    [[TMP15:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[TMP15:%.*]] = load i32, ptr [[A3]], align 4
 // CHECK:    [[CMP8:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
 // CHECK:    br i1 [[CMP8]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
 // CHECK:       [[IF_THEN]]:
 // CHECK:    [[TMP16:%.*]] = load i32, ptr [[I]], align 4
-// CHECK:    [[X9:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 0
-// CHECK:    store i32 [[TMP16]], ptr [[X9]], align 4
+// CHECK:    store i32 [[TMP16]], ptr [[A3]], align 4
 // CHECK:    br label %[[IF_END]]
 // CHECK:       [[IF_END]]:
 // CHECK:    [[TMP17:%.*]] = load i32, ptr [[I]], align 4
-// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
-// CHECK:    [[TMP18:%.*]] = load i32, ptr [[Y]], align 4
-// CHECK:    [[CMP10:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
-// CHECK:    br i1 [[CMP10]], label %[[IF_THEN11:.*]], label %[[IF_END13:.*]]
-// CHECK:       [[IF_THEN11]]:
+// CHECK:    [[TMP18:%.*]] = load i32, ptr [[B5]], align 4
+// CHECK:    [[CMP9:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
+// CHECK:    br i1 [[CMP9]], label %[[IF_THEN10:.*]], label %[[IF_END11:.*]]
+// CHECK:       [[IF_THEN10]]:
 // CHECK:    [[TMP19:%.*]] = load i32, ptr [[I]], align 4
-// CHECK:    [[Y12:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
-// CHECK:    store i32 [[TMP19]], ptr [[Y12]], align 4
-// CHECK:    br label %[[IF_END13]]
-// CHECK:       [[IF_END13]]:
+// CHECK:    store i32 [[TMP19]], ptr [[B5]], align 4
+// CHECK:    br label %[[IF_END11]]
+// CHECK:       [[IF_END11]]:
 // CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
 // CHECK:       [[OMP_BODY_CONTINUE]]:
 // CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
 // CHECK:       [[OMP_INNER_FOR_INC]]:
 // CHECK:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[ADD14:%.*]] = add nsw i32 [[TMP20]], 1
-// CHECK:    store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[ADD12:%.*]] = add nsw i32 [[TMP20]], 1
+// CHECK:    store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4
 // CHECK:    br label %[[OMP_INNER_FOR_COND]]
 // CHECK:       [[OMP_INNER_FOR_END]]:
 // CHECK:    br label %[[OMP_LOOP_EXIT:.*]]
@@ -3161,30 +3166,30 @@ void test_lambda_implicit_capture() {
 // CHECK:       [[_OMP_REDUCTION_CASE1:.*:]]
 // CHECK:    [[TMP24:%.*]] = load i32, ptr [[TMP4]], align 4
 // CHECK:    [[TMP25:%.*]] = load i32, ptr [[A3]], align 4
-// CHECK:    [[CMP15:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]]
-// CHECK:    br i1 [[CMP15]], label %[[COND_TRUE16:.*]], label %[[COND_FALSE17:.*]]
-// CHECK:       [[COND_TRUE16]]:
+// CHECK:    [[CMP13:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]]
+// CHECK:    br i1 [[CMP13]], label %[[COND_TRUE14:.*]], label %[[COND_FALSE15:.*]]
+// CHECK:       [[COND_TRUE14]]:
 // CHECK:    [[TMP26:%.*]] = load i32, ptr [[TMP4]], align 4
-// CHECK:    br label %[[COND_END18:.*]]
-// CHECK:       [[COND_FALSE17]]:
+// CHECK:    br label %[[COND_END16:.*]]
+// CHECK:       [[COND_FALSE15]]:
 // CHECK:    [[TMP27:%.*]] = load i32, ptr [[A3]], align 4
-// CHECK:    br label %[[COND_END18]]
-// CHECK:       [[COND_END18]]:
-// CHECK:    [[COND19:%.*]] = phi i32 [ [[TMP26]], %[[COND_TRUE16]] ], [ [[TMP27]], %[[COND_FALSE17]] ]
-// CHECK:    store i32 [[COND19]], ptr [[TMP4]], align 4
+// CHECK:    br label %[[COND_END16]]
+// CHECK:       [[COND_END16]]:
+// CHECK:    [[COND17:%.*]] = phi i32 [ [[TMP26]], %[[COND_TRUE14]] ], [ [[TMP27]], %[[COND_FALSE15]] ]
+// CHECK:    store i32 [[COND17]], ptr [[TMP4]], align 4
 // CHECK:    [[TMP28:%.*]] = load i32, ptr [[TMP5]], align 4
 // CHECK:    [[TMP29:%.*]] = load i32, ptr [[B5]], align 4
-// CHECK:    [[CMP20:%.*]] = icmp sgt i32 [[TMP28]], [[TMP29]]
-// CHECK:    br i1 [[CMP20]], label %[[COND_TRUE21:.*]], label %[[COND_FALSE22:.*]]
-// CHECK:       [[COND_TRUE21]]:
+// CHECK:    [[CMP18:%.*]] = icmp sgt i32 [[TMP28]], [[TMP29]]
+// CHECK:    br i1 [[CMP18]], label %[[COND_TRUE19:.*]], label %[[COND_FALSE20:.*]]
+// CHECK:       [[COND_TRUE19]]:
 // CHECK:    [[TMP30:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK:    br label %[[COND_END23:.*]]
-// CHECK:       [[COND_FALSE22]]:
+// CHECK:    br label %[[COND_END21:.*]]
+// CHECK:       [[COND_FALSE20]]:
 // CHECK:    [[TMP31:%.*]] = load i32, ptr [[B5]], align 4
-// CHECK:    br label %[[COND_END23]]
-// CHECK:       [[COND_END23]]:
-// CHECK:    [[COND24:%.*]] = phi i32 [ [[TMP30]], %[[COND_TRUE21]] ], [ [[TMP31]], %[[COND_FALSE22]] ]
-// CHECK:    store i32 [[COND24]], ptr [[TMP5]], align 4
+// CHECK:    br label %[[COND_END21]]
+// CHECK:       [[COND_END21]]:
+// CHECK:    [[COND22:%.*]] = phi i32 [ [[TMP30]], %[[COND_TRUE19]] ], [ [[TMP31]], %[[COND_FALSE20]] ]
+// CHECK:    store i32 [[COND22]], ptr [[TMP5]], align 4
 // CHECK:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var)
 // CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK:       [[_OMP_REDUCTION_CASE2:.*:]]
@@ -3241,6 +3246,80 @@ void test_lambda_implicit_capture() {
 // CHECK:    ret void
 //
 //
+// CHECK-LABEL: define dso_local void @_Z27test_binding_name_collisionv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P1:%.*]], ptr align 4 @__const._Z27test_binding_name_collisionv.p1, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P1]], i64 8, i1 false)
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    store ptr [[X]], ptr [[A:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[A]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z27test_binding_name_collisionv.omp_outlined, ptr [[TMP1]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z27test_binding_name_collisionv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[A]], ptr [[A_ADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store ptr [[TMP1]], ptr [[TMP:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store i32 0, ptr [[A1:%.*]], align 4
+// CHECK:    store ptr [[A1]], ptr [[_TMP2:%.*]], align 8
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P2:%.*]], ptr align 4 @"__const.<captured>.p2", i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P2]], i64 8, i1 false)
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// CHECK:    store i32 [[ADD]], ptr [[X]], align 4
+// CHECK:    [[X3:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[X3]], align 4
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP4]])
+// CHECK:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST:%.*]], i64 0, i64 0
+// CHECK:    store ptr [[A1]], ptr [[TMP5]], align 8
+// CHECK:    [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
+// CHECK:    [[TMP8:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP7]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z27test_binding_name_collisionv.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK:    switch i32 [[TMP8]], [[DOTOMP_REDUCTION_DEFAULT:label %.*]] [
+// CHECK:      i32 1, [[DOTOMP_REDUCTION_CASE1:label %.*]]
+// CHECK:      i32 2, [[DOTOMP_REDUCTION_CASE2:label %.*]]
+// CHECK:    ]
+// CHECK:       [[_OMP_REDUCTION_CASE1:.*:]]
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[A1]], align 4
+// CHECK:    [[ADD4:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
+// CHECK:    store i32 [[ADD4]], ptr [[TMP2]], align 4
+// CHECK:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK:       [[_OMP_REDUCTION_CASE2:.*:]]
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[A1]], align 4
+// CHECK:    [[TMP12:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP11]] monotonic, align 4
+// CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK:       [[_OMP_REDUCTION_DEFAULT:.*:]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z27test_binding_name_collisionv.omp_outlined.omp.reduction.reduction_func(
+// CHECK-SAME: ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
+// CHECK:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK:    store i32 [[ADD]], ptr [[TMP7]], align 4
+// CHECK:    ret void
+//
+//
 // CHECK-LABEL: define dso_local void @_Z31test_firstprivate_nontrivial_sbv(
 // CHECK-SAME: ) #[[ATTR0]] {
 // CHECK:  [[ENTRY:.*:]]
diff --git a/clang/test/OpenMP/structured-bindings-template-instantiation.cpp b/clang/test/OpenMP/structured-bindings-template-instantiation.cpp
index 6f0185b6eb583..3c8feef3efaa3 100644
--- a/clang/test/OpenMP/structured-bindings-template-instantiation.cpp
+++ b/clang/test/OpenMP/structured-bindings-template-instantiation.cpp
@@ -2,7 +2,7 @@
 // RUN: -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
 
 // RUN: %clang_cc1 -verify -std=c++20 -fopenmp -triple x86_64-pc-linux-gnu \
-// RUN: -ast-dump %s | FileCheck %s --check-prefix=AST
+// RUN: -ast-print %s | FileCheck %s --check-prefix=AST
 
 // expected-no-diagnostics
 
@@ -23,11 +23,30 @@ struct Point3D {
 // CHECK-LABEL: define {{.*}} @_Z28test_template_single_bindingI5PointEvT_(
 // CHECK: call void {{.*}}@__kmpc_fork_call(ptr @{{[0-9]+}}, i32 1, ptr @{{.*}}.omp_outlined, ptr
 //
-// AST: FunctionDecl {{.*}} test_template_single_binding 'void (Point)' implicit_instantiation
-// AST: DecompositionDecl {{.*}} used 'Point'
-// AST: OMPParallelDirective
-// AST-NEXT: CapturedStmt
-// AST: DeclRefExpr {{.*}} 'Point' lvalue Decomposition {{.*}} first_binding 'a' 'Point'
+// AST: void use(int);
+// AST: struct Point {
+// AST:     int x;
+// AST:     int y;
+// AST: };
+// AST: struct Point3D {
+// AST:     int x;
+// AST:     int y;
+// AST:     int z;
+// AST: };
+// AST: template <typename T> void test_template_single_binding(T p) {
+// AST:     auto = p;
+// AST:     #pragma omp parallel
+// AST:         {
+// AST:             use(a);
+// AST:         }
+// AST: }
+// AST: template<> void test_template_single_binding<Point>(Point p) {
+// AST:     auto = p;
+// AST:     #pragma omp parallel
+// AST:         {
+// AST:             use(a);
+// AST:         }
+// AST: }
 template<typename T>
 void test_template_single_binding(T p) {
   auto [a, b] = p;
@@ -40,11 +59,22 @@ void test_template_single_binding(T p) {
 // CHECK-LABEL: define {{.*}}@_Z26test_template_two_bindingsI5PointEvT_
 // CHECK: call void {{.*}}@__kmpc_fork_call(ptr {{.*}}, i32 2, ptr {{.*}}, ptr
 //
-// AST: FunctionDecl {{.*}} test_template_two_bindings 'void (Point)' implicit_instantiation
-// AST: DecompositionDecl {{.*}} used 'Point'
-// AST: OMPParallelDirective
-// AST: CapturedStmt
-// AST: DeclRefExpr {{.*}} 'Point' lvalue Decomposition {{.*}} first_binding 'a' 'Point'
+// AST: template <typename T> void test_template_two_bindings(T p) {
+// AST:     auto = p;
+// AST:     int result = 0;
+// AST:     #pragma omp parallel reduction(+: result)
+// AST:         {
+// AST:             result = a + b;
+// AST:         }
+// AST: }
+// AST: template<> void test_template_two_bindings<Point>(Point p) {
+// AST:     auto = p;
+// AST:     int result = 0;
+// AST:     #pragma omp parallel reduction(+: result)
+// AST:         {
+// AST:             result = a + b;
+// AST:         }
+// AST: }
 template<typename T>
 void test_template_two_bindings(T p) {
   auto [a, b] = p;
@@ -58,11 +88,24 @@ void test_template_two_bindings(T p) {
 // CHECK-LABEL: define {{.*}}@_Z28test_template_three_bindingsI7Point3DEiT_
 // CHECK: call void {{.*}}@__kmpc_fork_call(ptr {{.*}}, i32 2, ptr {{.*}}, ptr
 //
-// AST: FunctionDecl {{.*}} test_template_three_bindings 'int (Point3D)' implicit_instantiation
-// AST: DecompositionDecl {{.*}} used 'Point3D'
-// AST: OMPParallelDirective
-// AST: CapturedStmt
-// AST: DeclRefExpr {{.*}} 'Point3D' lvalue Decomposition
+// AST: template <typename T> int test_template_three_bindings(T p) {
+// AST:     auto = p;
+// AST:     int result = 0;
+// AST:     #pragma omp parallel reduction(+: result)
+// AST:         {
+// AST:             result = x + y + z;
+// AST:         }
+// AST:     return result;
+// AST: }
+// AST: template<> int test_template_three_bindings<Point3D>(Point3D p) {
+// AST:     auto = p;
+// AST:     int result = 0;
+// AST:     #pragma omp parallel reduction(+: result)
+// AST:         {
+// AST:             result = x + y + z;
+// AST:         }
+// AST:     return result;
+// AST: }
 template<typename T>
 int test_template_three_bindings(T p) {
   auto [x, y, z] = p;
@@ -78,10 +121,24 @@ int test_template_three_bindings(T p) {
 // CHECK-LABEL: define {{.*}}@_Z28test_template_reuse_bindingsI5PointEiT_
 // CHECK: call void {{.*}}@__kmpc_fork_call(ptr {{.*}}, i32 2, ptr {{.*}}, ptr
 //
-// AST: FunctionDecl {{.*}} test_template_reuse_bindings 'int (Point)' implicit_instantiation
-// AST: DecompositionDecl {{.*}} used 'Point'
-// AST: CapturedStmt
-// AST: DeclRefExpr {{.*}} 'Point' lvalue Decomposition
+// AST: template <typename T> int test_template_reuse_bindings(T p) {
+// AST:     auto = p;
+// AST:     int result = 0;
+// AST:     #pragma omp parallel reduction(+: result)
+// AST:         {
+// AST:             result = a + b + a * 2 + b * 3;
+// AST:         }
+// AST:     return result;
+// AST: }
+// AST: template<> int test_template_reuse_bindings<Point>(Point p) {
+// AST:     auto = p;
+// AST:     int result = 0;
+// AST:     #pragma omp parallel reduction(+: result)
+// AST:         {
+// AST:             result = a + b + a * 2 + b * 3;
+// AST:         }
+// AST:     return result;
+// AST: }
 template<typename T>
 int test_template_reuse_bindings(T p) {
   auto [a, b] = p;
@@ -96,11 +153,30 @@ int test_template_reuse_bindings(T p) {
 // CHECK-LABEL: define {{.*}}@_Z20test_template_nestedI5PointEiT_
 // CHECK: call void {{.*}}@__kmpc_fork_call(ptr {{.*}}, i32 2, ptr {{.*}}, ptr
 //
-// AST: FunctionDecl {{.*}} test_template_nested 'int (Point)' implicit_instantiation
-// AST: DecompositionDecl {{.*}} used 'Point'
-// AST: OMPParallelDirective
-// AST: CapturedStmt
-// AST: DeclRefExpr {{.*}} 'Point' lvalue Decomposition
+// AST: template <typename T> int test_template_nested(T p) {
+// AST:     auto = p;
+// AST:     int result = 0;
+// AST:     #pragma omp parallel
+// AST:         {
+// AST:             #pragma omp critical
+// AST:                 {
+// AST:                     result += a + b;
+// AST:                 }
+// AST:         }
+// AST:     return result;
+// AST: }
+// AST: template<> int test_template_nested<Point>(Point p) {
+// AST:     auto = p;
+// AST:     int result = 0;
+// AST:     #pragma omp parallel
+// AST:         {
+// AST:             #pragma omp critical
+// AST:                 {
+// AST:                     result += a + b;
+// AST:                 }
+// AST:         }
+// AST:     return result;
+// AST: }
 template<typename T>
 int test_template_nested(T p) {
   auto [a, b] = p;
@@ -118,11 +194,22 @@ int test_template_nested(T p) {
 // CHECK-LABEL: define {{.*}}@_Z30test_template_multiple_regionsI5PointEvT_
 // CHECK: call void {{.*}}@__kmpc_fork_call(ptr {{.*}}, i32 2, ptr {{.*}}, ptr
 //
-// AST: FunctionDecl {{.*}} test_template_multiple_regions 'void (Point)' implicit_instantiation
-// AST: DecompositionDecl {{.*}} used 'Point'
-// AST: OMPParallelDirective
-// AST: CapturedStmt
-// AST: DeclRefExpr {{.*}} 'Point' lvalue Decomposition
+// AST: template <typename T> void test_template_multiple_regions(T p) {
+// AST:     auto = p;
+// AST:     int result1 = 0, result2 = 0;
+// AST:     #pragma omp parallel reduction(+: result1)
+// AST:         {
+// AST:             result1 = a;
+// AST:         }
+// AST: }
+// AST: template<> void test_template_multiple_regions<Point>(Point p) {
+// AST:     auto = p;
+// AST:     int result1 = 0, result2 = 0;
+// AST:     #pragma omp parallel reduction(+: result1)
+// AST:         {
+// AST:             result1 = a;
+// AST:         }
+// AST: }
 template<typename T>
 void test_template_multiple_regions(T p) {
   auto [a, b] = p;
@@ -137,11 +224,25 @@ typedef unsigned int size_t;
 // CHECK-LABEL: define {{.*}}@_Z19test_template_arrayIiLj2EEiRAT0__T_
 // CHECK: call void {{.*}}@__kmpc_fork_call(ptr {{.*}}, i32 2, ptr {{.*}}, ptr
 //
-// AST: FunctionDecl {{.*}} test_template_array 'int (int (&)[2])' implicit_instantiation
-// AST: DecompositionDecl {{.*}} used 'int[2]'
-// AST: OMPParallelDirective
-// AST: CapturedStmt
-// AST: DeclRefExpr {{.*}} 'int[2]' lvalue Decomposition
+// AST: typedef unsigned int size_t;
+// AST: template <typename T, size_t N> int test_template_array(T (&arr)[N]) {
+// AST:     auto = arr;
+// AST:     int result = 0;
+// AST:     #pragma omp parallel reduction(+: result)
+// AST:         {
+// AST:             result = a + b;
+// AST:         }
+// AST:     return result;
+// AST: }
+// AST: template<> int test_template_array<int, 2U>(int (&arr)[2]) {
+// AST:     auto = {arr[*]};
+// AST:     int result = 0;
+// AST:     #pragma omp parallel reduction(+: result)
+// AST:         {
+// AST:             result = a + b;
+// AST:         }
+// AST:     return result;
+// AST: }
 template<typename T, size_t N>
 int test_template_array(T (&arr)[N]) {
   auto [a, b] = arr;

>From 703db8f7e41c97afe572c05257cd0a35afd47ec9 Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Mon, 8 Jun 2026 07:46:39 -0700
Subject: [PATCH 32/45] Fix format

---
 clang/lib/CodeGen/CodeGenFunction.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index cc2bd396423dc..2e61f95661ef0 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -1222,7 +1222,7 @@ class CodeGenFunction : public CodeGenTypeCache {
     OMPMapVars MappedVars;
     OMPPrivateScope(const OMPPrivateScope &) = delete;
     void operator=(const OMPPrivateScope &) = delete;
-    llvm::DenseMap<const BindingDecl*, Address> SavedBindings;
+    llvm::DenseMap<const BindingDecl *, Address> SavedBindings;
 
   public:
     /// Enter a new OpenMP private scope.

>From 7326ee4c81f6d4c83c97ff28dee3fc463528fa26 Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Tue, 9 Jun 2026 11:03:02 -0700
Subject: [PATCH 33/45] Addressed review comments

---
 clang/lib/CodeGen/CGExpr.cpp                  |   8 +-
 clang/lib/CodeGen/CGOpenMPRuntime.cpp         |  11 +-
 clang/lib/CodeGen/CGStmtOpenMP.cpp            | 102 +++++++-----
 clang/lib/CodeGen/CodeGenFunction.h           |   5 +-
 clang/lib/Sema/SemaOpenMP.cpp                 |  20 ++-
 clang/lib/Sema/SemaStmt.cpp                   |   3 +-
 .../OpenMP/structured-bindings-codegen.cpp    | 150 +++++++++---------
 7 files changed, 163 insertions(+), 136 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 18362560670d8..939154b487442 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3606,9 +3606,10 @@ LValue CodeGenFunction::EmitOMPCapturedBindingLValue(const BindingDecl *BD) {
          "Expected OpenMP captured region");
   assert(CGM.getLangOpts().OpenMP && "Expected OpenMP to be enabled");
 
-  if (auto It = LocalDeclMap.find(BD); It != LocalDeclMap.end()) {
+ if (auto It = LocalDeclMap.find(BD->getCanonicalDecl());
+      It != LocalDeclMap.end())
     return MakeAddrLValue(It->second, BD->getType());
-  }
+
   auto *DD = cast<VarDecl>(BD->getDecomposedDecl());
   Expr *BindingExpr = BD->getBinding()->IgnoreImplicit();
   QualType DREType = DD->getType().getNonReferenceType();
@@ -3840,7 +3841,8 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
       if (CapturedStmtInfo &&
           CapturedStmtInfo->getKind() == CapturedRegionKind::CR_OpenMP &&
           CGM.getLangOpts().OpenMP) {
-        auto NameIt = OMPPrivatizedBindings.find(BD);
+        auto NameIt = OMPPrivatizedBindings.find(
+            cast<BindingDecl>(BD->getCanonicalDecl()));
         if (NameIt != OMPPrivatizedBindings.end())
           return MakeAddrLValue(NameIt->second, E->getType(),
                                 AlignmentSource::Decl);
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index f3158f48e7944..2f7a90a6f18b9 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -5661,8 +5661,15 @@ static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
   llvm::raw_svector_ostream Out(Buffer);
   const clang::DeclRefExpr *DE;
   const VarDecl *D = ::getBaseDecl(Ref, DE);
-  if (!D)
-    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
+  if (!D) {
+    auto *DRE = cast<DeclRefExpr>(Ref);
+    if (const auto *BD = dyn_cast<BindingDecl>(DRE->getDecl())) {
+      // For BindingDecls, use the decomposed declaration as the base.
+      D = cast<VarDecl>(BD->getDecomposedDecl());
+    } else {
+      D = cast<VarDecl>(DRE->getDecl());
+    }
+  }
   D = D->getCanonicalDecl();
   std::string Name = CGM.getOpenMPRuntime().getName(
       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 147bf4e687c89..17d6b3cd5fdd3 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1136,12 +1136,12 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
   bool DeviceConstTarget = getLangOpts().OpenMPIsTargetDevice &&
                            isOpenMPTargetExecutionDirective(EKind);
   bool FirstprivateIsLastprivate = false;
-  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
+  llvm::DenseMap<const Decl *, OpenMPLastprivateModifier> Lastprivates;
   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
-    for (const auto *D : C->varlist())
-      Lastprivates.try_emplace(
-          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
-          C->getKind());
+    for (const auto *D : C->varlist()) {
+      const auto *VD = cast<DeclRefExpr>(D)->getDecl();
+      Lastprivates.try_emplace(VD->getCanonicalDecl(), C->getKind());
+    }
   }
   llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
   llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
@@ -1159,13 +1159,17 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
 
       if (const auto *BD = dyn_cast<BindingDecl>(OrigDecl)) {
+        // Check if this binding is also lastprivate.
+        bool ThisFirstprivateIsLastprivate =
+            Lastprivates.count(BD->getCanonicalDecl()) > 0;
         const auto *VDInit =
             cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
         DeclRefExpr DRE(getContext(), const_cast<BindingDecl *>(BD),
                         /*RefersToEnclosingVariableOrCapture=*/true,
                         BD->getType(), VK_LValue, (*IRef)->getExprLoc());
         LValue OriginalLVal = EmitLValue(&DRE);
-        Address OriginalAddr = OriginalLVal.getAddress();
+        Address OriginalAddr =
+            OriginalLVal.getAddress();
         // Emit private VarDecl with copy init. Remap VDInit to point to the
         // original binding so EmitDecl properly initializes VD.
         setAddrOfLocalVar(VDInit, OriginalAddr);
@@ -1176,6 +1180,8 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
         assert(IsRegistered &&
                "firstprivate var already registered as firstprivate");
         (void)IsRegistered;
+        FirstprivateIsLastprivate =
+            FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
         ++IRef;
         ++InitsRef;
         continue;
@@ -1309,22 +1315,14 @@ void CodeGenFunction::EmitOMPPrivateClause(
     auto IRef = C->varlist_begin();
     for (const Expr *IInit : C->private_copies()) {
       const auto *OrigDecl = cast<DeclRefExpr>(*IRef)->getDecl();
-      bool ShouldEmit = true;
-      if (const auto *VD = dyn_cast<VarDecl>(OrigDecl)) {
-        if (!EmittedAsPrivate.insert(VD->getCanonicalDecl()).second) {
-          ShouldEmit = false; // Already emitted.
-        }
-      }
-      if (ShouldEmit) {
-        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
-        EmitDecl(*VD);
-        // Emit private VarDecl with copy init.
-        bool IsRegistered =
-            PrivateScope.addPrivate(OrigDecl, GetAddrOfLocalVar(VD));
-        assert(IsRegistered && "private var already registered as private");
-        // Silence the warning about unused variable.
-        (void)IsRegistered;
-      }
+      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
+      EmitDecl(*VD);
+      // Emit private VarDecl with copy init.
+      bool IsRegistered =
+          PrivateScope.addPrivate(OrigDecl, GetAddrOfLocalVar(VD));
+      assert(IsRegistered && "private var already registered as private");
+      // Silence the warning about unused variable.
+      (void)IsRegistered;
       ++IRef;
     }
   }
@@ -1426,7 +1424,15 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit(
     for (const Expr *IInit : C->private_copies()) {
       // Keep the address of the original variable for future update at the end
       // of the loop.
-      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
+      const auto *OrigDecl = cast<DeclRefExpr>(*IRef)->getDecl();
+      // BindingDecls are handled through privatization in
+      // EmitOMPCapturedBindingLValue.
+      if (isa<BindingDecl>(OrigDecl)) {
+        ++IRef;
+        ++IDestRef;
+        continue;
+      }
+      const auto *OrigVD = cast<VarDecl>(OrigDecl);
       // Taskloops do not require additional initialization, it is done in
       // runtime support library.
       if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
@@ -1515,8 +1521,15 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal(
     auto ISrcRef = C->source_exprs().begin();
     auto IDestRef = C->destination_exprs().begin();
     for (const Expr *AssignOp : C->assignment_ops()) {
-      const auto *PrivateVD =
-          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
+      const auto *PrivateDecl = cast<DeclRefExpr>(*IRef)->getDecl();
+      // BindingDecls don't need the same lastprivate finalization as VarDecls.
+      if (isa<BindingDecl>(PrivateDecl)) {
+        ++IRef;
+        ++ISrcRef;
+        ++IDestRef;
+        continue;
+      }
+      const auto *PrivateVD = cast<VarDecl>(PrivateDecl);
       QualType Type = PrivateVD->getType();
       const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
       if (AlreadyEmittedVars.insert(CanonicalVD).second) {
@@ -1594,6 +1607,17 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
   auto *IPriv = Privates.begin();
   for (const Expr *IRef : Shareds) {
     const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
+    const BindingDecl *BD = nullptr;
+    if (const auto *DRE = dyn_cast<DeclRefExpr>(IRef->IgnoreParenImpCasts())) {
+      if (const auto *OCED = dyn_cast<OMPCapturedExprDecl>(DRE->getDecl())) {
+        if (const Expr *Init = OCED->getInit()) {
+          if (const auto *InnerDRE =
+                  dyn_cast<DeclRefExpr>(Init->IgnoreParenImpCasts())) {
+            BD = dyn_cast<BindingDecl>(InnerDRE->getDecl());
+          }
+        }
+      }
+    }
     // Emit private VarDecl with reduction init.
     RedCG.emitSharedOrigLValue(*this, Count);
     RedCG.emitAggregateType(*this, Count);
@@ -1613,21 +1637,11 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
     // Silence the warning about unused variable.
     (void)IsRegistered;
 
-    if (const auto *DRE = dyn_cast<DeclRefExpr>(IRef->IgnoreParenImpCasts())) {
-      const ValueDecl *VD = DRE->getDecl();
-      if (const auto *OCED = dyn_cast<OMPCapturedExprDecl>(VD)) {
-        if (const Expr *Init = OCED->getInit()) {
-          if (const auto *InnerDRE =
-                  dyn_cast<DeclRefExpr>(Init->IgnoreParenImpCasts())) {
-            if (const auto *BD = dyn_cast<BindingDecl>(InnerDRE->getDecl())) {
-              IsRegistered = PrivateScope.addPrivate(BD, BaseAddr);
-              assert(IsRegistered &&
-                     "private binding already registered as private");
-              (void)IsRegistered;
-            }
-          }
-        }
-      }
+    // If this is a BindingDecl, also register it directly.
+    if (BD) {
+      IsRegistered = PrivateScope.addPrivate(BD, BaseAddr);
+      assert(IsRegistered && "private binding already registered as private");
+      (void)IsRegistered;
     }
     const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
     const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
@@ -1887,8 +1901,12 @@ checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
       if (!DRE)
         continue;
-      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
-      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
+      // Skip BindingDecls - they don't use the same conditional lastprivate
+      // mechanism.
+      if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
+        PrivateDecls.insert(VD);
+        CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
+      } 
     }
   }
   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 2e61f95661ef0..343f5c58f36df 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -1158,7 +1158,8 @@ class CodeGenFunction : public CodeGenTypeCache {
                     Address TempAddr) {
       LocalVD = cast<ValueDecl>(LocalVD->getCanonicalDecl());
 
-      // For BindingDecls, also store by name for remapped lookup.
+      // For BindingDecls, also store in OMPPrivatizedBindings for remapped
+      // lookup.
       if (const auto *BD = dyn_cast<BindingDecl>(LocalVD))
         CGF.OMPPrivatizedBindings.insert_or_assign(BD, TempAddr);
 
@@ -1559,7 +1560,7 @@ class CodeGenFunction : public CodeGenTypeCache {
   /// decls.
   DeclMapTy LocalDeclMap;
 
-  /// Name-based lookup map for privatized BindingDecls.
+  /// Lookup map for privatized BindingDecls.
   /// Used when BindingDecls are remapped during OpenMP outlining, since the
   /// remapped BindingDecl has a different pointer than the original.
   llvm::DenseMap<const BindingDecl *, Address> OMPPrivatizedBindings;
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 39c5493863d0e..f025d715ecf9b 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -2971,7 +2971,14 @@ void SemaOpenMP::EndOpenMPDSABlock(Stmt *CurDirective) {
         continue;
       }
       auto *DRE = cast<DeclRefExpr>(DE->IgnoreParens());
-      auto *VD = cast<VarDecl>(DRE->getDecl());
+      auto *D = DRE->getDecl();
+      // BindingDecls don't need special lastprivate handling - they're already
+      // handled through their decomposition decl.
+      if (isa<BindingDecl>(D)) {
+        PrivateCopies.push_back(nullptr);
+        continue;
+      }
+      auto *VD = cast<VarDecl>(D);
       QualType Type = VD->getType().getNonReferenceType();
       const DSAStackTy::DSAVarData DVar =
           DSAStack->getTopDSA(VD, /*FromParent=*/false);
@@ -19770,11 +19777,11 @@ OMPClause *SemaOpenMP::ActOnOpenMPFirstprivateClause(ArrayRef<Expr *> VarList,
         SemaRef, VDPrivate, RefExpr->getType().getUnqualifiedType(),
         RefExpr->getExprLoc());
     DeclRefExpr *Ref = nullptr;
+    bool IsBindingDecl = isa<BindingDecl>(D);
     if (!VD && !SemaRef.CurContext->isDependentContext()) {
       if (TopDVar.CKind == OMPC_lastprivate) {
         Ref = TopDVar.PrivateCopy;
       } else {
-        bool IsBindingDecl = isa<BindingDecl>(D);
         if (!IsBindingDecl) {
           auto *FD = dyn_cast<FieldDecl>(D);
           VarDecl *VD = FD ? DSAStack->getImplicitFDCapExprDecl(FD) : nullptr;
@@ -19791,7 +19798,6 @@ OMPClause *SemaOpenMP::ActOnOpenMPFirstprivateClause(ArrayRef<Expr *> VarList,
     }
     if (!IsImplicitClause)
       DSAStack->addDSA(D, RefExpr->IgnoreParens(), OMPC_firstprivate, Ref);
-    bool IsBindingDecl = isa<BindingDecl>(D);
     Vars.push_back(
         (VD || IsBindingDecl || SemaRef.CurContext->isDependentContext())
             ? RefExpr->IgnoreParens()
@@ -19974,9 +19980,11 @@ OMPClause *SemaOpenMP::ActOnOpenMPLastprivateClause(
       }
     }
     DSAStack->addDSA(D, RefExpr->IgnoreParens(), OMPC_lastprivate, Ref);
-    Vars.push_back((VD || SemaRef.CurContext->isDependentContext())
-                       ? RefExpr->IgnoreParens()
-                       : Ref);
+    bool IsBindingDecl = isa<BindingDecl>(D);
+    Vars.push_back(
+        (VD || IsBindingDecl || SemaRef.CurContext->isDependentContext())
+            ? RefExpr->IgnoreParens()
+            : Ref);
     SrcExprs.push_back(PseudoSrcExpr);
     DstExprs.push_back(PseudoDstExpr);
     AssignmentOps.push_back(AssignmentOp.get());
diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp
index 47f50005f0fb7..122d5f4c7df20 100644
--- a/clang/lib/Sema/SemaStmt.cpp
+++ b/clang/lib/Sema/SemaStmt.cpp
@@ -4647,8 +4647,7 @@ buildCapturedStmtCaptureList(Sema &S, CapturedRegionScopeInfo *RSI,
       if (auto *BD = dyn_cast<BindingDecl>(CapVar)) {
         // Detect structured bindings in OpenMP captured regions.
         // When a BindingDecl (e.g., 'a' from 'auto [a, b] = p')
-        // is referenced inside an OpenMP region, we currently don't support
-        // capturing them.
+        // is referenced inside an OpenMP region, it is handled here.
         // This is reached during capture list construction when processing the
         // OpenMP region, before expression evaluation in SemaExpr.cpp.
         // Note: The reset to DecompositionDecl in SemaExpr.cpp happens during
diff --git a/clang/test/OpenMP/structured-bindings-codegen.cpp b/clang/test/OpenMP/structured-bindings-codegen.cpp
index f3c45fd6d8d81..2301fd8ba19cf 100644
--- a/clang/test/OpenMP/structured-bindings-codegen.cpp
+++ b/clang/test/OpenMP/structured-bindings-codegen.cpp
@@ -2828,80 +2828,71 @@ void test_lambda_implicit_capture() {
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP0]], i32 0, i32 0
 // CHECK:    store ptr [[X]], ptr [[A:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[A]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @_Z24test_lastprivate_bindingv.omp_outlined, ptr [[TMP0]], ptr [[TMP1]])
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z24test_lastprivate_bindingv.omp_outlined, ptr [[TMP0]])
 // CHECK:    [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 0
-// CHECK:    [[TMP2:%.*]] = load i32, ptr [[X1]], align 4
-// CHECK:    call void @_Z3usei(i32 noundef [[TMP2]])
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[X1]], align 4
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP1]])
 // CHECK:    ret void
 //
 //
 // CHECK-LABEL: define internal void @_Z24test_lastprivate_bindingv.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    store ptr [[A]], ptr [[A_ADDR:%.*]], align 8
 // CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    store ptr [[TMP2]], ptr [[TMP:%.*]], align 8
 // CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
 // CHECK:    store i32 9, ptr [[DOTOMP_UB:%.*]], align 4
 // CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
 // CHECK:    store i32 0, ptr [[DOTOMP_IS_LAST:%.*]], align 4
-// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    store ptr [[A2:%.*]], ptr [[_TMP3:%.*]], align 8
-// CHECK:    [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
-// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
 // CHECK:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
 // CHECK:       [[COND_TRUE]]:
 // CHECK:    br label %[[COND_END:.*]]
 // CHECK:       [[COND_FALSE]]:
-// CHECK:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK:    br label %[[COND_END]]
 // CHECK:       [[COND_END]]:
-// CHECK:    [[COND:%.*]] = phi i32 [ 9, %[[COND_TRUE]] ], [ [[TMP7]], %[[COND_FALSE]] ]
+// CHECK:    [[COND:%.*]] = phi i32 [ 9, %[[COND_TRUE]] ], [ [[TMP5]], %[[COND_FALSE]] ]
 // CHECK:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK:    store i32 [[TMP8]], ptr [[DOTOMP_IV:%.*]], align 4
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK:    store i32 [[TMP6]], ptr [[DOTOMP_IV:%.*]], align 4
 // CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
 // CHECK:       [[OMP_INNER_FOR_COND]]:
-// CHECK:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK:    [[CMP4:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]]
-// CHECK:    br i1 [[CMP4]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
+// CHECK:    br i1 [[CMP1]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
 // CHECK:       [[OMP_INNER_FOR_BODY]]:
-// CHECK:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
 // CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4
-// CHECK:    [[TMP12:%.*]] = load i32, ptr [[I]], align 4
-// CHECK:    [[MUL5:%.*]] = mul nsw i32 [[TMP12]], 10
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[I]], align 4
+// CHECK:    [[MUL2:%.*]] = mul nsw i32 [[TMP10]], 10
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
-// CHECK:    store i32 [[MUL5]], ptr [[X]], align 4
+// CHECK:    store i32 [[MUL2]], ptr [[X]], align 4
 // CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
 // CHECK:       [[OMP_BODY_CONTINUE]]:
 // CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
 // CHECK:       [[OMP_INNER_FOR_INC]]:
-// CHECK:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK:    store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1
+// CHECK:    store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
 // CHECK:    br label %[[OMP_INNER_FOR_COND]]
 // CHECK:       [[OMP_INNER_FOR_END]]:
 // CHECK:    br label %[[OMP_LOOP_EXIT:.*]]
 // CHECK:       [[OMP_LOOP_EXIT]]:
-// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]])
-// CHECK:    [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK:    [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
-// CHECK:    br i1 [[TMP15]], [[DOTOMP_LASTPRIVATE_THEN:label %.*]], [[DOTOMP_LASTPRIVATE_DONE:label %.*]]
+// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK:    [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
+// CHECK:    br i1 [[TMP13]], [[DOTOMP_LASTPRIVATE_THEN:label %.*]], [[DOTOMP_LASTPRIVATE_DONE:label %.*]]
 // CHECK:       [[_OMP_LASTPRIVATE_THEN:.*:]]
-// CHECK:    [[TMP16:%.*]] = load ptr, ptr [[_TMP3]], align 8
-// CHECK:    [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
-// CHECK:    store i32 [[TMP17]], ptr [[TMP3]], align 4
 // CHECK:    br [[DOTOMP_LASTPRIVATE_DONE]]
 // CHECK:       [[_OMP_LASTPRIVATE_DONE:.*:]]
 // CHECK:    ret void
@@ -2964,68 +2955,72 @@ void test_lambda_implicit_capture() {
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP0]], i32 0, i32 0
 // CHECK:    store ptr [[X]], ptr [[A:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[A]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @_Z28test_lastprivate_conditionalv.omp_outlined, ptr [[TMP0]], ptr [[TMP1]])
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z28test_lastprivate_conditionalv.omp_outlined, ptr [[TMP0]])
 // CHECK:    [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 0
-// CHECK:    [[TMP2:%.*]] = load i32, ptr [[X1]], align 4
-// CHECK:    call void @_Z3usei(i32 noundef [[TMP2]])
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[X1]], align 4
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP1]])
 // CHECK:    ret void
 //
 //
 // CHECK-LABEL: define internal void @_Z28test_lastprivate_conditionalv.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    store ptr [[A]], ptr [[A_ADDR:%.*]], align 8
 // CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    store ptr [[TMP2]], ptr [[TMP:%.*]], align 8
 // CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
 // CHECK:    store i32 99, ptr [[DOTOMP_UB:%.*]], align 4
 // CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
 // CHECK:    store i32 0, ptr [[DOTOMP_IS_LAST:%.*]], align 4
-// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_LASPRIVATE_CONDITIONAL:%.*]], ptr [[A2:%.*]], i32 0, i32 1
-// CHECK:    store i8 0, ptr [[TMP4]], align 4
-// CHECK:    [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_LASPRIVATE_CONDITIONAL]], ptr [[A2]], i32 0, i32 0
-// CHECK:    store ptr [[TMP5]], ptr [[_TMP3:%.*]], align 8
-// CHECK:    [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
-// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 99
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99
 // CHECK:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
 // CHECK:       [[COND_TRUE]]:
 // CHECK:    br label %[[COND_END:.*]]
 // CHECK:       [[COND_FALSE]]:
-// CHECK:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK:    br label %[[COND_END]]
 // CHECK:       [[COND_END]]:
-// CHECK:    [[COND:%.*]] = phi i32 [ 99, %[[COND_TRUE]] ], [ [[TMP9]], %[[COND_FALSE]] ]
+// CHECK:    [[COND:%.*]] = phi i32 [ 99, %[[COND_TRUE]] ], [ [[TMP5]], %[[COND_FALSE]] ]
 // CHECK:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK:    store i32 [[TMP10]], ptr [[DOTOMP_IV:%.*]], align 4
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK:    store i32 [[TMP6]], ptr [[DOTOMP_IV:%.*]], align 4
 // CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
 // CHECK:       [[OMP_INNER_FOR_COND]]:
-// CHECK:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK:    [[CMP4:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
-// CHECK:    br i1 [[CMP4]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
+// CHECK:    br i1 [[CMP1]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
 // CHECK:       [[OMP_INNER_FOR_BODY]]:
-// CHECK:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
 // CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4
-// CHECK:    [[TMP14:%.*]] = load i32, ptr [[I]], align 4
-// CHECK:    [[REM:%.*]] = srem i32 [[TMP14]], 7
-// CHECK:    [[CMP5:%.*]] = icmp eq i32 [[REM]], 0
-// CHECK:    br i1 [[CMP5]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[I]], align 4
+// CHECK:    [[REM:%.*]] = srem i32 [[TMP10]], 7
+// CHECK:    [[CMP2:%.*]] = icmp eq i32 [[REM]], 0
+// CHECK:    br i1 [[CMP2]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
 // CHECK:       [[IF_THEN]]:
-// CHECK:    [[TMP15:%.*]] = load i32, ptr [[I]], align 4
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[I]], align 4
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
-// CHECK:    store i32 [[TMP15]], ptr [[X]], align 4
+// CHECK:    store i32 [[TMP11]], ptr [[X]], align 4
+// CHECK:    [[X3:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP3]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var)
+// CHECK:    [[TMP13:%.*]] = load i32, ptr @.{{pl_cond[.].+[.|,]}} align 4
+// CHECK:    [[TMP14:%.*]] = icmp sle i32 [[TMP13]], [[TMP12]]
+// CHECK:    br i1 [[TMP14]], label %[[LP_COND_THEN:.*]], label %[[LP_COND_EXIT:.*]]
+// CHECK:       [[LP_COND_THEN]]:
+// CHECK:    store i32 [[TMP12]], ptr @.{{pl_cond[.].+[.|,]}} align 4
+// CHECK:    [[TMP15:%.*]] = load i32, ptr [[X3]], align 4
+// CHECK:    store i32 [[TMP15]], ptr @{{pl_cond[.].+[.|,]}} align 4
+// CHECK:    br label %[[LP_COND_EXIT]]
+// CHECK:       [[LP_COND_EXIT]]:
+// CHECK:    call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP3]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var)
 // CHECK:    br label %[[IF_END]]
 // CHECK:       [[IF_END]]:
 // CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
@@ -3033,21 +3028,18 @@ void test_lambda_implicit_capture() {
 // CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
 // CHECK:       [[OMP_INNER_FOR_INC]]:
 // CHECK:    [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1
-// CHECK:    store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[ADD4:%.*]] = add nsw i32 [[TMP16]], 1
+// CHECK:    store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
 // CHECK:    br label %[[OMP_INNER_FOR_COND]]
 // CHECK:       [[OMP_INNER_FOR_END]]:
 // CHECK:    br label %[[OMP_LOOP_EXIT:.*]]
 // CHECK:       [[OMP_LOOP_EXIT]]:
-// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP7]])
+// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK:    [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK:    [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
-// CHECK:    call void @__kmpc_barrier(ptr @[[GLOB7:[0-9]+]], i32 [[TMP7]])
+// CHECK:    call void @__kmpc_barrier(ptr @[[GLOB7:[0-9]+]], i32 [[TMP3]])
 // CHECK:    br i1 [[TMP18]], [[DOTOMP_LASTPRIVATE_THEN:label %.*]], [[DOTOMP_LASTPRIVATE_DONE:label %.*]]
 // CHECK:       [[_OMP_LASTPRIVATE_THEN:.*:]]
-// CHECK:    [[TMP19:%.*]] = load ptr, ptr [[_TMP3]], align 8
-// CHECK:    [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
-// CHECK:    store i32 [[TMP20]], ptr [[TMP3]], align 4
 // CHECK:    br [[DOTOMP_LASTPRIVATE_DONE]]
 // CHECK:       [[_OMP_LASTPRIVATE_DONE:.*:]]
 // CHECK:    ret void

>From 974a8c9e1892d1dcea7c719bf3ed120471198119 Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Wed, 10 Jun 2026 08:30:44 -0700
Subject: [PATCH 34/45] Fixed the failing tests

---
 clang/lib/CodeGen/CGStmtOpenMP.cpp            |  66 ++-
 clang/lib/Sema/SemaOpenMP.cpp                 |   3 +-
 .../OpenMP/structured-bindings-codegen.cpp    | 464 ++++++++----------
 3 files changed, 253 insertions(+), 280 deletions(-)

diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 17d6b3cd5fdd3..69ed2ed56ec6e 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1310,19 +1310,19 @@ void CodeGenFunction::EmitOMPPrivateClause(
     CodeGenFunction::OMPPrivateScope &PrivateScope) {
   if (!HaveInsertPoint())
     return;
-  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
+  llvm::DenseSet<const ValueDecl *> EmittedAsPrivate;
   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
     auto IRef = C->varlist_begin();
     for (const Expr *IInit : C->private_copies()) {
       const auto *OrigDecl = cast<DeclRefExpr>(*IRef)->getDecl();
       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
-      EmitDecl(*VD);
-      // Emit private VarDecl with copy init.
-      bool IsRegistered =
-          PrivateScope.addPrivate(OrigDecl, GetAddrOfLocalVar(VD));
-      assert(IsRegistered && "private var already registered as private");
-      // Silence the warning about unused variable.
-      (void)IsRegistered;
+      if (EmittedAsPrivate.insert(OrigDecl).second) {
+        EmitDecl(*VD);
+        bool IsRegistered =
+            PrivateScope.addPrivate(OrigDecl, GetAddrOfLocalVar(VD));
+        assert(IsRegistered && "private var already registered as private");
+        (void)IsRegistered;
+      }
       ++IRef;
     }
   }
@@ -1608,15 +1608,35 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
   for (const Expr *IRef : Shareds) {
     const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
     const BindingDecl *BD = nullptr;
-    if (const auto *DRE = dyn_cast<DeclRefExpr>(IRef->IgnoreParenImpCasts())) {
-      if (const auto *OCED = dyn_cast<OMPCapturedExprDecl>(DRE->getDecl())) {
-        if (const Expr *Init = OCED->getInit()) {
-          if (const auto *InnerDRE =
-                  dyn_cast<DeclRefExpr>(Init->IgnoreParenImpCasts())) {
-            BD = dyn_cast<BindingDecl>(InnerDRE->getDecl());
-          }
-        }
-      }
+    if (const auto *DRE = dyn_cast<DeclRefExpr>(IRef->IgnoreParenImpCasts()))
+      BD = dyn_cast<BindingDecl>(DRE->getDecl());
+    if (BD) {
+      // Emit the private VarDecl with reduction initialization.
+      EmitDecl(*PrivateVD);
+      Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
+
+      // Register the BindingDecl with the private address.
+      bool IsRegistered = PrivateScope.addPrivate(BD, PrivateAddr);
+      assert(IsRegistered && "private binding already registered as private");
+      (void)IsRegistered;
+
+      // Get the original BindingDecl address for LHSVD registration.
+      DeclRefExpr BindingDRE(getContext(), const_cast<BindingDecl *>(BD),
+                             /*RefersToEnclosingVariableOrCapture=*/true,
+                             BD->getType(), VK_LValue, IRef->getExprLoc());
+      LValue OriginalLVal = EmitLValue(&BindingDRE);
+      Address OriginalAddr = OriginalLVal.getAddress();
+
+      // Register LHSVD/RHSVD for reduction operation.
+      const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
+      const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
+      PrivateScope.addPrivate(LHSVD, OriginalAddr);
+      PrivateScope.addPrivate(RHSVD, PrivateAddr);
+      ++Count;
+      ++ILHS;
+      ++IRHS;
+      ++IPriv;
+      continue;
     }
     // Emit private VarDecl with reduction init.
     RedCG.emitSharedOrigLValue(*this, Count);
@@ -1637,12 +1657,6 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
     // Silence the warning about unused variable.
     (void)IsRegistered;
 
-    // If this is a BindingDecl, also register it directly.
-    if (BD) {
-      IsRegistered = PrivateScope.addPrivate(BD, BaseAddr);
-      assert(IsRegistered && "private binding already registered as private");
-      (void)IsRegistered;
-    }
     const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
     const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
     QualType Type = PrivateVD->getType();
@@ -1890,7 +1904,11 @@ checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
       if (!DRE)
         continue;
-      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
+      // Skip BindingDecls - lastprivate conditional only applies to VarDecls.
+      const auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
+      if (!VD)
+        continue;
+      PrivateDecls.insert(VD);
       CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
     }
   }
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index f025d715ecf9b..d2074f83698d4 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -21202,7 +21202,8 @@ static bool actOnOMPReductionKindClause(
 
     DeclRefExpr *Ref = nullptr;
     Expr *VarsExpr = RefExpr->IgnoreParens();
-    if (!VD && !S.CurContext->isDependentContext()) {
+    bool IsBindingDecl = isa<BindingDecl>(D);
+    if (!VD && !IsBindingDecl && !S.CurContext->isDependentContext()) {
       if (ASE || OASE) {
         TransformExprToCaptures RebuildToCapture(S, D);
         VarsExpr =
diff --git a/clang/test/OpenMP/structured-bindings-codegen.cpp b/clang/test/OpenMP/structured-bindings-codegen.cpp
index 2301fd8ba19cf..affe028edb9d2 100644
--- a/clang/test/OpenMP/structured-bindings-codegen.cpp
+++ b/clang/test/OpenMP/structured-bindings-codegen.cpp
@@ -2528,93 +2528,85 @@ void test_lambda_implicit_capture() {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    call void @llvm.memset.p0.i64(ptr align 4 [[P:%.*]], i8 0, i64 8, i1 false)
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z26test_reduction_binding_sumv.omp_outlined, ptr [[TMP0]])
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP0]], i32 0, i32 0
-// CHECK:    store ptr [[X]], ptr [[A:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[A]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @_Z26test_reduction_binding_sumv.omp_outlined, ptr [[TMP0]], ptr [[TMP1]])
-// CHECK:    [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 0
-// CHECK:    [[TMP2:%.*]] = load i32, ptr [[X1]], align 4
-// CHECK:    call void @_Z3usei(i32 noundef [[TMP2]])
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP1]])
 // CHECK:    ret void
 //
 //
 // CHECK-LABEL: define internal void @_Z26test_reduction_binding_sumv.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    store ptr [[A]], ptr [[A_ADDR:%.*]], align 8
 // CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    store ptr [[TMP2]], ptr [[TMP:%.*]], align 8
 // CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
 // CHECK:    store i32 99, ptr [[DOTOMP_UB:%.*]], align 4
 // CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
 // CHECK:    store i32 0, ptr [[DOTOMP_IS_LAST:%.*]], align 4
-// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    store i32 0, ptr [[A2:%.*]], align 4
-// CHECK:    store ptr [[A2]], ptr [[_TMP3:%.*]], align 8
-// CHECK:    [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
-// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99
+// CHECK:    store i32 0, ptr [[A:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99
 // CHECK:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
 // CHECK:       [[COND_TRUE]]:
 // CHECK:    br label %[[COND_END:.*]]
 // CHECK:       [[COND_FALSE]]:
-// CHECK:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK:    br label %[[COND_END]]
 // CHECK:       [[COND_END]]:
-// CHECK:    [[COND:%.*]] = phi i32 [ 99, %[[COND_TRUE]] ], [ [[TMP7]], %[[COND_FALSE]] ]
+// CHECK:    [[COND:%.*]] = phi i32 [ 99, %[[COND_TRUE]] ], [ [[TMP5]], %[[COND_FALSE]] ]
 // CHECK:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK:    store i32 [[TMP8]], ptr [[DOTOMP_IV:%.*]], align 4
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK:    store i32 [[TMP6]], ptr [[DOTOMP_IV:%.*]], align 4
 // CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
 // CHECK:       [[OMP_INNER_FOR_COND]]:
-// CHECK:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK:    [[CMP4:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]]
-// CHECK:    br i1 [[CMP4]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
+// CHECK:    br i1 [[CMP1]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
 // CHECK:       [[OMP_INNER_FOR_BODY]]:
-// CHECK:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
 // CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4
-// CHECK:    [[TMP12:%.*]] = load i32, ptr [[I]], align 4
-// CHECK:    [[TMP13:%.*]] = load i32, ptr [[A2]], align 4
-// CHECK:    [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP12]]
-// CHECK:    store i32 [[ADD5]], ptr [[A2]], align 4
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[I]], align 4
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[A]], align 4
+// CHECK:    [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP10]]
+// CHECK:    store i32 [[ADD2]], ptr [[A]], align 4
 // CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
 // CHECK:       [[OMP_BODY_CONTINUE]]:
 // CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
 // CHECK:       [[OMP_INNER_FOR_INC]]:
-// CHECK:    [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[ADD6:%.*]] = add nsw i32 [[TMP14]], 1
-// CHECK:    store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1
+// CHECK:    store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
 // CHECK:    br label %[[OMP_INNER_FOR_COND]]
 // CHECK:       [[OMP_INNER_FOR_END]]:
 // CHECK:    br label %[[OMP_LOOP_EXIT:.*]]
 // CHECK:       [[OMP_LOOP_EXIT]]:
-// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]])
-// CHECK:    [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST:%.*]], i64 0, i64 0
-// CHECK:    store ptr [[A2]], ptr [[TMP15]], align 8
-// CHECK:    [[TMP16:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP5]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z26test_reduction_binding_sumv.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK:    switch i32 [[TMP16]], [[DOTOMP_REDUCTION_DEFAULT:label %.*]] [
+// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK:    [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST:%.*]], i64 0, i64 0
+// CHECK:    store ptr [[A]], ptr [[TMP13]], align 8
+// CHECK:    [[TMP14:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP3]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z26test_reduction_binding_sumv.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK:    switch i32 [[TMP14]], [[DOTOMP_REDUCTION_DEFAULT:label %.*]] [
 // CHECK:      i32 1, [[DOTOMP_REDUCTION_CASE1:label %.*]]
 // CHECK:      i32 2, [[DOTOMP_REDUCTION_CASE2:label %.*]]
 // CHECK:    ]
 // CHECK:       [[_OMP_REDUCTION_CASE1:.*:]]
-// CHECK:    [[TMP17:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK:    [[TMP18:%.*]] = load i32, ptr [[A2]], align 4
-// CHECK:    [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
-// CHECK:    store i32 [[ADD7]], ptr [[TMP3]], align 4
-// CHECK:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK:    [[TMP15:%.*]] = load i32, ptr [[A]], align 4
+// CHECK:    [[TMP16:%.*]] = load i32, ptr [[A]], align 4
+// CHECK:    [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]]
+// CHECK:    store i32 [[ADD4]], ptr [[A]], align 4
+// CHECK:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var)
 // CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK:       [[_OMP_REDUCTION_CASE2:.*:]]
-// CHECK:    [[TMP19:%.*]] = load i32, ptr [[A2]], align 4
-// CHECK:    [[TMP20:%.*]] = atomicrmw add ptr [[TMP3]], i32 [[TMP19]] monotonic, align 4
+// CHECK:    [[TMP17:%.*]] = load i32, ptr [[A]], align 4
+// CHECK:    [[TMP18:%.*]] = atomicrmw add ptr [[A]], i32 [[TMP17]] monotonic, align 4
 // CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK:       [[_OMP_REDUCTION_DEFAULT:.*:]]
 // CHECK:    ret void
@@ -2643,144 +2635,128 @@ void test_lambda_implicit_capture() {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z32test_reduction_binding_operatorsv.p, i64 8, i1 false)
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z32test_reduction_binding_operatorsv.omp_outlined, ptr [[TMP0]])
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP0]], i32 0, i32 0
-// CHECK:    store ptr [[X]], ptr [[A:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP1]])
 // CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 1
-// CHECK:    store ptr [[Y]], ptr [[B:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[A]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[B]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @_Z32test_reduction_binding_operatorsv.omp_outlined, ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]])
-// CHECK:    [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 0
-// CHECK:    [[TMP3:%.*]] = load i32, ptr [[X1]], align 4
-// CHECK:    call void @_Z3usei(i32 noundef [[TMP3]])
-// CHECK:    [[Y2:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 1
-// CHECK:    [[TMP4:%.*]] = load i32, ptr [[Y2]], align 4
-// CHECK:    call void @_Z3usei(i32 noundef [[TMP4]])
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP2]])
 // CHECK:    ret void
 //
 //
 // CHECK-LABEL: define internal void @_Z32test_reduction_binding_operatorsv.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    store ptr [[A]], ptr [[A_ADDR:%.*]], align 8
-// CHECK:    store ptr [[B]], ptr [[B_ADDR:%.*]], align 8
 // CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    store ptr [[TMP2]], ptr [[TMP:%.*]], align 8
-// CHECK:    store ptr [[TMP3]], ptr [[_TMP1:%.*]], align 8
 // CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
 // CHECK:    store i32 9, ptr [[DOTOMP_UB:%.*]], align 4
 // CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
 // CHECK:    store i32 0, ptr [[DOTOMP_IS_LAST:%.*]], align 4
-// CHECK:    [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    store i32 1, ptr [[A3:%.*]], align 4
-// CHECK:    store ptr [[A3]], ptr [[_TMP4:%.*]], align 8
-// CHECK:    [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    store i32 2147483647, ptr [[B5:%.*]], align 4
-// CHECK:    store ptr [[B5]], ptr [[_TMP6:%.*]], align 8
-// CHECK:    [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
-// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 9
+// CHECK:    store i32 1, ptr [[A:%.*]], align 4
+// CHECK:    store i32 2147483647, ptr [[B:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
 // CHECK:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
 // CHECK:       [[COND_TRUE]]:
 // CHECK:    br label %[[COND_END:.*]]
 // CHECK:       [[COND_FALSE]]:
-// CHECK:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK:    br label %[[COND_END]]
 // CHECK:       [[COND_END]]:
-// CHECK:    [[COND:%.*]] = phi i32 [ 9, %[[COND_TRUE]] ], [ [[TMP9]], %[[COND_FALSE]] ]
+// CHECK:    [[COND:%.*]] = phi i32 [ 9, %[[COND_TRUE]] ], [ [[TMP5]], %[[COND_FALSE]] ]
 // CHECK:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK:    store i32 [[TMP10]], ptr [[DOTOMP_IV:%.*]], align 4
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK:    store i32 [[TMP6]], ptr [[DOTOMP_IV:%.*]], align 4
 // CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
 // CHECK:       [[OMP_INNER_FOR_COND]]:
-// CHECK:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK:    [[CMP7:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
-// CHECK:    br i1 [[CMP7]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
+// CHECK:    br i1 [[CMP1]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
 // CHECK:       [[OMP_INNER_FOR_BODY]]:
-// CHECK:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
 // CHECK:    [[ADD:%.*]] = add nsw i32 1, [[MUL]]
 // CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4
-// CHECK:    [[TMP14:%.*]] = load i32, ptr [[A3]], align 4
-// CHECK:    [[MUL8:%.*]] = mul nsw i32 [[TMP14]], 2
-// CHECK:    store i32 [[MUL8]], ptr [[A3]], align 4
-// CHECK:    [[TMP15:%.*]] = load i32, ptr [[I]], align 4
-// CHECK:    [[TMP16:%.*]] = load i32, ptr [[B5]], align 4
-// CHECK:    [[CMP9:%.*]] = icmp slt i32 [[TMP15]], [[TMP16]]
-// CHECK:    br i1 [[CMP9]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[A]], align 4
+// CHECK:    [[MUL2:%.*]] = mul nsw i32 [[TMP10]], 2
+// CHECK:    store i32 [[MUL2]], ptr [[A]], align 4
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[B]], align 4
+// CHECK:    [[CMP3:%.*]] = icmp slt i32 [[TMP11]], [[TMP12]]
+// CHECK:    br i1 [[CMP3]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
 // CHECK:       [[IF_THEN]]:
-// CHECK:    [[TMP17:%.*]] = load i32, ptr [[I]], align 4
-// CHECK:    store i32 [[TMP17]], ptr [[B5]], align 4
+// CHECK:    [[TMP13:%.*]] = load i32, ptr [[I]], align 4
+// CHECK:    store i32 [[TMP13]], ptr [[B]], align 4
 // CHECK:    br label %[[IF_END]]
 // CHECK:       [[IF_END]]:
 // CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
 // CHECK:       [[OMP_BODY_CONTINUE]]:
 // CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
 // CHECK:       [[OMP_INNER_FOR_INC]]:
-// CHECK:    [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[ADD10:%.*]] = add nsw i32 [[TMP18]], 1
-// CHECK:    store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1
+// CHECK:    store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
 // CHECK:    br label %[[OMP_INNER_FOR_COND]]
 // CHECK:       [[OMP_INNER_FOR_END]]:
 // CHECK:    br label %[[OMP_LOOP_EXIT:.*]]
 // CHECK:       [[OMP_LOOP_EXIT]]:
-// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP7]])
-// CHECK:    [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST:%.*]], i64 0, i64 0
-// CHECK:    store ptr [[A3]], ptr [[TMP19]], align 8
-// CHECK:    [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
-// CHECK:    store ptr [[B5]], ptr [[TMP20]], align 8
-// CHECK:    [[TMP21:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP7]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z32test_reduction_binding_operatorsv.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK:    switch i32 [[TMP21]], [[DOTOMP_REDUCTION_DEFAULT:label %.*]] [
+// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK:    [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST:%.*]], i64 0, i64 0
+// CHECK:    store ptr [[A]], ptr [[TMP15]], align 8
+// CHECK:    [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
+// CHECK:    store ptr [[B]], ptr [[TMP16]], align 8
+// CHECK:    [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP3]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z32test_reduction_binding_operatorsv.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK:    switch i32 [[TMP17]], [[DOTOMP_REDUCTION_DEFAULT:label %.*]] [
 // CHECK:      i32 1, [[DOTOMP_REDUCTION_CASE1:label %.*]]
 // CHECK:      i32 2, label %[[DOTOMP_REDUCTION_CASE2:.*]]
 // CHECK:    ]
 // CHECK:       [[_OMP_REDUCTION_CASE1:.*:]]
-// CHECK:    [[TMP22:%.*]] = load i32, ptr [[TMP4]], align 4
-// CHECK:    [[TMP23:%.*]] = load i32, ptr [[A3]], align 4
-// CHECK:    [[MUL11:%.*]] = mul nsw i32 [[TMP22]], [[TMP23]]
-// CHECK:    store i32 [[MUL11]], ptr [[TMP4]], align 4
-// CHECK:    [[TMP24:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK:    [[TMP25:%.*]] = load i32, ptr [[B5]], align 4
-// CHECK:    [[CMP12:%.*]] = icmp slt i32 [[TMP24]], [[TMP25]]
-// CHECK:    br i1 [[CMP12]], label %[[COND_TRUE13:.*]], label %[[COND_FALSE14:.*]]
-// CHECK:       [[COND_TRUE13]]:
-// CHECK:    [[TMP26:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK:    br label %[[COND_END15:.*]]
-// CHECK:       [[COND_FALSE14]]:
-// CHECK:    [[TMP27:%.*]] = load i32, ptr [[B5]], align 4
-// CHECK:    br label %[[COND_END15]]
-// CHECK:       [[COND_END15]]:
-// CHECK:    [[COND16:%.*]] = phi i32 [ [[TMP26]], %[[COND_TRUE13]] ], [ [[TMP27]], %[[COND_FALSE14]] ]
-// CHECK:    store i32 [[COND16]], ptr [[TMP5]], align 4
-// CHECK:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK:    [[TMP18:%.*]] = load i32, ptr [[A]], align 4
+// CHECK:    [[TMP19:%.*]] = load i32, ptr [[A]], align 4
+// CHECK:    [[MUL5:%.*]] = mul nsw i32 [[TMP18]], [[TMP19]]
+// CHECK:    store i32 [[MUL5]], ptr [[A]], align 4
+// CHECK:    [[TMP20:%.*]] = load i32, ptr [[B]], align 4
+// CHECK:    [[TMP21:%.*]] = load i32, ptr [[B]], align 4
+// CHECK:    [[CMP6:%.*]] = icmp slt i32 [[TMP20]], [[TMP21]]
+// CHECK:    br i1 [[CMP6]], label %[[COND_TRUE7:.*]], label %[[COND_FALSE8:.*]]
+// CHECK:       [[COND_TRUE7]]:
+// CHECK:    [[TMP22:%.*]] = load i32, ptr [[B]], align 4
+// CHECK:    br label %[[COND_END9:.*]]
+// CHECK:       [[COND_FALSE8]]:
+// CHECK:    [[TMP23:%.*]] = load i32, ptr [[B]], align 4
+// CHECK:    br label %[[COND_END9]]
+// CHECK:       [[COND_END9]]:
+// CHECK:    [[COND10:%.*]] = phi i32 [ [[TMP22]], %[[COND_TRUE7]] ], [ [[TMP23]], %[[COND_FALSE8]] ]
+// CHECK:    store i32 [[COND10]], ptr [[B]], align 4
+// CHECK:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var)
 // CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK:       [[_OMP_REDUCTION_CASE2:.*:]]
-// CHECK:    [[TMP28:%.*]] = load i32, ptr [[A3]], align 4
-// CHECK:    [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[TMP4]] monotonic, align 4
+// CHECK:    [[TMP24:%.*]] = load i32, ptr [[A]], align 4
+// CHECK:    [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[A]] monotonic, align 4
 // CHECK:    br label %[[ATOMIC_CONT:.*]]
 // CHECK:       [[ATOMIC_CONT]]:
-// CHECK:    [[TMP29:%.*]] = phi i32 [ [[ATOMIC_LOAD]], %[[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP34:%.*]], %[[ATOMIC_CONT]] ]
-// CHECK:    store i32 [[TMP29]], ptr [[_TMP17:%.*]], align 4
-// CHECK:    [[TMP30:%.*]] = load i32, ptr [[_TMP17]], align 4
-// CHECK:    [[TMP31:%.*]] = load i32, ptr [[A3]], align 4
-// CHECK:    [[MUL18:%.*]] = mul nsw i32 [[TMP30]], [[TMP31]]
-// CHECK:    store i32 [[MUL18]], ptr [[ATOMIC_TEMP:%.*]], align 4
-// CHECK:    [[TMP32:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4
-// CHECK:    [[TMP33:%.*]] = cmpxchg ptr [[TMP4]], i32 [[TMP29]], i32 [[TMP32]] monotonic monotonic, align 4
-// CHECK:    [[TMP34]] = extractvalue { i32, i1 } [[TMP33]], 0
-// CHECK:    [[TMP35:%.*]] = extractvalue { i32, i1 } [[TMP33]], 1
-// CHECK:    br i1 [[TMP35]], label %[[ATOMIC_EXIT:.*]], label %[[ATOMIC_CONT]]
+// CHECK:    [[TMP25:%.*]] = phi i32 [ [[ATOMIC_LOAD]], %[[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP30:%.*]], %[[ATOMIC_CONT]] ]
+// CHECK:    store i32 [[TMP25]], ptr [[_TMP11:%.*]], align 4
+// CHECK:    [[TMP26:%.*]] = load i32, ptr [[_TMP11]], align 4
+// CHECK:    [[TMP27:%.*]] = load i32, ptr [[A]], align 4
+// CHECK:    [[MUL12:%.*]] = mul nsw i32 [[TMP26]], [[TMP27]]
+// CHECK:    store i32 [[MUL12]], ptr [[ATOMIC_TEMP:%.*]], align 4
+// CHECK:    [[TMP28:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4
+// CHECK:    [[TMP29:%.*]] = cmpxchg ptr [[A]], i32 [[TMP25]], i32 [[TMP28]] monotonic monotonic, align 4
+// CHECK:    [[TMP30]] = extractvalue { i32, i1 } [[TMP29]], 0
+// CHECK:    [[TMP31:%.*]] = extractvalue { i32, i1 } [[TMP29]], 1
+// CHECK:    br i1 [[TMP31]], label %[[ATOMIC_EXIT:.*]], label %[[ATOMIC_CONT]]
 // CHECK:       [[ATOMIC_EXIT]]:
-// CHECK:    [[TMP36:%.*]] = load i32, ptr [[B5]], align 4
-// CHECK:    [[TMP37:%.*]] = atomicrmw min ptr [[TMP5]], i32 [[TMP36]] monotonic, align 4
+// CHECK:    [[TMP32:%.*]] = load i32, ptr [[B]], align 4
+// CHECK:    [[TMP33:%.*]] = atomicrmw min ptr [[B]], i32 [[TMP32]] monotonic, align 4
 // CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK:       [[_OMP_REDUCTION_DEFAULT:.*:]]
 // CHECK:    ret void
@@ -3050,145 +3026,129 @@ void test_lambda_implicit_capture() {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z26test_reduction_binding_maxv.p, i64 8, i1 false)
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z26test_reduction_binding_maxv.omp_outlined, ptr [[TMP0]])
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP0]], i32 0, i32 0
-// CHECK:    store ptr [[X]], ptr [[A:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP1]])
 // CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 1
-// CHECK:    store ptr [[Y]], ptr [[B:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[A]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[B]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @_Z26test_reduction_binding_maxv.omp_outlined, ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]])
-// CHECK:    [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 0
-// CHECK:    [[TMP3:%.*]] = load i32, ptr [[X1]], align 4
-// CHECK:    call void @_Z3usei(i32 noundef [[TMP3]])
-// CHECK:    [[Y2:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 1
-// CHECK:    [[TMP4:%.*]] = load i32, ptr [[Y2]], align 4
-// CHECK:    call void @_Z3usei(i32 noundef [[TMP4]])
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP2]])
 // CHECK:    ret void
 //
 //
 // CHECK-LABEL: define internal void @_Z26test_reduction_binding_maxv.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] {
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    store ptr [[A]], ptr [[A_ADDR:%.*]], align 8
-// CHECK:    store ptr [[B]], ptr [[B_ADDR:%.*]], align 8
 // CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    store ptr [[TMP2]], ptr [[TMP:%.*]], align 8
-// CHECK:    store ptr [[TMP3]], ptr [[_TMP1:%.*]], align 8
 // CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
 // CHECK:    store i32 99, ptr [[DOTOMP_UB:%.*]], align 4
 // CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
 // CHECK:    store i32 0, ptr [[DOTOMP_IS_LAST:%.*]], align 4
-// CHECK:    [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    store i32 -2147483648, ptr [[A3:%.*]], align 4
-// CHECK:    store ptr [[A3]], ptr [[_TMP4:%.*]], align 8
-// CHECK:    [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    store i32 -2147483648, ptr [[B5:%.*]], align 4
-// CHECK:    store ptr [[B5]], ptr [[_TMP6:%.*]], align 8
-// CHECK:    [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
-// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 99
+// CHECK:    store i32 -2147483648, ptr [[A:%.*]], align 4
+// CHECK:    store i32 -2147483648, ptr [[B:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99
 // CHECK:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
 // CHECK:       [[COND_TRUE]]:
 // CHECK:    br label %[[COND_END:.*]]
 // CHECK:       [[COND_FALSE]]:
-// CHECK:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK:    br label %[[COND_END]]
 // CHECK:       [[COND_END]]:
-// CHECK:    [[COND:%.*]] = phi i32 [ 99, %[[COND_TRUE]] ], [ [[TMP9]], %[[COND_FALSE]] ]
+// CHECK:    [[COND:%.*]] = phi i32 [ 99, %[[COND_TRUE]] ], [ [[TMP5]], %[[COND_FALSE]] ]
 // CHECK:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK:    store i32 [[TMP10]], ptr [[DOTOMP_IV:%.*]], align 4
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK:    store i32 [[TMP6]], ptr [[DOTOMP_IV:%.*]], align 4
 // CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
 // CHECK:       [[OMP_INNER_FOR_COND]]:
-// CHECK:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK:    [[CMP7:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
-// CHECK:    br i1 [[CMP7]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
+// CHECK:    br i1 [[CMP1]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
 // CHECK:       [[OMP_INNER_FOR_BODY]]:
-// CHECK:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
 // CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4
-// CHECK:    [[TMP14:%.*]] = load i32, ptr [[I]], align 4
-// CHECK:    [[TMP15:%.*]] = load i32, ptr [[A3]], align 4
-// CHECK:    [[CMP8:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
-// CHECK:    br i1 [[CMP8]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[I]], align 4
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[A]], align 4
+// CHECK:    [[CMP2:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
+// CHECK:    br i1 [[CMP2]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
 // CHECK:       [[IF_THEN]]:
-// CHECK:    [[TMP16:%.*]] = load i32, ptr [[I]], align 4
-// CHECK:    store i32 [[TMP16]], ptr [[A3]], align 4
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[I]], align 4
+// CHECK:    store i32 [[TMP12]], ptr [[A]], align 4
 // CHECK:    br label %[[IF_END]]
 // CHECK:       [[IF_END]]:
-// CHECK:    [[TMP17:%.*]] = load i32, ptr [[I]], align 4
-// CHECK:    [[TMP18:%.*]] = load i32, ptr [[B5]], align 4
-// CHECK:    [[CMP9:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
-// CHECK:    br i1 [[CMP9]], label %[[IF_THEN10:.*]], label %[[IF_END11:.*]]
-// CHECK:       [[IF_THEN10]]:
-// CHECK:    [[TMP19:%.*]] = load i32, ptr [[I]], align 4
-// CHECK:    store i32 [[TMP19]], ptr [[B5]], align 4
-// CHECK:    br label %[[IF_END11]]
-// CHECK:       [[IF_END11]]:
+// CHECK:    [[TMP13:%.*]] = load i32, ptr [[I]], align 4
+// CHECK:    [[TMP14:%.*]] = load i32, ptr [[B]], align 4
+// CHECK:    [[CMP3:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]]
+// CHECK:    br i1 [[CMP3]], label %[[IF_THEN4:.*]], label %[[IF_END5:.*]]
+// CHECK:       [[IF_THEN4]]:
+// CHECK:    [[TMP15:%.*]] = load i32, ptr [[I]], align 4
+// CHECK:    store i32 [[TMP15]], ptr [[B]], align 4
+// CHECK:    br label %[[IF_END5]]
+// CHECK:       [[IF_END5]]:
 // CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
 // CHECK:       [[OMP_BODY_CONTINUE]]:
 // CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
 // CHECK:       [[OMP_INNER_FOR_INC]]:
-// CHECK:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[ADD12:%.*]] = add nsw i32 [[TMP20]], 1
-// CHECK:    store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1
+// CHECK:    store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
 // CHECK:    br label %[[OMP_INNER_FOR_COND]]
 // CHECK:       [[OMP_INNER_FOR_END]]:
 // CHECK:    br label %[[OMP_LOOP_EXIT:.*]]
 // CHECK:       [[OMP_LOOP_EXIT]]:
-// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP7]])
-// CHECK:    [[TMP21:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST:%.*]], i64 0, i64 0
-// CHECK:    store ptr [[A3]], ptr [[TMP21]], align 8
-// CHECK:    [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
-// CHECK:    store ptr [[B5]], ptr [[TMP22]], align 8
-// CHECK:    [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP7]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z26test_reduction_binding_maxv.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK:    switch i32 [[TMP23]], [[DOTOMP_REDUCTION_DEFAULT:label %.*]] [
+// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK:    [[TMP17:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST:%.*]], i64 0, i64 0
+// CHECK:    store ptr [[A]], ptr [[TMP17]], align 8
+// CHECK:    [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
+// CHECK:    store ptr [[B]], ptr [[TMP18]], align 8
+// CHECK:    [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP3]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z26test_reduction_binding_maxv.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK:    switch i32 [[TMP19]], [[DOTOMP_REDUCTION_DEFAULT:label %.*]] [
 // CHECK:      i32 1, [[DOTOMP_REDUCTION_CASE1:label %.*]]
 // CHECK:      i32 2, [[DOTOMP_REDUCTION_CASE2:label %.*]]
 // CHECK:    ]
 // CHECK:       [[_OMP_REDUCTION_CASE1:.*:]]
-// CHECK:    [[TMP24:%.*]] = load i32, ptr [[TMP4]], align 4
-// CHECK:    [[TMP25:%.*]] = load i32, ptr [[A3]], align 4
-// CHECK:    [[CMP13:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]]
-// CHECK:    br i1 [[CMP13]], label %[[COND_TRUE14:.*]], label %[[COND_FALSE15:.*]]
-// CHECK:       [[COND_TRUE14]]:
-// CHECK:    [[TMP26:%.*]] = load i32, ptr [[TMP4]], align 4
-// CHECK:    br label %[[COND_END16:.*]]
-// CHECK:       [[COND_FALSE15]]:
-// CHECK:    [[TMP27:%.*]] = load i32, ptr [[A3]], align 4
-// CHECK:    br label %[[COND_END16]]
-// CHECK:       [[COND_END16]]:
-// CHECK:    [[COND17:%.*]] = phi i32 [ [[TMP26]], %[[COND_TRUE14]] ], [ [[TMP27]], %[[COND_FALSE15]] ]
-// CHECK:    store i32 [[COND17]], ptr [[TMP4]], align 4
-// CHECK:    [[TMP28:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK:    [[TMP29:%.*]] = load i32, ptr [[B5]], align 4
-// CHECK:    [[CMP18:%.*]] = icmp sgt i32 [[TMP28]], [[TMP29]]
-// CHECK:    br i1 [[CMP18]], label %[[COND_TRUE19:.*]], label %[[COND_FALSE20:.*]]
-// CHECK:       [[COND_TRUE19]]:
-// CHECK:    [[TMP30:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK:    br label %[[COND_END21:.*]]
-// CHECK:       [[COND_FALSE20]]:
-// CHECK:    [[TMP31:%.*]] = load i32, ptr [[B5]], align 4
-// CHECK:    br label %[[COND_END21]]
-// CHECK:       [[COND_END21]]:
-// CHECK:    [[COND22:%.*]] = phi i32 [ [[TMP30]], %[[COND_TRUE19]] ], [ [[TMP31]], %[[COND_FALSE20]] ]
-// CHECK:    store i32 [[COND22]], ptr [[TMP5]], align 4
-// CHECK:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK:    [[TMP20:%.*]] = load i32, ptr [[A]], align 4
+// CHECK:    [[TMP21:%.*]] = load i32, ptr [[A]], align 4
+// CHECK:    [[CMP7:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]]
+// CHECK:    br i1 [[CMP7]], label %[[COND_TRUE8:.*]], label %[[COND_FALSE9:.*]]
+// CHECK:       [[COND_TRUE8]]:
+// CHECK:    [[TMP22:%.*]] = load i32, ptr [[A]], align 4
+// CHECK:    br label %[[COND_END10:.*]]
+// CHECK:       [[COND_FALSE9]]:
+// CHECK:    [[TMP23:%.*]] = load i32, ptr [[A]], align 4
+// CHECK:    br label %[[COND_END10]]
+// CHECK:       [[COND_END10]]:
+// CHECK:    [[COND11:%.*]] = phi i32 [ [[TMP22]], %[[COND_TRUE8]] ], [ [[TMP23]], %[[COND_FALSE9]] ]
+// CHECK:    store i32 [[COND11]], ptr [[A]], align 4
+// CHECK:    [[TMP24:%.*]] = load i32, ptr [[B]], align 4
+// CHECK:    [[TMP25:%.*]] = load i32, ptr [[B]], align 4
+// CHECK:    [[CMP12:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]]
+// CHECK:    br i1 [[CMP12]], label %[[COND_TRUE13:.*]], label %[[COND_FALSE14:.*]]
+// CHECK:       [[COND_TRUE13]]:
+// CHECK:    [[TMP26:%.*]] = load i32, ptr [[B]], align 4
+// CHECK:    br label %[[COND_END15:.*]]
+// CHECK:       [[COND_FALSE14]]:
+// CHECK:    [[TMP27:%.*]] = load i32, ptr [[B]], align 4
+// CHECK:    br label %[[COND_END15]]
+// CHECK:       [[COND_END15]]:
+// CHECK:    [[COND16:%.*]] = phi i32 [ [[TMP26]], %[[COND_TRUE13]] ], [ [[TMP27]], %[[COND_FALSE14]] ]
+// CHECK:    store i32 [[COND16]], ptr [[B]], align 4
+// CHECK:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var)
 // CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK:       [[_OMP_REDUCTION_CASE2:.*:]]
-// CHECK:    [[TMP32:%.*]] = load i32, ptr [[A3]], align 4
-// CHECK:    [[TMP33:%.*]] = atomicrmw max ptr [[TMP4]], i32 [[TMP32]] monotonic, align 4
-// CHECK:    [[TMP34:%.*]] = load i32, ptr [[B5]], align 4
-// CHECK:    [[TMP35:%.*]] = atomicrmw max ptr [[TMP5]], i32 [[TMP34]] monotonic, align 4
+// CHECK:    [[TMP28:%.*]] = load i32, ptr [[A]], align 4
+// CHECK:    [[TMP29:%.*]] = atomicrmw max ptr [[A]], i32 [[TMP28]] monotonic, align 4
+// CHECK:    [[TMP30:%.*]] = load i32, ptr [[B]], align 4
+// CHECK:    [[TMP31:%.*]] = atomicrmw max ptr [[B]], i32 [[TMP30]] monotonic, align 4
 // CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK:       [[_OMP_REDUCTION_DEFAULT:.*:]]
 // CHECK:    ret void
@@ -3243,35 +3203,29 @@ void test_lambda_implicit_capture() {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P1:%.*]], ptr align 4 @__const._Z27test_binding_name_collisionv.p1, i64 8, i1 false)
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P1]], i64 8, i1 false)
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP0]], i32 0, i32 0
-// CHECK:    store ptr [[X]], ptr [[A:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[A]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z27test_binding_name_collisionv.omp_outlined, ptr [[TMP1]])
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z27test_binding_name_collisionv.omp_outlined, ptr [[TMP0]])
 // CHECK:    ret void
 //
 //
 // CHECK-LABEL: define internal void @_Z27test_binding_name_collisionv.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
-// CHECK:    store ptr [[A]], ptr [[A_ADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    store ptr [[TMP1]], ptr [[TMP:%.*]], align 8
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    store i32 0, ptr [[A1:%.*]], align 4
-// CHECK:    store ptr [[A1]], ptr [[_TMP2:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store i32 0, ptr [[A:%.*]], align 4
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P2:%.*]], ptr align 4 @"__const.<captured>.p2", i64 8, i1 false)
-// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P2]], i64 8, i1 false)
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1:%.*]], ptr align 4 [[P2]], i64 8, i1 false)
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
 // CHECK:    [[TMP3:%.*]] = load i32, ptr [[X]], align 4
 // CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
 // CHECK:    store i32 [[ADD]], ptr [[X]], align 4
-// CHECK:    [[X3:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 0
-// CHECK:    [[TMP4:%.*]] = load i32, ptr [[X3]], align 4
+// CHECK:    [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[X1]], align 4
 // CHECK:    call void @_Z3usei(i32 noundef [[TMP4]])
 // CHECK:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST:%.*]], i64 0, i64 0
-// CHECK:    store ptr [[A1]], ptr [[TMP5]], align 8
+// CHECK:    store ptr [[A]], ptr [[TMP5]], align 8
 // CHECK:    [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
 // CHECK:    [[TMP8:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP7]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z27test_binding_name_collisionv.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
@@ -3280,15 +3234,15 @@ void test_lambda_implicit_capture() {
 // CHECK:      i32 2, [[DOTOMP_REDUCTION_CASE2:label %.*]]
 // CHECK:    ]
 // CHECK:       [[_OMP_REDUCTION_CASE1:.*:]]
-// CHECK:    [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK:    [[TMP10:%.*]] = load i32, ptr [[A1]], align 4
-// CHECK:    [[ADD4:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
-// CHECK:    store i32 [[ADD4]], ptr [[TMP2]], align 4
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[A]], align 4
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[A]], align 4
+// CHECK:    [[ADD2:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
+// CHECK:    store i32 [[ADD2]], ptr [[A]], align 4
 // CHECK:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var)
 // CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK:       [[_OMP_REDUCTION_CASE2:.*:]]
-// CHECK:    [[TMP11:%.*]] = load i32, ptr [[A1]], align 4
-// CHECK:    [[TMP12:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP11]] monotonic, align 4
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[A]], align 4
+// CHECK:    [[TMP12:%.*]] = atomicrmw add ptr [[A]], i32 [[TMP11]] monotonic, align 4
 // CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK:       [[_OMP_REDUCTION_DEFAULT:.*:]]
 // CHECK:    ret void

>From 792fe6c5404d377527d0a588d455a34982ce9c41 Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Wed, 10 Jun 2026 09:35:27 -0700
Subject: [PATCH 35/45] Fix format

---
 clang/lib/CodeGen/CGExpr.cpp       | 2 +-
 clang/lib/CodeGen/CGStmtOpenMP.cpp | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 939154b487442..872b86b14aa15 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3606,7 +3606,7 @@ LValue CodeGenFunction::EmitOMPCapturedBindingLValue(const BindingDecl *BD) {
          "Expected OpenMP captured region");
   assert(CGM.getLangOpts().OpenMP && "Expected OpenMP to be enabled");
 
- if (auto It = LocalDeclMap.find(BD->getCanonicalDecl());
+  if (auto It = LocalDeclMap.find(BD->getCanonicalDecl());
       It != LocalDeclMap.end())
     return MakeAddrLValue(It->second, BD->getType());
 
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 69ed2ed56ec6e..905bc92ab398c 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1168,8 +1168,7 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                         /*RefersToEnclosingVariableOrCapture=*/true,
                         BD->getType(), VK_LValue, (*IRef)->getExprLoc());
         LValue OriginalLVal = EmitLValue(&DRE);
-        Address OriginalAddr =
-            OriginalLVal.getAddress();
+        Address OriginalAddr = OriginalLVal.getAddress();
         // Emit private VarDecl with copy init. Remap VDInit to point to the
         // original binding so EmitDecl properly initializes VD.
         setAddrOfLocalVar(VDInit, OriginalAddr);
@@ -1924,7 +1923,7 @@ checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
       if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
         PrivateDecls.insert(VD);
         CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
-      } 
+      }
     }
   }
   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {

>From 0215d36bfb902e69b73f1999c4f924d73142c664 Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Fri, 12 Jun 2026 06:38:53 -0700
Subject: [PATCH 36/45] Addressed review comments

---
 .../clang/Basic/DiagnosticSemaKinds.td        |  2 -
 clang/lib/CodeGen/CGExpr.cpp                  |  4 ++
 clang/lib/CodeGen/CGStmtOpenMP.cpp            | 25 +++++---
 clang/lib/Sema/SemaLambda.cpp                 |  2 +-
 .../OpenMP/structured-bindings-codegen.cpp    | 62 ++++++++++---------
 5 files changed, 57 insertions(+), 38 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 793653c2d4e07..2712a110551be 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10278,8 +10278,6 @@ def err_reference_to_local_in_enclosing_context : Error<
   "%select{%3|block literal|lambda expression|context}2">;
 def err_local_nested_class_invalid_scope : Error<
   "nested local class %0 must be defined in the same block scope as %1">;
-def err_capture_binding_openmp : Error<
-  "capturing a structured binding is not yet supported in OpenMP">;
 def ext_capture_binding : ExtWarn<
   "captured structured bindings are a C++20 extension">, InGroup<CXX20>;
 def warn_cxx17_compat_capture_binding : Warning<
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 872b86b14aa15..3fe9a1bc35d86 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3612,6 +3612,10 @@ LValue CodeGenFunction::EmitOMPCapturedBindingLValue(const BindingDecl *BD) {
 
   auto *DD = cast<VarDecl>(BD->getDecomposedDecl());
   Expr *BindingExpr = BD->getBinding()->IgnoreImplicit();
+  // Use getNonReferenceType() because we need the actual object type, not the
+  // reference type. For example, with "struct Point { int x, y; }; auto& [a, b]
+  // = p;", DD is a reference to Point, but we need the Point type itself to
+  // compute field offsets.
   QualType DREType = DD->getType().getNonReferenceType();
   DeclarationNameInfo NameInfo(DD->getDeclName(), SourceLocation());
   DeclRefExpr *DRE = DeclRefExpr::Create(
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 905bc92ab398c..c82b3620c6b8c 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1610,6 +1610,24 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
     if (const auto *DRE = dyn_cast<DeclRefExpr>(IRef->IgnoreParenImpCasts()))
       BD = dyn_cast<BindingDecl>(DRE->getDecl());
     if (BD) {
+      // BindingDecls cannot use the ReductionCodeGen infrastructure because:
+      //   1. RedCG.emitSharedOrigLValue() crashes when emitting BindingDecl
+      //      refs (DecompositionDecl context issues in the outlined region).
+      //   2. All RedCG methods (emitAggregateType, emitInitialization, etc.)
+      //      depend on emitSharedOrigLValue() being called first.
+      // Writeback still works correctly through LHSVD/RHSVD registration:
+      //   - LHSVD is registered with the original BindingDecl address.
+      //   - RHSVD is registered with the private copy address.
+      //   - The reduction operation writes from RHSVD back to LHSVD.
+      // This achieves the same writeback that RedCG provides for VarDecls.
+      //
+      // Get the original BindingDecl address.
+      DeclRefExpr BindingDRE(getContext(), const_cast<BindingDecl *>(BD),
+                             /*RefersToEnclosingVariableOrCapture=*/true,
+                             BD->getType(), VK_LValue, IRef->getExprLoc());
+      LValue OriginalLVal = EmitLValue(&BindingDRE);
+      Address OriginalAddr = OriginalLVal.getAddress();
+
       // Emit the private VarDecl with reduction initialization.
       EmitDecl(*PrivateVD);
       Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
@@ -1619,13 +1637,6 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
       assert(IsRegistered && "private binding already registered as private");
       (void)IsRegistered;
 
-      // Get the original BindingDecl address for LHSVD registration.
-      DeclRefExpr BindingDRE(getContext(), const_cast<BindingDecl *>(BD),
-                             /*RefersToEnclosingVariableOrCapture=*/true,
-                             BD->getType(), VK_LValue, IRef->getExprLoc());
-      LValue OriginalLVal = EmitLValue(&BindingDRE);
-      Address OriginalAddr = OriginalLVal.getAddress();
-
       // Register LHSVD/RHSVD for reduction operation.
       const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
       const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp
index 5a746dd31de85..139593bed6eac 100644
--- a/clang/lib/Sema/SemaLambda.cpp
+++ b/clang/lib/Sema/SemaLambda.cpp
@@ -1981,7 +1981,7 @@ ExprResult Sema::BuildCaptureInit(const Capture &Cap,
       //   auto [a, b] = p;
       //   auto lambda = [a]() { return a; };  // In OpenMP context.
       // This is reached during lambda capture for OpenMP mappings.
-      Var = cast<BindingDecl>(Var)->getDecomposedDecl();
+      Var = BD->getDecomposedDecl();
     Name = Var->getIdentifier();
     Init = BuildDeclarationNameExpr(
         CXXScopeSpec(), DeclarationNameInfo(Var->getDeclName(), Loc), Var);
diff --git a/clang/test/OpenMP/structured-bindings-codegen.cpp b/clang/test/OpenMP/structured-bindings-codegen.cpp
index affe028edb9d2..7adb1b3ea34ad 100644
--- a/clang/test/OpenMP/structured-bindings-codegen.cpp
+++ b/clang/test/OpenMP/structured-bindings-codegen.cpp
@@ -2546,6 +2546,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store i32 99, ptr [[DOTOMP_UB:%.*]], align 4
 // CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
 // CHECK:    store i32 0, ptr [[DOTOMP_IS_LAST:%.*]], align 4
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
 // CHECK:    store i32 0, ptr [[A:%.*]], align 4
 // CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
@@ -2598,15 +2599,15 @@ void test_lambda_implicit_capture() {
 // CHECK:      i32 2, [[DOTOMP_REDUCTION_CASE2:label %.*]]
 // CHECK:    ]
 // CHECK:       [[_OMP_REDUCTION_CASE1:.*:]]
-// CHECK:    [[TMP15:%.*]] = load i32, ptr [[A]], align 4
+// CHECK:    [[TMP15:%.*]] = load i32, ptr [[X]], align 4
 // CHECK:    [[TMP16:%.*]] = load i32, ptr [[A]], align 4
 // CHECK:    [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]]
-// CHECK:    store i32 [[ADD4]], ptr [[A]], align 4
+// CHECK:    store i32 [[ADD4]], ptr [[X]], align 4
 // CHECK:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var)
 // CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK:       [[_OMP_REDUCTION_CASE2:.*:]]
 // CHECK:    [[TMP17:%.*]] = load i32, ptr [[A]], align 4
-// CHECK:    [[TMP18:%.*]] = atomicrmw add ptr [[A]], i32 [[TMP17]] monotonic, align 4
+// CHECK:    [[TMP18:%.*]] = atomicrmw add ptr [[X]], i32 [[TMP17]] monotonic, align 4
 // CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK:       [[_OMP_REDUCTION_DEFAULT:.*:]]
 // CHECK:    ret void
@@ -2656,7 +2657,9 @@ void test_lambda_implicit_capture() {
 // CHECK:    store i32 9, ptr [[DOTOMP_UB:%.*]], align 4
 // CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
 // CHECK:    store i32 0, ptr [[DOTOMP_IS_LAST:%.*]], align 4
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
 // CHECK:    store i32 1, ptr [[A:%.*]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
 // CHECK:    store i32 2147483647, ptr [[B:%.*]], align 4
 // CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
@@ -2719,28 +2722,28 @@ void test_lambda_implicit_capture() {
 // CHECK:      i32 2, label %[[DOTOMP_REDUCTION_CASE2:.*]]
 // CHECK:    ]
 // CHECK:       [[_OMP_REDUCTION_CASE1:.*:]]
-// CHECK:    [[TMP18:%.*]] = load i32, ptr [[A]], align 4
+// CHECK:    [[TMP18:%.*]] = load i32, ptr [[X]], align 4
 // CHECK:    [[TMP19:%.*]] = load i32, ptr [[A]], align 4
 // CHECK:    [[MUL5:%.*]] = mul nsw i32 [[TMP18]], [[TMP19]]
-// CHECK:    store i32 [[MUL5]], ptr [[A]], align 4
-// CHECK:    [[TMP20:%.*]] = load i32, ptr [[B]], align 4
+// CHECK:    store i32 [[MUL5]], ptr [[X]], align 4
+// CHECK:    [[TMP20:%.*]] = load i32, ptr [[Y]], align 4
 // CHECK:    [[TMP21:%.*]] = load i32, ptr [[B]], align 4
 // CHECK:    [[CMP6:%.*]] = icmp slt i32 [[TMP20]], [[TMP21]]
 // CHECK:    br i1 [[CMP6]], label %[[COND_TRUE7:.*]], label %[[COND_FALSE8:.*]]
 // CHECK:       [[COND_TRUE7]]:
-// CHECK:    [[TMP22:%.*]] = load i32, ptr [[B]], align 4
+// CHECK:    [[TMP22:%.*]] = load i32, ptr [[Y]], align 4
 // CHECK:    br label %[[COND_END9:.*]]
 // CHECK:       [[COND_FALSE8]]:
 // CHECK:    [[TMP23:%.*]] = load i32, ptr [[B]], align 4
 // CHECK:    br label %[[COND_END9]]
 // CHECK:       [[COND_END9]]:
 // CHECK:    [[COND10:%.*]] = phi i32 [ [[TMP22]], %[[COND_TRUE7]] ], [ [[TMP23]], %[[COND_FALSE8]] ]
-// CHECK:    store i32 [[COND10]], ptr [[B]], align 4
+// CHECK:    store i32 [[COND10]], ptr [[Y]], align 4
 // CHECK:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var)
 // CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK:       [[_OMP_REDUCTION_CASE2:.*:]]
 // CHECK:    [[TMP24:%.*]] = load i32, ptr [[A]], align 4
-// CHECK:    [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[A]] monotonic, align 4
+// CHECK:    [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[X]] monotonic, align 4
 // CHECK:    br label %[[ATOMIC_CONT:.*]]
 // CHECK:       [[ATOMIC_CONT]]:
 // CHECK:    [[TMP25:%.*]] = phi i32 [ [[ATOMIC_LOAD]], %[[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP30:%.*]], %[[ATOMIC_CONT]] ]
@@ -2750,13 +2753,13 @@ void test_lambda_implicit_capture() {
 // CHECK:    [[MUL12:%.*]] = mul nsw i32 [[TMP26]], [[TMP27]]
 // CHECK:    store i32 [[MUL12]], ptr [[ATOMIC_TEMP:%.*]], align 4
 // CHECK:    [[TMP28:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4
-// CHECK:    [[TMP29:%.*]] = cmpxchg ptr [[A]], i32 [[TMP25]], i32 [[TMP28]] monotonic monotonic, align 4
+// CHECK:    [[TMP29:%.*]] = cmpxchg ptr [[X]], i32 [[TMP25]], i32 [[TMP28]] monotonic monotonic, align 4
 // CHECK:    [[TMP30]] = extractvalue { i32, i1 } [[TMP29]], 0
 // CHECK:    [[TMP31:%.*]] = extractvalue { i32, i1 } [[TMP29]], 1
 // CHECK:    br i1 [[TMP31]], label %[[ATOMIC_EXIT:.*]], label %[[ATOMIC_CONT]]
 // CHECK:       [[ATOMIC_EXIT]]:
 // CHECK:    [[TMP32:%.*]] = load i32, ptr [[B]], align 4
-// CHECK:    [[TMP33:%.*]] = atomicrmw min ptr [[B]], i32 [[TMP32]] monotonic, align 4
+// CHECK:    [[TMP33:%.*]] = atomicrmw min ptr [[Y]], i32 [[TMP32]] monotonic, align 4
 // CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK:       [[_OMP_REDUCTION_DEFAULT:.*:]]
 // CHECK:    ret void
@@ -3047,7 +3050,9 @@ void test_lambda_implicit_capture() {
 // CHECK:    store i32 99, ptr [[DOTOMP_UB:%.*]], align 4
 // CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
 // CHECK:    store i32 0, ptr [[DOTOMP_IS_LAST:%.*]], align 4
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
 // CHECK:    store i32 -2147483648, ptr [[A:%.*]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
 // CHECK:    store i32 -2147483648, ptr [[B:%.*]], align 4
 // CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
@@ -3116,39 +3121,39 @@ void test_lambda_implicit_capture() {
 // CHECK:      i32 2, [[DOTOMP_REDUCTION_CASE2:label %.*]]
 // CHECK:    ]
 // CHECK:       [[_OMP_REDUCTION_CASE1:.*:]]
-// CHECK:    [[TMP20:%.*]] = load i32, ptr [[A]], align 4
+// CHECK:    [[TMP20:%.*]] = load i32, ptr [[X]], align 4
 // CHECK:    [[TMP21:%.*]] = load i32, ptr [[A]], align 4
 // CHECK:    [[CMP7:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]]
 // CHECK:    br i1 [[CMP7]], label %[[COND_TRUE8:.*]], label %[[COND_FALSE9:.*]]
 // CHECK:       [[COND_TRUE8]]:
-// CHECK:    [[TMP22:%.*]] = load i32, ptr [[A]], align 4
+// CHECK:    [[TMP22:%.*]] = load i32, ptr [[X]], align 4
 // CHECK:    br label %[[COND_END10:.*]]
 // CHECK:       [[COND_FALSE9]]:
 // CHECK:    [[TMP23:%.*]] = load i32, ptr [[A]], align 4
 // CHECK:    br label %[[COND_END10]]
 // CHECK:       [[COND_END10]]:
 // CHECK:    [[COND11:%.*]] = phi i32 [ [[TMP22]], %[[COND_TRUE8]] ], [ [[TMP23]], %[[COND_FALSE9]] ]
-// CHECK:    store i32 [[COND11]], ptr [[A]], align 4
-// CHECK:    [[TMP24:%.*]] = load i32, ptr [[B]], align 4
+// CHECK:    store i32 [[COND11]], ptr [[X]], align 4
+// CHECK:    [[TMP24:%.*]] = load i32, ptr [[Y]], align 4
 // CHECK:    [[TMP25:%.*]] = load i32, ptr [[B]], align 4
 // CHECK:    [[CMP12:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]]
 // CHECK:    br i1 [[CMP12]], label %[[COND_TRUE13:.*]], label %[[COND_FALSE14:.*]]
 // CHECK:       [[COND_TRUE13]]:
-// CHECK:    [[TMP26:%.*]] = load i32, ptr [[B]], align 4
+// CHECK:    [[TMP26:%.*]] = load i32, ptr [[Y]], align 4
 // CHECK:    br label %[[COND_END15:.*]]
 // CHECK:       [[COND_FALSE14]]:
 // CHECK:    [[TMP27:%.*]] = load i32, ptr [[B]], align 4
 // CHECK:    br label %[[COND_END15]]
 // CHECK:       [[COND_END15]]:
 // CHECK:    [[COND16:%.*]] = phi i32 [ [[TMP26]], %[[COND_TRUE13]] ], [ [[TMP27]], %[[COND_FALSE14]] ]
-// CHECK:    store i32 [[COND16]], ptr [[B]], align 4
+// CHECK:    store i32 [[COND16]], ptr [[Y]], align 4
 // CHECK:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var)
 // CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK:       [[_OMP_REDUCTION_CASE2:.*:]]
 // CHECK:    [[TMP28:%.*]] = load i32, ptr [[A]], align 4
-// CHECK:    [[TMP29:%.*]] = atomicrmw max ptr [[A]], i32 [[TMP28]] monotonic, align 4
+// CHECK:    [[TMP29:%.*]] = atomicrmw max ptr [[X]], i32 [[TMP28]] monotonic, align 4
 // CHECK:    [[TMP30:%.*]] = load i32, ptr [[B]], align 4
-// CHECK:    [[TMP31:%.*]] = atomicrmw max ptr [[B]], i32 [[TMP30]] monotonic, align 4
+// CHECK:    [[TMP31:%.*]] = atomicrmw max ptr [[Y]], i32 [[TMP30]] monotonic, align 4
 // CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK:       [[_OMP_REDUCTION_DEFAULT:.*:]]
 // CHECK:    ret void
@@ -3214,15 +3219,16 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
 // CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
 // CHECK:    store i32 0, ptr [[A:%.*]], align 4
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P2:%.*]], ptr align 4 @"__const.<captured>.p2", i64 8, i1 false)
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1:%.*]], ptr align 4 [[P2]], i64 8, i1 false)
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
-// CHECK:    [[TMP3:%.*]] = load i32, ptr [[X]], align 4
-// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
-// CHECK:    store i32 [[ADD]], ptr [[X]], align 4
 // CHECK:    [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 0
-// CHECK:    [[TMP4:%.*]] = load i32, ptr [[X1]], align 4
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[X1]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// CHECK:    store i32 [[ADD]], ptr [[X1]], align 4
+// CHECK:    [[X2:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[X2]], align 4
 // CHECK:    call void @_Z3usei(i32 noundef [[TMP4]])
 // CHECK:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST:%.*]], i64 0, i64 0
 // CHECK:    store ptr [[A]], ptr [[TMP5]], align 8
@@ -3234,15 +3240,15 @@ void test_lambda_implicit_capture() {
 // CHECK:      i32 2, [[DOTOMP_REDUCTION_CASE2:label %.*]]
 // CHECK:    ]
 // CHECK:       [[_OMP_REDUCTION_CASE1:.*:]]
-// CHECK:    [[TMP9:%.*]] = load i32, ptr [[A]], align 4
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[X]], align 4
 // CHECK:    [[TMP10:%.*]] = load i32, ptr [[A]], align 4
-// CHECK:    [[ADD2:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
-// CHECK:    store i32 [[ADD2]], ptr [[A]], align 4
+// CHECK:    [[ADD3:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
+// CHECK:    store i32 [[ADD3]], ptr [[X]], align 4
 // CHECK:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var)
 // CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK:       [[_OMP_REDUCTION_CASE2:.*:]]
 // CHECK:    [[TMP11:%.*]] = load i32, ptr [[A]], align 4
-// CHECK:    [[TMP12:%.*]] = atomicrmw add ptr [[A]], i32 [[TMP11]] monotonic, align 4
+// CHECK:    [[TMP12:%.*]] = atomicrmw add ptr [[X]], i32 [[TMP11]] monotonic, align 4
 // CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK:       [[_OMP_REDUCTION_DEFAULT:.*:]]
 // CHECK:    ret void

>From a092176ec43b04f1cc8937e33e323c722bfe9744 Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Mon, 15 Jun 2026 12:49:32 -0700
Subject: [PATCH 37/45] Addressed review comments

---
 clang/lib/CodeGen/CGExpr.cpp                  |   5 +-
 clang/lib/CodeGen/CGStmtOpenMP.cpp            |  60 ++-
 clang/lib/Sema/SemaExpr.cpp                   |   3 +-
 clang/lib/Sema/SemaOpenMP.cpp                 |  24 +-
 .../OpenMP/structured-bindings-codegen.cpp    | 383 +++++++++---------
 5 files changed, 278 insertions(+), 197 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 3fe9a1bc35d86..3a723bdd1dc33 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3613,9 +3613,8 @@ LValue CodeGenFunction::EmitOMPCapturedBindingLValue(const BindingDecl *BD) {
   auto *DD = cast<VarDecl>(BD->getDecomposedDecl());
   Expr *BindingExpr = BD->getBinding()->IgnoreImplicit();
   // Use getNonReferenceType() because we need the actual object type, not the
-  // reference type. For example, with "struct Point { int x, y; }; auto& [a, b]
-  // = p;", DD is a reference to Point, but we need the Point type itself to
-  // compute field offsets.
+  // reference type. DeclRefExpr with VK_LValue requires a non-reference type
+  // (AST invariant). EmitDeclRefLValue will load any reference for us.
   QualType DREType = DD->getType().getNonReferenceType();
   DeclarationNameInfo NameInfo(DD->getDeclName(), SourceLocation());
   DeclRefExpr *DRE = DeclRefExpr::Create(
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index c82b3620c6b8c..46e2303459c36 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1413,7 +1413,7 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit(
           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
     }
   }
-  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
+  llvm::DenseSet<const ValueDecl *> AlreadyEmittedVars;
   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
     HasAtLeastOneLastprivate = true;
     if (isOpenMPTaskLoopDirective(EKind) && !getLangOpts().OpenMPSimd)
@@ -1424,9 +1424,29 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit(
       // Keep the address of the original variable for future update at the end
       // of the loop.
       const auto *OrigDecl = cast<DeclRefExpr>(*IRef)->getDecl();
-      // BindingDecls are handled through privatization in
-      // EmitOMPCapturedBindingLValue.
-      if (isa<BindingDecl>(OrigDecl)) {
+      // Handle BindingDecls with the same level of support as VarDecls.
+      if (const auto *BD = dyn_cast<BindingDecl>(OrigDecl)) {
+        if (AlreadyEmittedVars.insert(cast<ValueDecl>(BD->getCanonicalDecl()))
+                .second) {
+          const auto *DestVD =
+              cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
+          // Get the original binding address.
+          DeclRefExpr DRE(getContext(), const_cast<BindingDecl *>(BD),
+                          /*RefersToEnclosingVariableOrCapture=*/true,
+                          BD->getType(), VK_LValue, (*IRef)->getExprLoc());
+          PrivateScope.addPrivate(DestVD, EmitLValue(&DRE).getAddress());
+
+          if (IInit) {
+            const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
+            // Emit private VarDecl with copy init.
+            EmitDecl(*VD);
+            Address VDAddr = GetAddrOfLocalVar(VD);
+            bool IsRegistered = PrivateScope.addPrivate(BD, VDAddr);
+            assert(IsRegistered &&
+                   "lastprivate binding already registered as private");
+            (void)IsRegistered;
+          }
+        }
         ++IRef;
         ++IDestRef;
         continue;
@@ -1501,7 +1521,7 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal(
     Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
     EmitBlock(ThenBB);
   }
-  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
+  llvm::DenseSet<const ValueDecl *> AlreadyEmittedVars;
   llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
   if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
     auto IC = LoopDirective->counters().begin();
@@ -1521,8 +1541,34 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal(
     auto IDestRef = C->destination_exprs().begin();
     for (const Expr *AssignOp : C->assignment_ops()) {
       const auto *PrivateDecl = cast<DeclRefExpr>(*IRef)->getDecl();
-      // BindingDecls don't need the same lastprivate finalization as VarDecls.
-      if (isa<BindingDecl>(PrivateDecl)) {
+      // Handle BindingDecls with the same level of support as VarDecls.
+      if (const auto *BD = dyn_cast<BindingDecl>(PrivateDecl)) {
+        QualType Type = BD->getType();
+        const auto *CanonicalBD = cast<ValueDecl>(BD->getCanonicalDecl());
+        if (AlreadyEmittedVars.insert(CanonicalBD).second) {
+          // Get the private address. The BindingDecl was registered in
+          // PrivateScope during initialization, so look it up in LocalDeclMap.
+          auto It = LocalDeclMap.find(CanonicalBD);
+          if (It == LocalDeclMap.end()) {
+            It = LocalDeclMap.find(BD);
+          }
+          assert(It != LocalDeclMap.end() &&
+                 "lastprivate BindingDecl not found in LocalDeclMap");
+          Address PrivateAddr = It->second;
+
+          // Get the original binding address.
+          DeclRefExpr BindingDRE(getContext(), const_cast<BindingDecl *>(BD),
+                                 /*RefersToEnclosingVariableOrCapture=*/true,
+                                 BD->getType(), VK_LValue,
+                                 (*IRef)->getExprLoc());
+          Address OriginalAddr = EmitLValue(&BindingDRE).getAddress();
+
+          const auto *SrcVD =
+              cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
+          const auto *DestVD =
+              cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
+          EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
+        }
         ++IRef;
         ++ISrcRef;
         ++IDestRef;
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index a1e19e69284af..f7beed25f2304 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -19517,6 +19517,7 @@ static bool captureInCapturedRegion(
     ByRef = (Kind == TryCaptureKind::ExplicitByRef);
   } else if (S.getLangOpts().OpenMP && RSI->CapRegionKind == CR_OpenMP) {
     bool IsBindingDecl = isa<BindingDecl>(Var);
+    ValueDecl *DSAVar = Var;
     // Using an LValue reference type is consistent with Lambdas (see below).
     if (VarDecl *VD = S.OpenMP().isOpenMPCapturedDecl(Var)) {
       Var = VD; // Capture the DecompositionDecl.
@@ -19535,7 +19536,7 @@ static bool captureInCapturedRegion(
         S.OpenMP().isOpenMPPrivateDecl(Var, RSI->OpenMPLevel,
                                        RSI->OpenMPCaptureLevel) != OMPC_unknown)
       return true;
-    ByRef = S.OpenMP().isOpenMPCapturedByRef(Var, RSI->OpenMPLevel,
+    ByRef = S.OpenMP().isOpenMPCapturedByRef(DSAVar, RSI->OpenMPLevel,
                                              RSI->OpenMPCaptureLevel);
   }
 
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index d2074f83698d4..073a173b3ae94 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -2972,10 +2972,26 @@ void SemaOpenMP::EndOpenMPDSABlock(Stmt *CurDirective) {
       }
       auto *DRE = cast<DeclRefExpr>(DE->IgnoreParens());
       auto *D = DRE->getDecl();
-      // BindingDecls don't need special lastprivate handling - they're already
-      // handled through their decomposition decl.
-      if (isa<BindingDecl>(D)) {
-        PrivateCopies.push_back(nullptr);
+      if (auto *BD = dyn_cast<BindingDecl>(D)) {
+        QualType Type = BD->getType().getNonReferenceType();
+        const DSAStackTy::DSAVarData DVar =
+            DSAStack->getTopDSA(BD, /*FromParent=*/false);
+        if (DVar.CKind != OMPC_lastprivate) {
+          // The variable is also a firstprivate, so initialization sequence
+          // for private copy is generated already.
+          PrivateCopies.push_back(nullptr);
+          continue;
+        }
+        VarDecl *VDPrivate = buildVarDecl(
+            SemaRef, DE->getExprLoc(), Type.getUnqualifiedType(), BD->getName(),
+            BD->hasAttrs() ? &BD->getAttrs() : nullptr, DRE);
+        SemaRef.ActOnUninitializedDecl(VDPrivate);
+        if (VDPrivate->isInvalidDecl()) {
+          PrivateCopies.push_back(nullptr);
+          continue;
+        }
+        PrivateCopies.push_back(buildDeclRefExpr(
+            SemaRef, VDPrivate, DE->getType(), DE->getExprLoc()));
         continue;
       }
       auto *VD = cast<VarDecl>(D);
diff --git a/clang/test/OpenMP/structured-bindings-codegen.cpp b/clang/test/OpenMP/structured-bindings-codegen.cpp
index 7adb1b3ea34ad..857f77a629247 100644
--- a/clang/test/OpenMP/structured-bindings-codegen.cpp
+++ b/clang/test/OpenMP/structured-bindings-codegen.cpp
@@ -766,32 +766,36 @@ void test_lambda_implicit_capture() {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z20test_target_parallelv.p, i64 8, i1 false)
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
-// CHECK:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20test_target_parallelv_l40(ptr [[TMP0]], ptr null) #[[ATTR3]]
+// CHECK:    [[TMP1:%.*]] = load [[STRUCT_POINT:%.*]], ptr [[TMP0]], align 4
+// CHECK:    store [[STRUCT_POINT]] [[TMP1]], ptr [[DOTCASTED:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = load i64, ptr [[DOTCASTED]], align 8
+// CHECK:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20test_target_parallelv_l40(i64 [[TMP2]], ptr null) #[[ATTR3]]
 // CHECK:    ret void
 //
 //
 // CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20test_target_parallelv_l40(
-// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR2]] {
+// CHECK-SAME: i64 noundef [[TMP0:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR2]] {
 // CHECK:  [[ENTRY:.*:]]
-// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store i64 [[TMP0]], ptr [[DOTADDR:%.*]], align 8
 // CHECK:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2:![0-9]+]], !align [[META3:![0-9]+]]
-// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20test_target_parallelv_l40.omp_outlined, ptr [[TMP1]])
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[DOTADDR]], align 4
+// CHECK:    store i32 [[TMP1]], ptr [[DOTCASTED:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = load i64, ptr [[DOTCASTED]], align 8
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20test_target_parallelv_l40.omp_outlined, i64 [[TMP2]])
 // CHECK:    ret void
 //
 //
 // CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20test_target_parallelv_l40.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[TMP0:%.*]]) #[[ATTR2]] {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
-// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
-// CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
-// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
-// CHECK:    [[TMP3:%.*]] = load i32, ptr [[Y]], align 4
-// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
+// CHECK:    store i64 [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[DOTADDR]], i32 0, i32 0
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[DOTADDR]], i32 0, i32 1
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
 // CHECK:    store i32 [[ADD]], ptr [[SUM:%.*]], align 4
 // CHECK:    ret void
 //
@@ -801,78 +805,82 @@ void test_lambda_implicit_capture() {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z24test_target_parallel_forv.p, i64 8, i1 false)
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
-// CHECK:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z24test_target_parallel_forv_l50(ptr [[TMP0]], ptr null) #[[ATTR3]]
+// CHECK:    [[TMP1:%.*]] = load [[STRUCT_POINT:%.*]], ptr [[TMP0]], align 4
+// CHECK:    store [[STRUCT_POINT]] [[TMP1]], ptr [[DOTCASTED:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = load i64, ptr [[DOTCASTED]], align 8
+// CHECK:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z24test_target_parallel_forv_l50(i64 [[TMP2]], ptr null) #[[ATTR3]]
 // CHECK:    ret void
 //
 //
 // CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z24test_target_parallel_forv_l50(
-// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR2]] {
+// CHECK-SAME: i64 noundef [[TMP0:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR2]] {
 // CHECK:  [[ENTRY:.*:]]
-// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store i64 [[TMP0]], ptr [[DOTADDR:%.*]], align 8
 // CHECK:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z24test_target_parallel_forv_l50.omp_outlined, ptr [[TMP1]])
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[DOTADDR]], align 4
+// CHECK:    store i32 [[TMP1]], ptr [[DOTCASTED:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = load i64, ptr [[DOTCASTED]], align 8
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z24test_target_parallel_forv_l50.omp_outlined, i64 [[TMP2]])
 // CHECK:    ret void
 //
 //
 // CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z24test_target_parallel_forv_l50.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[TMP0:%.*]]) #[[ATTR2]] {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
-// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store i64 [[TMP0]], ptr [[DOTADDR:%.*]], align 8
 // CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
 // CHECK:    store i32 9, ptr [[DOTOMP_UB:%.*]], align 4
 // CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
 // CHECK:    store i32 0, ptr [[DOTOMP_IS_LAST:%.*]], align 4
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
+// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9
 // CHECK:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
 // CHECK:       [[COND_TRUE]]:
 // CHECK:    br label %[[COND_END:.*]]
 // CHECK:       [[COND_FALSE]]:
-// CHECK:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK:    br label %[[COND_END]]
 // CHECK:       [[COND_END]]:
-// CHECK:    [[COND:%.*]] = phi i32 [ 9, %[[COND_TRUE]] ], [ [[TMP5]], %[[COND_FALSE]] ]
+// CHECK:    [[COND:%.*]] = phi i32 [ 9, %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
 // CHECK:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK:    store i32 [[TMP6]], ptr [[DOTOMP_IV:%.*]], align 4
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK:    store i32 [[TMP5]], ptr [[DOTOMP_IV:%.*]], align 4
 // CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
 // CHECK:       [[OMP_INNER_FOR_COND]]:
-// CHECK:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK:    [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
 // CHECK:    br i1 [[CMP1]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
 // CHECK:       [[OMP_INNER_FOR_BODY]]:
-// CHECK:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
 // CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
-// CHECK:    [[TMP10:%.*]] = load i32, ptr [[X]], align 4
-// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
-// CHECK:    [[TMP11:%.*]] = load i32, ptr [[Y]], align 4
-// CHECK:    [[ADD2:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
-// CHECK:    [[TMP12:%.*]] = load i32, ptr [[I]], align 4
-// CHECK:    [[ADD3:%.*]] = add nsw i32 [[ADD2]], [[TMP12]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[DOTADDR]], i32 0, i32 0
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[DOTADDR]], i32 0, i32 1
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD2:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+// CHECK:    [[ADD3:%.*]] = add nsw i32 [[ADD2]], [[TMP11]]
 // CHECK:    store i32 [[ADD3]], ptr [[RESULT:%.*]], align 4
 // CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
 // CHECK:       [[OMP_BODY_CONTINUE]]:
 // CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
 // CHECK:       [[OMP_INNER_FOR_INC]]:
-// CHECK:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1
 // CHECK:    store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
 // CHECK:    br label %[[OMP_INNER_FOR_COND]]
 // CHECK:       [[OMP_INNER_FOR_END]]:
 // CHECK:    br label %[[OMP_LOOP_EXIT:.*]]
 // CHECK:       [[OMP_LOOP_EXIT]]:
-// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
 // CHECK:    ret void
 //
 //
@@ -892,8 +900,8 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
 // CHECK:    store ptr [[P]], ptr [[P_ADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[P_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4:![0-9]+]], !align [[META5:![0-9]+]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[P_ADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P1:%.*]], ptr align 4 [[TMP2]], i64 8, i1 false)
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
 // CHECK:    [[TMP3:%.*]] = load i32, ptr [[X]], align 4
@@ -919,7 +927,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
 // CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
 // CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
@@ -946,8 +954,8 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[SUM]], ptr [[SUM_ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
 // CHECK:    store i32 9, ptr [[DOTOMP_UB:%.*]], align 4
 // CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
@@ -1055,7 +1063,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
 // CHECK:    store i32 9, ptr [[DOTOMP_UB:%.*]], align 4
 // CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
@@ -1125,7 +1133,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
 // CHECK:    store i32 9, ptr [[DOTOMP_UB:%.*]], align 4
 // CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
@@ -1192,78 +1200,82 @@ void test_lambda_implicit_capture() {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z28test_target_teams_distributev.p, i64 8, i1 false)
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
-// CHECK:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28test_target_teams_distributev_l111(ptr [[TMP0]], ptr null) #[[ATTR3]]
+// CHECK:    [[TMP1:%.*]] = load [[STRUCT_POINT:%.*]], ptr [[TMP0]], align 4
+// CHECK:    store [[STRUCT_POINT]] [[TMP1]], ptr [[DOTCASTED:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = load i64, ptr [[DOTCASTED]], align 8
+// CHECK:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28test_target_teams_distributev_l111(i64 [[TMP2]], ptr null) #[[ATTR3]]
 // CHECK:    ret void
 //
 //
 // CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28test_target_teams_distributev_l111(
-// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR2]] {
+// CHECK-SAME: i64 noundef [[TMP0:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR2]] {
 // CHECK:  [[ENTRY:.*:]]
-// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store i64 [[TMP0]], ptr [[DOTADDR:%.*]], align 8
 // CHECK:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28test_target_teams_distributev_l111.omp_outlined, ptr [[TMP1]])
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[DOTADDR]], align 4
+// CHECK:    store i32 [[TMP1]], ptr [[DOTCASTED:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = load i64, ptr [[DOTCASTED]], align 8
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28test_target_teams_distributev_l111.omp_outlined, i64 [[TMP2]])
 // CHECK:    ret void
 //
 //
 // CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28test_target_teams_distributev_l111.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[TMP0:%.*]]) #[[ATTR2]] {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
-// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store i64 [[TMP0]], ptr [[DOTADDR:%.*]], align 8
 // CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
 // CHECK:    store i32 9, ptr [[DOTOMP_UB:%.*]], align 4
 // CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
 // CHECK:    store i32 0, ptr [[DOTOMP_IS_LAST:%.*]], align 4
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB4:[0-9]+]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
+// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB4:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9
 // CHECK:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
 // CHECK:       [[COND_TRUE]]:
 // CHECK:    br label %[[COND_END:.*]]
 // CHECK:       [[COND_FALSE]]:
-// CHECK:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK:    br label %[[COND_END]]
 // CHECK:       [[COND_END]]:
-// CHECK:    [[COND:%.*]] = phi i32 [ 9, %[[COND_TRUE]] ], [ [[TMP5]], %[[COND_FALSE]] ]
+// CHECK:    [[COND:%.*]] = phi i32 [ 9, %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
 // CHECK:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK:    store i32 [[TMP6]], ptr [[DOTOMP_IV:%.*]], align 4
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK:    store i32 [[TMP5]], ptr [[DOTOMP_IV:%.*]], align 4
 // CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
 // CHECK:       [[OMP_INNER_FOR_COND]]:
-// CHECK:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK:    [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
 // CHECK:    br i1 [[CMP1]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
 // CHECK:       [[OMP_INNER_FOR_BODY]]:
-// CHECK:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
 // CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
-// CHECK:    [[TMP10:%.*]] = load i32, ptr [[X]], align 4
-// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
-// CHECK:    [[TMP11:%.*]] = load i32, ptr [[Y]], align 4
-// CHECK:    [[ADD2:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
-// CHECK:    [[TMP12:%.*]] = load i32, ptr [[I]], align 4
-// CHECK:    [[ADD3:%.*]] = add nsw i32 [[ADD2]], [[TMP12]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[DOTADDR]], i32 0, i32 0
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[DOTADDR]], i32 0, i32 1
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD2:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+// CHECK:    [[ADD3:%.*]] = add nsw i32 [[ADD2]], [[TMP11]]
 // CHECK:    store i32 [[ADD3]], ptr [[RESULT:%.*]], align 4
 // CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
 // CHECK:       [[OMP_BODY_CONTINUE]]:
 // CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
 // CHECK:       [[OMP_INNER_FOR_INC]]:
-// CHECK:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1
 // CHECK:    store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
 // CHECK:    br label %[[OMP_INNER_FOR_COND]]
 // CHECK:       [[OMP_INNER_FOR_END]]:
 // CHECK:    br label %[[OMP_LOOP_EXIT:.*]]
 // CHECK:       [[OMP_LOOP_EXIT]]:
-// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB4]], i32 [[TMP3]])
+// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB4]], i32 [[TMP2]])
 // CHECK:    ret void
 //
 //
@@ -1306,9 +1318,9 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I:%.*]], align 8, !noalias [[META19]]
 // CHECK:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I:%.*]], align 8, !noalias [[META19]]
 // CHECK:    [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META19]]
-// CHECK:    [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
-// CHECK:    [[TMP11:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP11:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[Y_I:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP11]], i32 0, i32 1
 // CHECK:    [[TMP12:%.*]] = load i32, ptr [[Y_I]], align 4
 // CHECK:    [[ADD_I:%.*]] = add nsw i32 [[TMP10]], [[TMP12]]
@@ -1375,9 +1387,9 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I:%.*]], align 8, !noalias [[META29]]
 // CHECK:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I:%.*]], align 8, !noalias [[META29]]
 // CHECK:    [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META29]]
-// CHECK:    [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
-// CHECK:    [[TMP11:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP11:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[Y_I:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP11]], i32 0, i32 1
 // CHECK:    [[TMP12:%.*]] = load i32, ptr [[Y_I]], align 4
 // CHECK:    [[ADD_I:%.*]] = add nsw i32 [[TMP10]], [[TMP12]]
@@ -1454,7 +1466,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    [[TMP19:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias [[META41]]
 // CHECK:    [[CONV_I:%.*]] = trunc i64 [[TMP19]] to i32
 // CHECK:    store i32 [[CONV_I]], ptr [[DOTOMP_IV_I:%.*]], align 4, !noalias [[META41]]
-// CHECK:    [[TMP20:%.*]] = load ptr, ptr [[TMP18]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP20:%.*]] = load ptr, ptr [[TMP18]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    br label %[[OMP_INNER_FOR_COND_I:.*]]
 // CHECK:       [[OMP_INNER_FOR_COND_I]]:
 // CHECK:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META41]]
@@ -1579,8 +1591,8 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[RESULT]], ptr [[RESULT_ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    store i32 0, ptr [[RESULT1:%.*]], align 4
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
 // CHECK:    [[TMP3:%.*]] = load i32, ptr [[X]], align 4
@@ -1636,7 +1648,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[RESULT]], ptr [[RESULT_ADDR:%.*]], align 8
 // CHECK:    store i64 [[TMP0]], ptr [[DOTADDR:%.*]], align 8
 // CHECK:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[DOTADDR]], i32 0, i32 0
 // CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
 // CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[DOTADDR]], i32 0, i32 1
@@ -1669,14 +1681,14 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I:%.*]], align 8, !noalias [[META51]]
 // CHECK:    [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META51]]
 // CHECK:    [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_6:%.*]], ptr [[TMP8]], i32 0, i32 1
-// CHECK:    [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
 // CHECK:    [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_6]], ptr [[TMP8]], i32 0, i32 1
-// CHECK:    [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[Y_I:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP13]], i32 0, i32 1
 // CHECK:    [[TMP14:%.*]] = load i32, ptr [[Y_I]], align 4
 // CHECK:    [[ADD_I:%.*]] = add nsw i32 [[TMP11]], [[TMP14]]
-// CHECK:    [[TMP15:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP15:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    store i32 [[ADD_I]], ptr [[TMP15]], align 4
 // CHECK:    ret i32 0
 //
@@ -1688,8 +1700,8 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[RESULT]], ptr [[RESULT_ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    store i32 0, ptr [[RESULT1:%.*]], align 4
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT3D:%.*]], ptr [[TMP2]], i32 0, i32 0
 // CHECK:    [[TMP3:%.*]] = load i32, ptr [[X]], align 4
@@ -1893,9 +1905,9 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I:%.*]], align 8, !noalias [[META62]]
 // CHECK:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I:%.*]], align 8, !noalias [[META62]]
 // CHECK:    [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META62]]
-// CHECK:    [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
-// CHECK:    [[TMP11:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP11:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[TMP12:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP11]], i64 0, i64 1
 // CHECK:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
 // CHECK:    [[ADD_I:%.*]] = add nsw i32 [[TMP10]], [[TMP13]]
@@ -1918,7 +1930,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 // CHECK:    call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP3]], ptr @.gomp_critical_user_.var)
@@ -1962,9 +1974,9 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I:%.*]], align 8, !noalias [[META72]]
 // CHECK:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I:%.*]], align 8, !noalias [[META72]]
 // CHECK:    [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META72]]
-// CHECK:    [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
-// CHECK:    [[TMP11:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP11:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[Y_I:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP11]], i32 0, i32 1
 // CHECK:    [[TMP12:%.*]] = load i32, ptr [[Y_I]], align 4
 // CHECK:    [[MUL_I:%.*]] = mul nsw i32 [[TMP10]], [[TMP12]]
@@ -1977,7 +1989,7 @@ void test_lambda_implicit_capture() {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z22test_reference_bindingv.p, i64 8, i1 false)
 // CHECK:    store ptr [[P]], ptr [[TMP0:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z22test_reference_bindingv.omp_outlined, ptr [[TMP1]])
 // CHECK:    ret void
 //
@@ -1988,12 +2000,12 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    store ptr [[TMP1]], ptr [[TMP:%.*]], align 8
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
 // CHECK:    [[TMP3:%.*]] = load i32, ptr [[X]], align 4
-// CHECK:    [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP4]], i32 0, i32 1
 // CHECK:    [[TMP5:%.*]] = load i32, ptr [[Y]], align 4
 // CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP5]]
@@ -2016,7 +2028,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
 // CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
 // CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
@@ -2044,8 +2056,8 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
 // CHECK:    [[TMP4:%.*]] = load i32, ptr [[X]], align 4
 // CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP2]], i32 0, i32 1
@@ -2080,9 +2092,9 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
 // CHECK:    store ptr [[P1]], ptr [[P1_ADDR:%.*]], align 8
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    [[TMP4:%.*]] = load ptr, ptr [[P1_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP4:%.*]] = load ptr, ptr [[P1_ADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P12:%.*]], ptr align 4 [[TMP4]], i64 8, i1 false)
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
 // CHECK:    [[TMP5:%.*]] = load i32, ptr [[X]], align 4
@@ -2125,7 +2137,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[TMP2:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i64 0, i64 0
 // CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 // CHECK:    [[TMP4:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i64 0, i64 1
@@ -2153,7 +2165,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 // CHECK:    [[TMP4:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP3]])
@@ -2188,7 +2200,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    store i32 0, ptr [[DOTOMP_SECTIONS_LB_:%.*]], align 4
 // CHECK:    store i32 1, ptr [[DOTOMP_SECTIONS_UB_:%.*]], align 4
 // CHECK:    store i32 1, ptr [[DOTOMP_SECTIONS_ST_:%.*]], align 4
@@ -2257,7 +2269,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z20test_nested_parallelv.omp_outlined.omp_outlined, ptr [[TMP1]])
 // CHECK:    ret void
 //
@@ -2268,7 +2280,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
 // CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
 // CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
@@ -2338,22 +2350,24 @@ void test_lambda_implicit_capture() {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z37test_firstprivate_individual_bindingsv.p, i64 8, i1 false)
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
-// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z37test_firstprivate_individual_bindingsv.omp_outlined, ptr [[TMP0]])
+// CHECK:    [[TMP1:%.*]] = load [[STRUCT_POINT:%.*]], ptr [[TMP0]], align 4
+// CHECK:    store [[STRUCT_POINT]] [[TMP1]], ptr [[DOTCASTED:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = load i64, ptr [[DOTCASTED]], align 8
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z37test_firstprivate_individual_bindingsv.omp_outlined, i64 [[TMP2]])
 // CHECK:    ret void
 //
 //
 // CHECK-LABEL: define internal void @_Z37test_firstprivate_individual_bindingsv.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[TMP0:%.*]]) #[[ATTR2]] {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
-// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 1
-// CHECK:    [[TMP2:%.*]] = load i32, ptr [[Y]], align 4
-// CHECK:    store i32 [[TMP2]], ptr [[B:%.*]], align 4
-// CHECK:    [[TMP3:%.*]] = load i32, ptr [[B]], align 4
-// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 10
+// CHECK:    store i64 [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[DOTADDR]], i32 0, i32 1
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    store i32 [[TMP1]], ptr [[B:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[B]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP2]], 10
 // CHECK:    store i32 [[ADD]], ptr [[B]], align 4
 // CHECK:    ret void
 //
@@ -2363,24 +2377,26 @@ void test_lambda_implicit_capture() {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z14test_mixed_dsav.p, i64 8, i1 false)
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
-// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z14test_mixed_dsav.omp_outlined, ptr [[TMP0]])
+// CHECK:    [[TMP1:%.*]] = load [[STRUCT_POINT:%.*]], ptr [[TMP0]], align 4
+// CHECK:    store [[STRUCT_POINT]] [[TMP1]], ptr [[DOTCASTED:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = load i64, ptr [[DOTCASTED]], align 8
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z14test_mixed_dsav.omp_outlined, i64 [[TMP2]])
 // CHECK:    ret void
 //
 //
 // CHECK-LABEL: define internal void @_Z14test_mixed_dsav.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[TMP0:%.*]]) #[[ATTR2]] {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
-// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
-// CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
-// CHECK:    store i32 [[TMP2]], ptr [[A:%.*]], align 4
-// CHECK:    [[TMP3:%.*]] = load i32, ptr [[A]], align 4
-// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
-// CHECK:    [[TMP4:%.*]] = load i32, ptr [[Y]], align 4
-// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP4]]
+// CHECK:    store i64 [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[DOTADDR]], i32 0, i32 0
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    store i32 [[TMP1]], ptr [[A:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[A]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[DOTADDR]], i32 0, i32 1
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
 // CHECK:    store i32 [[ADD]], ptr [[RESULT:%.*]], align 4
 // CHECK:    ret void
 //
@@ -2470,7 +2486,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
 // CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
 // CHECK:    call void @_Z3usei(i32 noundef [[TMP2]])
@@ -2541,7 +2557,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
 // CHECK:    store i32 99, ptr [[DOTOMP_UB:%.*]], align 4
 // CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
@@ -2652,7 +2668,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
 // CHECK:    store i32 9, ptr [[DOTOMP_UB:%.*]], align 4
 // CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
@@ -2820,11 +2836,12 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
 // CHECK:    store i32 9, ptr [[DOTOMP_UB:%.*]], align 4
 // CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
 // CHECK:    store i32 0, ptr [[DOTOMP_IS_LAST:%.*]], align 4
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
 // CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 // CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
@@ -2854,8 +2871,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4
 // CHECK:    [[TMP10:%.*]] = load i32, ptr [[I]], align 4
 // CHECK:    [[MUL2:%.*]] = mul nsw i32 [[TMP10]], 10
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
-// CHECK:    store i32 [[MUL2]], ptr [[X]], align 4
+// CHECK:    store i32 [[MUL2]], ptr [[A:%.*]], align 4
 // CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
 // CHECK:       [[OMP_BODY_CONTINUE]]:
 // CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
@@ -2872,6 +2888,8 @@ void test_lambda_implicit_capture() {
 // CHECK:    [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
 // CHECK:    br i1 [[TMP13]], [[DOTOMP_LASTPRIVATE_THEN:label %.*]], [[DOTOMP_LASTPRIVATE_DONE:label %.*]]
 // CHECK:       [[_OMP_LASTPRIVATE_THEN:.*:]]
+// CHECK:    [[TMP14:%.*]] = load i32, ptr [[A]], align 4
+// CHECK:    store i32 [[TMP14]], ptr [[A]], align 4
 // CHECK:    br [[DOTOMP_LASTPRIVATE_DONE]]
 // CHECK:       [[_OMP_LASTPRIVATE_DONE:.*:]]
 // CHECK:    ret void
@@ -2947,11 +2965,12 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
 // CHECK:    store i32 99, ptr [[DOTOMP_UB:%.*]], align 4
 // CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
 // CHECK:    store i32 0, ptr [[DOTOMP_IS_LAST:%.*]], align 4
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
 // CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 // CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
@@ -2985,9 +3004,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    br i1 [[CMP2]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
 // CHECK:       [[IF_THEN]]:
 // CHECK:    [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
-// CHECK:    store i32 [[TMP11]], ptr [[X]], align 4
-// CHECK:    [[X3:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    store i32 [[TMP11]], ptr [[A:%.*]], align 4
 // CHECK:    [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK:    call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP3]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var)
 // CHECK:    [[TMP13:%.*]] = load i32, ptr @.{{pl_cond[.].+[.|,]}} align 4
@@ -2995,7 +3012,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    br i1 [[TMP14]], label %[[LP_COND_THEN:.*]], label %[[LP_COND_EXIT:.*]]
 // CHECK:       [[LP_COND_THEN]]:
 // CHECK:    store i32 [[TMP12]], ptr @.{{pl_cond[.].+[.|,]}} align 4
-// CHECK:    [[TMP15:%.*]] = load i32, ptr [[X3]], align 4
+// CHECK:    [[TMP15:%.*]] = load i32, ptr [[A]], align 4
 // CHECK:    store i32 [[TMP15]], ptr @{{pl_cond[.].+[.|,]}} align 4
 // CHECK:    br label %[[LP_COND_EXIT]]
 // CHECK:       [[LP_COND_EXIT]]:
@@ -3007,8 +3024,8 @@ void test_lambda_implicit_capture() {
 // CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
 // CHECK:       [[OMP_INNER_FOR_INC]]:
 // CHECK:    [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[ADD4:%.*]] = add nsw i32 [[TMP16]], 1
-// CHECK:    store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1
+// CHECK:    store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
 // CHECK:    br label %[[OMP_INNER_FOR_COND]]
 // CHECK:       [[OMP_INNER_FOR_END]]:
 // CHECK:    br label %[[OMP_LOOP_EXIT:.*]]
@@ -3019,6 +3036,8 @@ void test_lambda_implicit_capture() {
 // CHECK:    call void @__kmpc_barrier(ptr @[[GLOB7:[0-9]+]], i32 [[TMP3]])
 // CHECK:    br i1 [[TMP18]], [[DOTOMP_LASTPRIVATE_THEN:label %.*]], [[DOTOMP_LASTPRIVATE_DONE:label %.*]]
 // CHECK:       [[_OMP_LASTPRIVATE_THEN:.*:]]
+// CHECK:    [[TMP19:%.*]] = load i32, ptr [[A]], align 4
+// CHECK:    store i32 [[TMP19]], ptr [[A]], align 4
 // CHECK:    br [[DOTOMP_LASTPRIVATE_DONE]]
 // CHECK:       [[_OMP_LASTPRIVATE_DONE:.*:]]
 // CHECK:    ret void
@@ -3045,7 +3064,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
 // CHECK:    store i32 99, ptr [[DOTOMP_UB:%.*]], align 4
 // CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
@@ -3218,7 +3237,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
 // CHECK:    store i32 0, ptr [[A:%.*]], align 4
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P2:%.*]], ptr align 4 @"__const.<captured>.p2", i64 8, i1 false)
@@ -3314,7 +3333,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP1]], i32 0, i32 0
 // CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[X]])
 // CHECK:    [[VALUE:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY:%.*]], ptr [[A]], i32 0, i32 0
@@ -3372,11 +3391,11 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
 // CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[THIS1]], i32 0, i32 0
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[X2:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[TMP1]], i32 0, i32 0
 // CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[X]], ptr noundef nonnull align 4 dereferenceable(8) [[X2]])
 // CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[THIS1]], i32 0, i32 1
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[Y3:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[TMP2]], i32 0, i32 1
 // CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[Y]], ptr noundef nonnull align 4 dereferenceable(8) [[Y3]])
 // CHECK:    ret void
@@ -3389,12 +3408,12 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[OTHER]], ptr [[OTHER_ADDR:%.*]], align 8
 // CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK:    [[VALUE:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY:%.*]], ptr [[THIS1]], i32 0, i32 0
-// CHECK:    [[TMP0:%.*]] = load ptr, ptr [[OTHER_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP0:%.*]] = load ptr, ptr [[OTHER_ADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[VALUE2:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY]], ptr [[TMP0]], i32 0, i32 0
 // CHECK:    [[TMP1:%.*]] = load i32, ptr [[VALUE2]], align 4
 // CHECK:    store i32 [[TMP1]], ptr [[VALUE]], align 4
 // CHECK:    [[COPY_COUNT:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY]], ptr [[THIS1]], i32 0, i32 1
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[OTHER_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[OTHER_ADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[COPY_COUNT3:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY]], ptr [[TMP2]], i32 0, i32 1
 // CHECK:    [[TMP3:%.*]] = load i32, ptr [[COPY_COUNT3]], align 4
 // CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
@@ -3430,7 +3449,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[P]], i32 0, i32 1
 // CHECK:    call void @_ZN14NonTrivialCopyC1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[Y]], i32 noundef 20)
 // CHECK:    store ptr [[P]], ptr [[TMP0:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z32test_firstprivate_ref_binding_sbv.omp_outlined, ptr [[TMP1]])
 // CHECK:    call void @_ZN4PairD1Ev(ptr noundef nonnull align 4 dead_on_return(16) dereferenceable(16) [[P]]) #[[ATTR3]]
 // CHECK:    ret void
@@ -3442,9 +3461,9 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    store ptr [[TMP1]], ptr [[TMP:%.*]], align 8
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP2]], i32 0, i32 0
 // CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[X]])
 // CHECK:    [[VALUE:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY:%.*]], ptr [[A]], i32 0, i32 0
@@ -3460,7 +3479,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[P]], i32 0, i32 1
 // CHECK:    call void @_ZN14NonTrivialCopyC1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[Y]], i32 noundef 20)
 // CHECK:    store ptr [[P]], ptr [[TMP0:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z38test_firstprivate_const_ref_binding_sbv.omp_outlined, ptr [[TMP1]])
 // CHECK:    call void @_ZN4PairD1Ev(ptr noundef nonnull align 4 dead_on_return(16) dereferenceable(16) [[P]]) #[[ATTR3]]
 // CHECK:    ret void
@@ -3472,9 +3491,9 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    store ptr [[TMP1]], ptr [[TMP:%.*]], align 8
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP2]], i32 0, i32 0
 // CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[X]])
 // CHECK:    [[VALUE:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY:%.*]], ptr [[A]], i32 0, i32 0
@@ -3502,7 +3521,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP1]], i32 0, i32 0
 // CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[X]])
 // CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[TMP1]], i32 0, i32 1
@@ -3559,7 +3578,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META85:![0-9]+]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META85:![0-9]+]]
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIRWITHDTOR:%.*]], ptr [[TMP1]], i32 0, i32 0
 // CHECK:    call void @_ZN8WithDtorC1ERKS_(ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[X]])
 // CHECK:    [[PTR:%.*]] = getelementptr inbounds nuw [[STRUCT_WITHDTOR:%.*]], ptr [[A]], i32 0, i32 0
@@ -3618,11 +3637,11 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
 // CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIRWITHDTOR:%.*]], ptr [[THIS1]], i32 0, i32 0
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META85]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META85]]
 // CHECK:    [[X2:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIRWITHDTOR]], ptr [[TMP1]], i32 0, i32 0
 // CHECK:    call void @_ZN8WithDtorC1ERKS_(ptr noundef nonnull align 8 dereferenceable(8) [[X]], ptr noundef nonnull align 8 dereferenceable(8) [[X2]])
 // CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIRWITHDTOR]], ptr [[THIS1]], i32 0, i32 1
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META85]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META85]]
 // CHECK:    [[Y3:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIRWITHDTOR]], ptr [[TMP2]], i32 0, i32 1
 // CHECK:    call void @_ZN8WithDtorC1ERKS_(ptr noundef nonnull align 8 dereferenceable(8) [[Y]], ptr noundef nonnull align 8 dereferenceable(8) [[Y3]])
 // CHECK:    ret void
@@ -3636,7 +3655,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK:    [[PTR:%.*]] = getelementptr inbounds nuw [[STRUCT_WITHDTOR:%.*]], ptr [[THIS1]], i32 0, i32 0
 // CHECK:    [[CALL:%.*]] = call noalias noundef nonnull ptr @_Znwm(i64 noundef 4) #[[ATTR11]]
-// CHECK:    [[TMP0:%.*]] = load ptr, ptr [[OTHER_ADDR]], align 8, !nonnull [[META2]], !align [[META85]]
+// CHECK:    [[TMP0:%.*]] = load ptr, ptr [[OTHER_ADDR]], align 8, !nonnull [[META4]], !align [[META85]]
 // CHECK:    [[PTR2:%.*]] = getelementptr inbounds nuw [[STRUCT_WITHDTOR]], ptr [[TMP0]], i32 0, i32 0
 // CHECK:    [[TMP1:%.*]] = load ptr, ptr [[PTR2]], align 8
 // CHECK:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
@@ -3722,7 +3741,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[TMP2:%.*]] = getelementptr inbounds [3 x [[STRUCT_NONTRIVIALCOPY:%.*]]], ptr [[TMP1]], i64 0, i64 0
 // CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP2]])
 // CHECK:    [[TMP3:%.*]] = getelementptr inbounds [3 x [[STRUCT_NONTRIVIALCOPY]]], ptr [[TMP1]], i64 0, i64 1
@@ -3763,7 +3782,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP1]], i32 0, i32 0
 // CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[X]])
 // CHECK:    [[VALUE:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY:%.*]], ptr [[A]], i32 0, i32 0
@@ -3815,7 +3834,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[I1:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER:%.*]], ptr [[TMP1]], i32 0, i32 0
 // CHECK:    call void @_ZN5InnerC1ERKS_(ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I1]])
 // CHECK:    [[VAL:%.*]] = getelementptr inbounds nuw [[STRUCT_INNER:%.*]], ptr [[X]], i32 0, i32 0
@@ -3871,11 +3890,11 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
 // CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK:    [[I1:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER:%.*]], ptr [[THIS1]], i32 0, i32 0
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[I12:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[TMP1]], i32 0, i32 0
 // CHECK:    call void @_ZN5InnerC1ERKS_(ptr noundef nonnull align 4 dereferenceable(4) [[I1]], ptr noundef nonnull align 4 dereferenceable(4) [[I12]])
 // CHECK:    [[I2:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[THIS1]], i32 0, i32 1
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[I23:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[TMP2]], i32 0, i32 1
 // CHECK:    call void @_ZN5InnerC1ERKS_(ptr noundef nonnull align 4 dereferenceable(4) [[I2]], ptr noundef nonnull align 4 dereferenceable(4) [[I23]])
 // CHECK:    ret void
@@ -3888,7 +3907,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[O]], ptr [[O_ADDR:%.*]], align 8
 // CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK:    [[VAL:%.*]] = getelementptr inbounds nuw [[STRUCT_INNER:%.*]], ptr [[THIS1]], i32 0, i32 0
-// CHECK:    [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[VAL2:%.*]] = getelementptr inbounds nuw [[STRUCT_INNER]], ptr [[TMP0]], i32 0, i32 0
 // CHECK:    [[TMP1:%.*]] = load i32, ptr [[VAL2]], align 4
 // CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP1]], 2
@@ -3924,7 +3943,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[P]], i32 0, i32 1
 // CHECK:    call void @_ZN14NonTrivialCopyC1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[Y]], i32 noundef 20)
 // CHECK:    store ptr [[P]], ptr [[TMP0:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z46test_firstprivate_ref_binding_both_bindings_sbv.omp_outlined, ptr [[TMP1]])
 // CHECK:    call void @_ZN4PairD1Ev(ptr noundef nonnull align 4 dead_on_return(16) dereferenceable(16) [[P]]) #[[ATTR3]]
 // CHECK:    ret void
@@ -3936,12 +3955,12 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    store ptr [[TMP1]], ptr [[TMP:%.*]], align 8
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP2]], i32 0, i32 0
 // CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[X]])
-// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[TMP3]], i32 0, i32 1
 // CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[Y]])
 // CHECK:    [[VALUE:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY:%.*]], ptr [[A]], i32 0, i32 0
@@ -3962,7 +3981,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[P]], i32 0, i32 1
 // CHECK:    call void @_ZN14NonTrivialCopyC1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[Y]], i32 noundef 20)
 // CHECK:    store ptr [[P]], ptr [[TMP0:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z52test_firstprivate_const_ref_binding_both_bindings_sbv.omp_outlined, ptr [[TMP1]])
 // CHECK:    call void @_ZN4PairD1Ev(ptr noundef nonnull align 4 dead_on_return(16) dereferenceable(16) [[P]]) #[[ATTR3]]
 // CHECK:    ret void
@@ -3974,12 +3993,12 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    store ptr [[TMP1]], ptr [[TMP:%.*]], align 8
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP2]], i32 0, i32 0
 // CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[X]])
-// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[TMP3]], i32 0, i32 1
 // CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[Y]])
 // CHECK:    [[VALUE:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY:%.*]], ptr [[A]], i32 0, i32 0

>From 86d4049092db8ac3b9c1c39f5e21aa5049c99b35 Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Tue, 16 Jun 2026 14:19:16 -0700
Subject: [PATCH 38/45] Addressed part of the reviews

---
 clang/lib/CodeGen/CGStmtOpenMP.cpp  | 26 ++++++++++----------------
 clang/lib/CodeGen/CodeGenFunction.h | 26 +++++++++++++++++++++-----
 clang/lib/Sema/SemaExpr.cpp         |  9 ++++++++-
 clang/lib/Sema/SemaOpenMP.cpp       |  2 +-
 4 files changed, 40 insertions(+), 23 deletions(-)

diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 46e2303459c36..aa74c532517d3 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1189,7 +1189,7 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
       // Original VarDecl logic.
       assert(OrigVD && "Expected VarDecl for non-BindingDecl firstprivate");
       bool ThisFirstprivateIsLastprivate =
-          OrigVD && Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
+          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
       const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
       if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
           !FD->getType()->isReferenceType() &&
@@ -1546,14 +1546,9 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal(
         QualType Type = BD->getType();
         const auto *CanonicalBD = cast<ValueDecl>(BD->getCanonicalDecl());
         if (AlreadyEmittedVars.insert(CanonicalBD).second) {
-          // Get the private address. The BindingDecl was registered in
-          // PrivateScope during initialization, so look it up in LocalDeclMap.
-          auto It = LocalDeclMap.find(CanonicalBD);
-          if (It == LocalDeclMap.end()) {
-            It = LocalDeclMap.find(BD);
-          }
-          assert(It != LocalDeclMap.end() &&
-                 "lastprivate BindingDecl not found in LocalDeclMap");
+          auto It = OMPPrivatizedBindings.find(BD);
+          assert(It != OMPPrivatizedBindings.end() &&
+                 "lastprivate BindingDecl not found in OMPPrivatizedBindings");
           Address PrivateAddr = It->second;
 
           // Get the original binding address.
@@ -2720,16 +2715,15 @@ void CodeGenFunction::EmitOMPLinearClauseFinal(
       }
       const auto *OrigDecl = cast<DeclRefExpr>(*IC)->getDecl();
       Address OrigAddr = [&]() -> Address {
-        if (dyn_cast<BindingDecl>(OrigDecl)) {
+        if (isa<BindingDecl>(OrigDecl)) {
           // BindingDecl: use the original expression directly.
           return EmitLValue(*IC).getAddress();
-        } else {
-          const auto *OrigVD = cast<VarDecl>(OrigDecl);
-          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
-                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
-                          (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
-          return EmitLValue(&DRE).getAddress();
         }
+        const auto *OrigVD = cast<VarDecl>(OrigDecl);
+        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
+                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
+                        (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
+        return EmitLValue(&DRE).getAddress();
       }();
       CodeGenFunction::OMPPrivateScope VarScope(*this);
       VarScope.addPrivate(OrigDecl, OrigAddr);
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 343f5c58f36df..0a116c6b7568c 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -1223,13 +1223,12 @@ class CodeGenFunction : public CodeGenTypeCache {
     OMPMapVars MappedVars;
     OMPPrivateScope(const OMPPrivateScope &) = delete;
     void operator=(const OMPPrivateScope &) = delete;
-    llvm::DenseMap<const BindingDecl *, Address> SavedBindings;
+    llvm::SmallVector<std::pair<const BindingDecl *, std::optional<Address>>, 4>
+        BindingChanges;
 
   public:
     /// Enter a new OpenMP private scope.
-    explicit OMPPrivateScope(CodeGenFunction &CGF) : RunCleanupsScope(CGF) {
-      SavedBindings = CGF.OMPPrivatizedBindings;
-    }
+    explicit OMPPrivateScope(CodeGenFunction &CGF) : RunCleanupsScope(CGF) {}
 
     /// Registers \p LocalVD variable as a private with \p Addr as the address
     /// of the corresponding private variable. \p
@@ -1238,6 +1237,14 @@ class CodeGenFunction : public CodeGenTypeCache {
     /// been privatized already.
     bool addPrivate(const ValueDecl *LocalVD, Address Addr) {
       assert(PerformCleanup && "adding private to dead scope");
+      if (const auto *BD = dyn_cast<BindingDecl>(LocalVD->getCanonicalDecl())) {
+        auto It = CGF.OMPPrivatizedBindings.find(BD);
+        if (It != CGF.OMPPrivatizedBindings.end()) {
+          BindingChanges.emplace_back(BD, It->second);
+        } else {
+          BindingChanges.emplace_back(BD, std::nullopt);
+        }
+      }
       return MappedVars.setVarAddr(CGF, LocalVD, Addr);
     }
 
@@ -1260,7 +1267,16 @@ class CodeGenFunction : public CodeGenTypeCache {
     ~OMPPrivateScope() {
       if (PerformCleanup)
         ForceCleanup();
-      CGF.OMPPrivatizedBindings = std::move(SavedBindings);
+      for (auto &Change : llvm::reverse(BindingChanges)) { // undo newest first
+        if (Change.second.has_value()) {
+          auto It = CGF.OMPPrivatizedBindings.find(Change.first);
+          assert(It != CGF.OMPPrivatizedBindings.end() &&
+              "Entry should exist when restoring previous value");
+          It->second = *Change.second;
+        } else {
+          CGF.OMPPrivatizedBindings.erase(Change.first);
+        }
+      }
     }
 
     /// Checks if the global variable is captured in current function.
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index f7beed25f2304..25cdf8ec1802d 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -19408,9 +19408,16 @@ static bool isVariableCapturable(CapturingScopeInfo *CSI, ValueDecl *Var,
     return false;
   }
 
-  if (isa<BindingDecl>(Var)) {
+  if (auto *BD = dyn_cast<BindingDecl>(Var)) {
     if (auto *RSI = dyn_cast<CapturedRegionScopeInfo>(CSI)) {
       if (RSI->CapRegionKind == CR_OpenMP) {
+        if (BD->getHoldingVar()) {
+          if (Diagnose) {
+            S.Diag(Loc, diag::err_capture_tuple_binding_openmp) << Var;
+            S.Diag(Var->getLocation(), diag::note_entity_declared_at) << Var;
+          }
+          return false;
+        }
         if (Diagnose && S.getLangOpts().CPlusPlus) {
           S.Diag(Loc, S.LangOpts.CPlusPlus20
                           ? diag::warn_cxx17_compat_capture_binding
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 073a173b3ae94..d9cc6834ac06e 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -21618,7 +21618,7 @@ static bool FinishOpenMPLinearClause(OMPLinearClause &Clause, DeclRefExpr *IV,
     // Build privatized reference to the current linear var.
     auto *DE = cast<DeclRefExpr>(SimpleRefExpr);
     Expr *CapturedRef;
-    if (dyn_cast<BindingDecl>(DE->getDecl())) {
+    if (isa<BindingDecl>(DE->getDecl())) {
       CapturedRef = SimpleRefExpr;
     } else if (LinKind == OMPC_LINEAR_uval) {
       CapturedRef = cast<VarDecl>(DE->getDecl())->getInit();

>From bab77f8ad3283f04f5fb7890bf4ad25329948f9d Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Wed, 17 Jun 2026 07:48:17 -0700
Subject: [PATCH 39/45] Fixed the reduction codegen gaps

---
 .../clang/Basic/DiagnosticSemaKinds.td        |   4 +
 clang/lib/CodeGen/CodeGenFunction.h           |   2 +-
 clang/lib/Sema/SemaOpenMP.cpp                 |  13 +
 .../OpenMP/structured-bindings-codegen.cpp    | 244 ++++++++++++++++++
 ...uctured-bindings-reduction-unsupported.cpp |  40 +++
 5 files changed, 302 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/OpenMP/structured-bindings-reduction-unsupported.cpp

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 2712a110551be..3df6cf614200e 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12390,6 +12390,10 @@ def err_omp_parent_cancel_region_ordered : Error<
   "parent region for 'omp %select{cancellation point|cancel}0' construct cannot be ordered">;
 def err_omp_reduction_wrong_type : Error<"reduction type cannot be %select{qualified with 'const', 'volatile' or 'restrict'|a function|a reference|an array}0 type">;
 def err_omp_wrong_var_in_declare_reduction : Error<"only %select{'omp_priv' or 'omp_orig'|'omp_in' or 'omp_out'}0 variables are allowed in %select{initializer|combiner}0 expression">;
+def err_omp_array_reduction_on_binding : Error<
+  "array-type reductions on structured bindings are not yet supported">;
+def err_omp_udr_reduction_on_binding : Error<
+  "user-defined reductions on structured bindings are not yet supported">;
 def err_omp_declare_reduction_redefinition : Error<"redefinition of user-defined reduction for type %0">;
 def err_omp_mapper_wrong_type : Error<
   "mapper type must be of struct, union or class type">;
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 0a116c6b7568c..8d4edd29c7bb4 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -1271,7 +1271,7 @@ class CodeGenFunction : public CodeGenTypeCache {
         if (Change.second.has_value()) {
           auto It = CGF.OMPPrivatizedBindings.find(Change.first);
           assert(It != CGF.OMPPrivatizedBindings.end() &&
-              "Entry should exist when restoring previous value");
+                 "Entry should exist when restoring previous value");
           It->second = *Change.second;
         } else {
           CGF.OMPPrivatizedBindings.erase(Change.first);
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index d9cc6834ac06e..565108fdcf394 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -20689,6 +20689,19 @@ static bool actOnOMPReductionKindClause(
       Type = Context.getBaseElementType(D->getType().getNonReferenceType());
     }
     auto *VD = dyn_cast<VarDecl>(D);
+    auto *BD = dyn_cast<BindingDecl>(D);
+
+    // Check for unsupported reduction forms on structured bindings.
+    if (BD && (D->getType().getNonReferenceType()->isArrayType() ||
+               BOK == BO_Comma)) {
+      // Array-type reductions are not supported.
+      if (D->getType().getNonReferenceType()->isArrayType())
+        S.Diag(ELoc, diag::err_omp_array_reduction_on_binding);
+      else
+        // User-defined reductions (declare reduction) are not supported.
+        S.Diag(ELoc, diag::err_omp_udr_reduction_on_binding);
+      continue;
+    }
 
     // OpenMP [2.9.3.3, Restrictions, C/C++, p.3]
     //  A variable that appears in a private clause must not have an incomplete
diff --git a/clang/test/OpenMP/structured-bindings-codegen.cpp b/clang/test/OpenMP/structured-bindings-codegen.cpp
index 857f77a629247..6fc24fa4e21b9 100644
--- a/clang/test/OpenMP/structured-bindings-codegen.cpp
+++ b/clang/test/OpenMP/structured-bindings-codegen.cpp
@@ -461,6 +461,30 @@ void test_reduction_binding_operators() {
   use(b);
 }
 
+void test_reduction_binding_nontrivial() {
+  struct NonTrivial {
+    int value;
+    NonTrivial() : value(0) {}
+    NonTrivial(int v) : value(v) {}
+    ~NonTrivial() {}
+    NonTrivial& operator+=(int x) { value += x; return *this; }
+    NonTrivial& operator+=(const NonTrivial& other) { value += other.value; return *this; }
+  };
+  struct PairNonTrivial {
+    NonTrivial a;
+    NonTrivial b;
+  };
+
+  PairNonTrivial p{NonTrivial(0), NonTrivial(0)};
+  auto [a, b] = p;
+
+#pragma omp parallel for reduction(+:a)
+  for (int i = 0; i < 10; ++i) {
+    a += i;
+  }
+  use(a.value);
+}
+
 void test_lastprivate_binding() {
   Point p{1, 2};
   auto [a, b] = p;
@@ -2816,6 +2840,226 @@ void test_lambda_implicit_capture() {
 // CHECK:    ret void
 //
 //
+// CHECK-LABEL: define dso_local void @_Z33test_reduction_binding_nontrivialv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIRNONTRIVIAL:%.*]], ptr [[P:%.*]], i32 0, i32 0
+// CHECK:    call void @_ZZ33test_reduction_binding_nontrivialvEN10NonTrivialC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[A]], i32 noundef 0)
+// CHECK:    [[B:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIRNONTRIVIAL]], ptr [[P]], i32 0, i32 1
+// CHECK:    call void @_ZZ33test_reduction_binding_nontrivialvEN10NonTrivialC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[B]], i32 noundef 0)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z33test_reduction_binding_nontrivialv.omp_outlined, ptr [[TMP0]])
+// CHECK:    [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIRNONTRIVIAL]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    [[VALUE:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIAL:%.*]], ptr [[A1]], i32 0, i32 0
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[VALUE]], align 4
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP1]])
+// CHECK:    call void @_ZZ33test_reduction_binding_nontrivialvEN14PairNonTrivialD1Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[TMP0]]) #[[ATTR3]]
+// CHECK:    call void @_ZZ33test_reduction_binding_nontrivialvEN14PairNonTrivialD1Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[P]]) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_ZZ33test_reduction_binding_nontrivialvEN10NonTrivialC1Ei(
+// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[V:%.*]]) unnamed_addr #[[ATTR0]] align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    store i32 [[V]], ptr [[V_ADDR:%.*]], align 4
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    [[TMP0:%.*]] = load i32, ptr [[V_ADDR]], align 4
+// CHECK:    call void @_ZZ33test_reduction_binding_nontrivialvEN10NonTrivialC2Ei(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z33test_reduction_binding_nontrivialv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
+// CHECK:    store i32 9, ptr [[DOTOMP_UB:%.*]], align 4
+// CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
+// CHECK:    store i32 0, ptr [[DOTOMP_IS_LAST:%.*]], align 4
+// CHECK:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIRNONTRIVIAL:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    call void @_ZZ33test_reduction_binding_nontrivialvEN10NonTrivialC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[A1:%.*]])
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
+// CHECK:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK:       [[COND_TRUE]]:
+// CHECK:    br label %[[COND_END:.*]]
+// CHECK:       [[COND_FALSE]]:
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    br label %[[COND_END]]
+// CHECK:       [[COND_END]]:
+// CHECK:    [[COND:%.*]] = phi i32 [ 9, %[[COND_TRUE]] ], [ [[TMP5]], %[[COND_FALSE]] ]
+// CHECK:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK:    store i32 [[TMP6]], ptr [[DOTOMP_IV:%.*]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
+// CHECK:       [[OMP_INNER_FOR_COND]]:
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
+// CHECK:    br i1 [[CMP2]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_COND_CLEANUP:.*]]
+// CHECK:       [[OMP_INNER_FOR_COND_CLEANUP]]:
+// CHECK:    br label %[[OMP_INNER_FOR_END:.*]]
+// CHECK:       [[OMP_INNER_FOR_BODY]]:
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[I]], align 4
+// CHECK:    [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZZ33test_reduction_binding_nontrivialvEN10NonTrivialpLEi(ptr noundef nonnull align 4 dereferenceable(4) [[A1]], i32 noundef [[TMP10]])
+// CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
+// CHECK:       [[OMP_BODY_CONTINUE]]:
+// CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
+// CHECK:       [[OMP_INNER_FOR_INC]]:
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1
+// CHECK:    store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND]]
+// CHECK:       [[OMP_INNER_FOR_END]]:
+// CHECK:    br label %[[OMP_LOOP_EXIT:.*]]
+// CHECK:       [[OMP_LOOP_EXIT]]:
+// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK:    [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST:%.*]], i64 0, i64 0
+// CHECK:    store ptr [[A1]], ptr [[TMP12]], align 8
+// CHECK:    [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP3]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z33test_reduction_binding_nontrivialv.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK:    switch i32 [[TMP13]], [[DOTOMP_REDUCTION_DEFAULT:label %.*]] [
+// CHECK:      i32 1, [[DOTOMP_REDUCTION_CASE1:label %.*]]
+// CHECK:      i32 2, [[DOTOMP_REDUCTION_CASE2:label %.*]]
+// CHECK:    ]
+// CHECK:       [[_OMP_REDUCTION_CASE1:.*:]]
+// CHECK:    [[CALL4:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZZ33test_reduction_binding_nontrivialvEN10NonTrivialpLERKS_(ptr noundef nonnull align 4 dereferenceable(4) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[A1]])
+// CHECK:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK:       [[_OMP_REDUCTION_CASE2:.*:]]
+// CHECK:    call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP3]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// CHECK:    [[CALL5:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZZ33test_reduction_binding_nontrivialvEN10NonTrivialpLERKS_(ptr noundef nonnull align 4 dereferenceable(4) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[A1]])
+// CHECK:    call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP3]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK:       [[_OMP_REDUCTION_DEFAULT:.*:]]
+// CHECK:    call void @_ZZ33test_reduction_binding_nontrivialvEN10NonTrivialD1Ev(ptr noundef nonnull align 4 dead_on_return(4) dereferenceable(4) [[A1]]) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_ZZ33test_reduction_binding_nontrivialvEN10NonTrivialC1Ev(
+// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    call void @_ZZ33test_reduction_binding_nontrivialvEN10NonTrivialC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal noundef nonnull align 4 dereferenceable(4) ptr @_ZZ33test_reduction_binding_nontrivialvEN10NonTrivialpLEi(
+// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[X:%.*]]) #[[ATTR0]] align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    store i32 [[X]], ptr [[X_ADDR:%.*]], align 4
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK:    [[VALUE:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIAL:%.*]], ptr [[THIS1]], i32 0, i32 0
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[VALUE]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
+// CHECK:    store i32 [[ADD]], ptr [[VALUE]], align 4
+// CHECK:    ret ptr [[THIS1]]
+//
+//
+// CHECK-LABEL: define internal void @_Z33test_reduction_binding_nontrivialv.omp_outlined.omp.reduction.reduction_func(
+// CHECK-SAME: ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
+// CHECK:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK:    [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZZ33test_reduction_binding_nontrivialvEN10NonTrivialpLERKS_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP7]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP5]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal noundef nonnull align 4 dereferenceable(4) ptr @_ZZ33test_reduction_binding_nontrivialvEN10NonTrivialpLERKS_(
+// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[OTHER:%.*]]) #[[ATTR0]] align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    store ptr [[OTHER]], ptr [[OTHER_ADDR:%.*]], align 8
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    [[TMP0:%.*]] = load ptr, ptr [[OTHER_ADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[VALUE:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIAL:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[VALUE]], align 4
+// CHECK:    [[VALUE2:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIAL]], ptr [[THIS1]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[VALUE2]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP1]]
+// CHECK:    store i32 [[ADD]], ptr [[VALUE2]], align 4
+// CHECK:    ret ptr [[THIS1]]
+//
+//
+// CHECK-LABEL: define internal void @_ZZ33test_reduction_binding_nontrivialvEN10NonTrivialD1Ev(
+// CHECK-SAME: ptr noundef nonnull align 4 dead_on_return(4) dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    call void @_ZZ33test_reduction_binding_nontrivialvEN10NonTrivialD2Ev(ptr noundef nonnull align 4 dead_on_return(4) dereferenceable(4) [[THIS1]]) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_ZZ33test_reduction_binding_nontrivialvEN14PairNonTrivialD1Ev(
+// CHECK-SAME: ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    call void @_ZZ33test_reduction_binding_nontrivialvEN14PairNonTrivialD2Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[THIS1]]) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_ZZ33test_reduction_binding_nontrivialvEN10NonTrivialC2Ei(
+// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[V:%.*]]) unnamed_addr #[[ATTR0]] align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    store i32 [[V]], ptr [[V_ADDR:%.*]], align 4
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    [[VALUE:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIAL:%.*]], ptr [[THIS1]], i32 0, i32 0
+// CHECK:    [[TMP0:%.*]] = load i32, ptr [[V_ADDR]], align 4
+// CHECK:    store i32 [[TMP0]], ptr [[VALUE]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_ZZ33test_reduction_binding_nontrivialvEN10NonTrivialC2Ev(
+// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    [[VALUE:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIAL:%.*]], ptr [[THIS1]], i32 0, i32 0
+// CHECK:    store i32 0, ptr [[VALUE]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_ZZ33test_reduction_binding_nontrivialvEN10NonTrivialD2Ev(
+// CHECK-SAME: ptr noundef nonnull align 4 dead_on_return(4) dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_ZZ33test_reduction_binding_nontrivialvEN14PairNonTrivialD2Ev(
+// CHECK-SAME: ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] align 2 {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
+// CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK:    [[B:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIRNONTRIVIAL:%.*]], ptr [[THIS1]], i32 0, i32 1
+// CHECK:    call void @_ZZ33test_reduction_binding_nontrivialvEN10NonTrivialD1Ev(ptr noundef nonnull align 4 dead_on_return(4) dereferenceable(4) [[B]]) #[[ATTR3]]
+// CHECK:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIRNONTRIVIAL]], ptr [[THIS1]], i32 0, i32 0
+// CHECK:    call void @_ZZ33test_reduction_binding_nontrivialvEN10NonTrivialD1Ev(ptr noundef nonnull align 4 dead_on_return(4) dereferenceable(4) [[A]]) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
 // CHECK-LABEL: define dso_local void @_Z24test_lastprivate_bindingv(
 // CHECK-SAME: ) #[[ATTR0]] {
 // CHECK:  [[ENTRY:.*:]]
diff --git a/clang/test/OpenMP/structured-bindings-reduction-unsupported.cpp b/clang/test/OpenMP/structured-bindings-reduction-unsupported.cpp
new file mode 100644
index 0000000000000..614526a0bece0
--- /dev/null
+++ b/clang/test/OpenMP/structured-bindings-reduction-unsupported.cpp
@@ -0,0 +1,40 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -std=c++20 \
+// RUN: -triple x86_64-unknown-unknown -emit-llvm %s -o -
+
+struct Point { int x, y; };
+struct ArrayStruct { int arr[2]; };
+
+#pragma omp declare reduction(mysum: int: omp_out += omp_in) initializer(omp_pr\
+iv = 0)
+
+void test_array_reduction() {
+  ArrayStruct s{{1, 2}};
+  auto [arr] = s;
+
+#pragma omp parallel for reduction(+:arr) // expected-error {{array-type reduct\
+ions on structured bindings are not yet supported}}                             
+  for (int i = 0; i < 10; ++i) {
+    arr[0] += i;
+  }
+}
+
+void test_udr_reduction() {
+  Point p{0, 0};
+  auto [a, b] = p;
+
+#pragma omp parallel for reduction(mysum:a) // expected-error {{user-defined re\
+ductions on structured bindings are not yet supported}}                         
+  for (int i = 0; i < 10; ++i) {
+    a += i;
+  }
+}
+
+void test_simple_scalar_reduction() {
+  Point p{0, 0};
+  auto [a, b] = p;
+
+#pragma omp parallel for reduction(+:a)
+  for (int i = 0; i < 10; ++i) {
+    a += i;
+  }
+}

>From eb96d0a3b74cbbb771d4015a7d5831a67a15a08e Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Wed, 17 Jun 2026 12:04:38 -0700
Subject: [PATCH 40/45] Addressed review comments

---
 clang/lib/CodeGen/CGExpr.cpp                  |  10 +-
 clang/lib/CodeGen/CGStmtOpenMP.cpp            |   5 +-
 clang/lib/Sema/SemaOpenMP.cpp                 |  24 +-
 ...ctured-bindings-template-instantiation.cpp | 243 ++++++++++++++++++
 4 files changed, 261 insertions(+), 21 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 3a723bdd1dc33..143364615a2d7 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3616,12 +3616,10 @@ LValue CodeGenFunction::EmitOMPCapturedBindingLValue(const BindingDecl *BD) {
   // reference type. DeclRefExpr with VK_LValue requires a non-reference type
   // (AST invariant). EmitDeclRefLValue will load any reference for us.
   QualType DREType = DD->getType().getNonReferenceType();
-  DeclarationNameInfo NameInfo(DD->getDeclName(), SourceLocation());
-  DeclRefExpr *DRE = DeclRefExpr::Create(
-      getContext(), NestedNameSpecifierLoc(), SourceLocation(), DD,
-      /*RefersToEnclosingVariableOrCapture=*/true, NameInfo, DREType,
-      VK_LValue);
-  LValue BaseLVal = EmitDeclRefLValue(DRE);
+  DeclRefExpr DRE(getContext(), DD,
+                  /*RefersToEnclosingVariableOrCapture=*/true, DREType,
+                  VK_LValue, SourceLocation());
+  LValue BaseLVal = EmitDeclRefLValue(&DRE);
   QualType CanonType = DREType.getCanonicalType();
   Address Addr = BaseLVal.getAddress();
   llvm::Type *ExpectedTy = CGM.getTypes().ConvertTypeForMem(CanonType);
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index aa74c532517d3..e5da208ee89ad 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1985,9 +1985,10 @@ checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
       if (!DRE)
         continue;
-      if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl()))
+      if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
         PrivateDecls.insert(VD);
-      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
+        CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
+      }
     }
   }
   // Privates should ne analyzed since they are not captured at all.
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 565108fdcf394..7836ac8b1737d 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -19797,19 +19797,17 @@ OMPClause *SemaOpenMP::ActOnOpenMPFirstprivateClause(ArrayRef<Expr *> VarList,
     if (!VD && !SemaRef.CurContext->isDependentContext()) {
       if (TopDVar.CKind == OMPC_lastprivate) {
         Ref = TopDVar.PrivateCopy;
-      } else {
-        if (!IsBindingDecl) {
-          auto *FD = dyn_cast<FieldDecl>(D);
-          VarDecl *VD = FD ? DSAStack->getImplicitFDCapExprDecl(FD) : nullptr;
-          if (VD)
-            Ref = buildDeclRefExpr(SemaRef, VD,
-                                   VD->getType().getNonReferenceType(),
-                                   RefExpr->getExprLoc());
-          else
-            Ref = buildCapture(SemaRef, D, SimpleRefExpr, /*WithInit=*/true);
-          if (VD || !isOpenMPCapturedDecl(D))
-            ExprCaptures.push_back(Ref->getDecl());
-        }
+      } else if (!IsBindingDecl) {
+        auto *FD = dyn_cast<FieldDecl>(D);
+        VarDecl *VD = FD ? DSAStack->getImplicitFDCapExprDecl(FD) : nullptr;
+        if (VD)
+          Ref =
+              buildDeclRefExpr(SemaRef, VD, VD->getType().getNonReferenceType(),
+                               RefExpr->getExprLoc());
+        else
+          Ref = buildCapture(SemaRef, D, SimpleRefExpr, /*WithInit=*/true);
+        if (VD || !isOpenMPCapturedDecl(D))
+          ExprCaptures.push_back(Ref->getDecl());
       }
     }
     if (!IsImplicitClause)
diff --git a/clang/test/OpenMP/structured-bindings-template-instantiation.cpp b/clang/test/OpenMP/structured-bindings-template-instantiation.cpp
index 3c8feef3efaa3..544cc35c59ebd 100644
--- a/clang/test/OpenMP/structured-bindings-template-instantiation.cpp
+++ b/clang/test/OpenMP/structured-bindings-template-instantiation.cpp
@@ -254,6 +254,225 @@ int test_template_array(T (&arr)[N]) {
   return result;
 }
 
+template<typename T>
+struct Pair {
+  T first;
+  T second;
+};
+
+// CHECK-LABEL: define {{.*}}@_Z28test_dependent_decompositionI{{.*}}
+// AST: template <typename T> void test_dependent_decomposition(Pair<T> p) {
+// AST:    auto = p;
+// AST:    #pragma omp parallel shared(a,b)
+// AST:       {
+// AST:            a = a + T(1);
+// AST:            b = b + T(2);
+// AST:        }
+// AST: }
+// AST: template<> void test_dependent_decomposition<int>(Pair<int> p) {
+// AST:     auto = p;
+// AST:     #pragma omp parallel shared(a,b)
+// AST:         {
+// AST:             a = a + int(1);
+// AST:             b = b + int(2);
+// AST:         }
+// AST: }
+// AST: template<> void test_dependent_decomposition<double>(Pair<double> p) {
+// AST:         auto = p;
+// AST:     #pragma omp parallel shared(a,b)
+// AST:         {
+// AST:             a = a + double(1);
+// AST:             b = b + double(2);
+// AST:         }
+// AST: }
+
+template<typename T>
+void test_dependent_decomposition(Pair<T> p) {
+  auto [a, b] = p;
+
+#pragma omp parallel shared(a, b)
+  {
+    a = a + T(1);
+    b = b + T(2);
+  }
+}
+
+// CHECK-LABEL: define {{.*}}@_Z24test_dependent_reductionI{{.*}}
+// AST: template <typename T> T test_dependent_reduction(Pair<T> p) {
+// AST:     auto = p;
+// AST:     T sum = T(0);
+// AST:     #pragma omp parallel for reduction(+: sum)
+// AST:         for (int i = 0; i < 10; ++i) {
+// AST:             sum = sum + a + b;
+// AST:         }
+// AST:     return sum;
+// AST: }
+// AST: template<> int test_dependent_reduction<int>(Pair<int> p) {
+// AST:     auto = p;
+// AST:     int sum = int(0);
+// AST:     #pragma omp parallel for reduction(+: sum)
+// AST:         for (int i = 0; i < 10; ++i) {
+// AST:             sum = sum + a + b;
+// AST:         }
+// AST:     return sum;
+// AST: }
+// AST: template<> double test_dependent_reduction<double>(Pair<double> p) {
+// AST:     auto = p;
+// AST:     double sum = double(0);
+// AST:     #pragma omp parallel for reduction(+: sum)
+// AST:         for (int i = 0; i < 10; ++i) {
+// AST:             sum = sum + a + b;
+// AST:         }
+// AST:     return sum;
+// AST: }
+template<typename T>
+T test_dependent_reduction(Pair<T> p) {
+  auto [a, b] = p;
+  T sum = T(0);
+
+#pragma omp parallel for reduction(+:sum)
+  for (int i = 0; i < 10; ++i) {
+    sum = sum + a + b;
+  }
+  return sum;
+}
+
+// CHECK-LABEL: define {{.*}}@_Z27test_dependent_firstprivateI{{.*}}
+// AST: template <typename T> void test_dependent_firstprivate(Pair<T> p) {
+// AST:     auto = p;
+// AST:     #pragma omp parallel firstprivate(a,b)
+// AST:         {
+// AST:             T local = a + b;
+// AST:         }
+// AST: }
+// AST: template<> void test_dependent_firstprivate<int>(Pair<int> p) {
+// AST:     auto = p;
+// AST:     #pragma omp parallel firstprivate(a,b)
+// AST:         {
+// AST:             int local = a + b;
+// AST:         }
+// AST: }
+// AST: template<> void test_dependent_firstprivate<double>(Pair<double> p) {
+// AST:     auto = p;
+// AST:     #pragma omp parallel firstprivate(a,b)
+// AST:         {
+// AST:             double local = a + b;
+// AST:         }
+// AST: }
+template<typename T>
+void test_dependent_firstprivate(Pair<T> p) {
+  auto [a, b] = p;
+
+#pragma omp parallel firstprivate(a, b)
+  {
+    T local = a + b;
+  }
+}
+
+// CHECK-LABEL: define {{.*}}@_Z19test_dependent_taskI{{.*}}
+// AST: template <typename T> void test_dependent_task(Pair<T> p) {
+// AST:     auto = p;
+// AST:     #pragma omp task shared(a)
+// AST:         {
+// AST:             a = a + T(10);
+// AST:         }
+// AST: }
+// AST: template<> void test_dependent_task<int>(Pair<int> p) {
+// AST:     auto = p;
+// AST:     #pragma omp task shared(a)
+// AST:         {
+// AST:             a = a + int(10);
+// AST:         }
+// AST: }
+// AST: template<> void test_dependent_task<double>(Pair<double> p) {
+// AST:     auto = p;
+// AST:     #pragma omp task shared(a)
+// AST:         {
+// AST:             a = a + double(10);
+// AST:         }
+// AST: }
+template<typename T>
+void test_dependent_task(Pair<T> p) {
+  auto [a, b] = p;
+
+#pragma omp task shared(a)
+  {
+    a = a + T(10);
+  }
+}
+
+template<typename T>
+struct Triple {
+  T x, y, z;
+};
+
+// CHECK-LABEL: define {{.*}}@_Z30test_dependent_partial_captureIiEv6TripleIT_E
+// AST: template <typename T> void test_dependent_partial_capture(Triple<T> p) {
+// AST:     auto = p;
+// AST:     #pragma omp parallel firstprivate(a)
+// AST:         {
+// AST:             T result = a + b + c;
+// AST:         }
+// AST: }
+// AST: template<> void test_dependent_partial_capture<int>(Triple<int> p) {
+// AST:     auto = p;
+// AST:     #pragma omp parallel firstprivate(a)
+// AST:         {
+// AST:             int result = a + b + c;
+// AST:         }
+// AST: }
+// AST: template<> void test_dependent_partial_capture<double>(Triple<double> p) {
+// AST:     auto = p;
+// AST:     #pragma omp parallel firstprivate(a)
+// AST:         {
+// AST:             double result = a + b + c;
+// AST:         }
+// AST: }
+template<typename T>
+void test_dependent_partial_capture(Triple<T> p) {
+  auto [a, b, c] = p;
+#pragma omp parallel firstprivate(a)
+  {
+    T result = a + b + c;
+  }
+}
+
+// CHECK-LABEL: define {{.*}}@_Z32test_dependent_multiple_capturesI{{.*}}
+// AST: template <typename T> void test_dependent_multiple_captures(Pair<T> p1, Pair<T> p2) {
+// AST:     auto = p1;
+// AST:     auto = p2;
+// AST:     #pragma omp parallel shared(a,b,c,d)
+// AST:         {
+// AST:             T result = a + b + c + d;
+// AST:         }
+// AST: }
+// AST: template<> void test_dependent_multiple_captures<int>(Pair<int> p1, Pair<int> p2) {
+// AST:     auto = p1;
+// AST:     auto = p2;
+// AST:     #pragma omp parallel shared(a,b,c,d)
+// AST:         {
+// AST:             int result = a + b + c + d;
+// AST:         }
+// AST: }
+// AST: template<> void test_dependent_multiple_captures<double>(Pair<double> p1, Pair<double> p2) {
+// AST:     auto = p1;
+// AST:     auto = p2;
+// AST:     #pragma omp parallel shared(a,b,c,d)
+// AST:         {
+// AST:             double result = a + b + c + d;
+// AST:         }
+// AST: }
+template<typename T>
+void test_dependent_multiple_captures(Pair<T> p1, Pair<T> p2) {
+  auto [a, b] = p1;
+  auto [c, d] = p2;
+
+#pragma omp parallel shared(a, b, c, d)
+  {
+    T result = a + b + c + d;
+  }
+}
+
 void instantiate_tests() {
   Point p1{1, 2};
   Point3D p2{1, 2, 3};
@@ -265,4 +484,28 @@ void instantiate_tests() {
   test_template_nested(p1);
   test_template_multiple_regions(p1);
   test_template_array(arr);
+
+  Pair<int> pi{1, 2};
+  test_dependent_decomposition(pi);
+  test_dependent_reduction(pi);
+  test_dependent_firstprivate(pi);
+  test_dependent_task(pi);
+
+  Pair<int> pi2{3, 4};
+  test_dependent_multiple_captures(pi, pi2);
+
+  Triple<int> ti{1, 2, 3};
+  test_dependent_partial_capture(ti);
+
+  Pair<double> pd{1.5, 2.5};
+  test_dependent_decomposition(pd);
+  test_dependent_reduction(pd);
+  test_dependent_firstprivate(pd);
+  test_dependent_task(pd);
+
+  Pair<double> pd2{3.5, 4.5};
+  test_dependent_multiple_captures(pd, pd2);
+
+  Triple<double> td{1.5, 2.5, 3.5};
+  test_dependent_partial_capture(td);
 }

>From e7aa6662e34220ae4cbd0d1646a8caa52d555266 Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Thu, 18 Jun 2026 11:53:23 -0700
Subject: [PATCH 41/45] Applied all the code suggestions

---
 clang/lib/CodeGen/CGExpr.cpp        |  3 +--
 clang/lib/CodeGen/CGStmtOpenMP.cpp  | 13 ++++++-------
 clang/lib/CodeGen/CodeGenFunction.h | 14 +++++---------
 clang/lib/Sema/SemaLambda.cpp       |  4 ++--
 clang/lib/Sema/SemaOpenMP.cpp       | 18 +++++++++---------
 5 files changed, 23 insertions(+), 29 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 143364615a2d7..4010663685118 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3835,9 +3835,8 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
     if (E->refersToEnclosingVariableOrCapture()) {
       // Try direct lookup first.
       auto It = LocalDeclMap.find(BD->getCanonicalDecl());
-      if (It != LocalDeclMap.end()) {
+      if (It != LocalDeclMap.end())
         return MakeAddrLValue(It->second, E->getType(), AlignmentSource::Decl);
-      }
       // OpenMP case: binding was captured via its decomposed decl.
       if (CapturedStmtInfo &&
           CapturedStmtInfo->getKind() == CapturedRegionKind::CR_OpenMP &&
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index e5da208ee89ad..bf21bdc4bd05e 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1136,7 +1136,7 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
   bool DeviceConstTarget = getLangOpts().OpenMPIsTargetDevice &&
                            isOpenMPTargetExecutionDirective(EKind);
   bool FirstprivateIsLastprivate = false;
-  llvm::DenseMap<const Decl *, OpenMPLastprivateModifier> Lastprivates;
+  llvm::SmallDenseMap<const Decl *, OpenMPLastprivateModifier> Lastprivates;
   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
     for (const auto *D : C->varlist()) {
       const auto *VD = cast<DeclRefExpr>(D)->getDecl();
@@ -1155,7 +1155,6 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
     const auto *InitsRef = C->inits().begin();
     for (const Expr *IInit : C->private_copies()) {
       const auto *OrigDecl = cast<DeclRefExpr>(*IRef)->getDecl();
-      const VarDecl *OrigVD = dyn_cast<VarDecl>(OrigDecl);
       const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
 
       if (const auto *BD = dyn_cast<BindingDecl>(OrigDecl)) {
@@ -1187,6 +1186,7 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
       }
 
       // Original VarDecl logic.
+      const VarDecl *OrigVD = dyn_cast<VarDecl>(OrigDecl);
       assert(OrigVD && "Expected VarDecl for non-BindingDecl firstprivate");
       bool ThisFirstprivateIsLastprivate =
           Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
@@ -1309,7 +1309,7 @@ void CodeGenFunction::EmitOMPPrivateClause(
     CodeGenFunction::OMPPrivateScope &PrivateScope) {
   if (!HaveInsertPoint())
     return;
-  llvm::DenseSet<const ValueDecl *> EmittedAsPrivate;
+  llvm::SmallDenseSet<const ValueDecl *> EmittedAsPrivate;
   for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
     auto IRef = C->varlist_begin();
     for (const Expr *IInit : C->private_copies()) {
@@ -1413,7 +1413,7 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit(
           cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
     }
   }
-  llvm::DenseSet<const ValueDecl *> AlreadyEmittedVars;
+  llvm::SmallDenseSet<const ValueDecl *> AlreadyEmittedVars;
   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
     HasAtLeastOneLastprivate = true;
     if (isOpenMPTaskLoopDirective(EKind) && !getLangOpts().OpenMPSimd)
@@ -1522,7 +1522,7 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal(
     EmitBlock(ThenBB);
   }
   llvm::DenseSet<const ValueDecl *> AlreadyEmittedVars;
-  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
+  llvm::SmallDenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
   if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
     auto IC = LoopDirective->counters().begin();
     for (const Expr *F : LoopDirective->finals()) {
@@ -2874,9 +2874,8 @@ void CodeGenFunction::EmitOMPLinearClause(
       const auto *PrivateVD =
           cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
       bool IsSIMDLCV = false;
-      if (const auto *VarD = dyn_cast<VarDecl>(VD)) {
+      if (const auto *VarD = dyn_cast<VarDecl>(VD))
         IsSIMDLCV = SIMDLCVs.count(VarD->getCanonicalDecl());
-      }
       if (!IsSIMDLCV) {
         // Emit private VarDecl with copy init.
         EmitVarDecl(*PrivateVD);
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 8d4edd29c7bb4..e94837e5fd5d6 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -1239,11 +1239,9 @@ class CodeGenFunction : public CodeGenTypeCache {
       assert(PerformCleanup && "adding private to dead scope");
       if (const auto *BD = dyn_cast<BindingDecl>(LocalVD->getCanonicalDecl())) {
         auto It = CGF.OMPPrivatizedBindings.find(BD);
-        if (It != CGF.OMPPrivatizedBindings.end()) {
-          BindingChanges.emplace_back(BD, It->second);
-        } else {
-          BindingChanges.emplace_back(BD, std::nullopt);
-        }
+        BindingChanges.emplace_back(BD, It != CGF.OMPPrivatizedBindings.end()
+                                            ? std::make_optional(It->second)
+                                            : std::nullopt);
       }
       return MappedVars.setVarAddr(CGF, LocalVD, Addr);
     }
@@ -1270,9 +1268,7 @@ class CodeGenFunction : public CodeGenTypeCache {
       for (auto &Change : BindingChanges) {
         if (Change.second.has_value()) {
           auto It = CGF.OMPPrivatizedBindings.find(Change.first);
-          assert(It != CGF.OMPPrivatizedBindings.end() &&
-                 "Entry should exist when restoring previous value");
-          It->second = *Change.second;
+          It->second = CGF.OMPPrivatizedBindings.at(Change.first);
         } else {
           CGF.OMPPrivatizedBindings.erase(Change.first);
         }
@@ -1579,7 +1575,7 @@ class CodeGenFunction : public CodeGenTypeCache {
   /// Lookup map for privatized BindingDecls.
   /// Used when BindingDecls are remapped during OpenMP outlining, since the
   /// remapped BindingDecl has a different pointer than the original.
-  llvm::DenseMap<const BindingDecl *, Address> OMPPrivatizedBindings;
+  llvm::SmallDenseMap<const BindingDecl *, Address> OMPPrivatizedBindings;
 
   // Keep track of the cleanups for callee-destructed parameters pushed to the
   // cleanup stack so that they can be deactivated later.
diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp
index 139593bed6eac..efdea7dcf06ff 100644
--- a/clang/lib/Sema/SemaLambda.cpp
+++ b/clang/lib/Sema/SemaLambda.cpp
@@ -2091,13 +2091,13 @@ bool Sema::DiagnoseUnusedLambdaCapture(SourceRange CaptureRange,
 /// Create a field within the lambda class or captured statement record for the
 /// given capture.
 FieldDecl *Sema::BuildCaptureField(RecordDecl *RD, const sema::Capture &Capture,
-                                   bool isOpenMP) {
+                                   bool IsOpenMP) {
   SourceLocation Loc = Capture.getLocation();
   QualType FieldType = Capture.getCaptureType();
   TypeSourceInfo *TSI = nullptr;
   if (Capture.isVariableCapture()) {
     const VarDecl *Var = nullptr;
-    if (isOpenMP) {
+    if (IsOpenMP) {
       if (auto *BD = dyn_cast_or_null<BindingDecl>(Capture.getVariable())) {
         Var = cast<VarDecl>(BD->getDecomposedDecl());
         FieldType = Var->getType();
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 7836ac8b1737d..ff87e674dce36 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -5467,7 +5467,7 @@ getPrivateItem(Sema &S, Expr *&RefExpr, SourceLocation &ELoc,
   RefExpr = RefExpr->IgnoreParenImpCasts();
   auto *DE = dyn_cast_or_null<DeclRefExpr>(RefExpr);
   auto *ME = dyn_cast_or_null<MemberExpr>(RefExpr);
-  if ((!DE || (!isa<VarDecl, BindingDecl>(DE->getDecl()))) &&
+  if ((!DE || !isa<VarDecl, BindingDecl>(DE->getDecl())) &&
       (S.getCurrentThisType().isNull() || !ME ||
        !isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts()) ||
        !isa<FieldDecl>(ME->getMemberDecl()))) {
@@ -20687,17 +20687,17 @@ static bool actOnOMPReductionKindClause(
       Type = Context.getBaseElementType(D->getType().getNonReferenceType());
     }
     auto *VD = dyn_cast<VarDecl>(D);
-    auto *BD = dyn_cast<BindingDecl>(D);
 
     // Check for unsupported reduction forms on structured bindings.
-    if (BD && (D->getType().getNonReferenceType()->isArrayType() ||
-               BOK == BO_Comma)) {
+    auto *BD = dyn_cast<BindingDecl>(D);
+    if (BD && D->getType().getNonReferenceType()->isArrayType()) {
       // Array-type reductions are not supported.
-      if (D->getType().getNonReferenceType()->isArrayType())
-        S.Diag(ELoc, diag::err_omp_array_reduction_on_binding);
-      else
-        // User-defined reductions (declare reduction) are not supported.
-        S.Diag(ELoc, diag::err_omp_udr_reduction_on_binding);
+      S.Diag(ELoc, diag::err_omp_array_reduction_on_binding);
+      continue;
+    }
+    if (BD && BOK == BO_Comma) {
+      // User-defined reductions (declare reduction) are not supported.
+      S.Diag(ELoc, diag::err_omp_udr_reduction_on_binding);
       continue;
     }
 

>From 450ba02957757296a272cf2439bc53f9668b13f2 Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Fri, 19 Jun 2026 09:08:08 -0700
Subject: [PATCH 42/45] Fixed capture inconsistency for structured bindings
 with different map types

---
 clang/include/clang/AST/DeclCXX.h             |   4 +
 clang/lib/AST/DeclCXX.cpp                     |  17 +
 clang/lib/CodeGen/CGExpr.cpp                  |  18 +-
 clang/lib/CodeGen/CGOpenMPRuntime.cpp         |  33 +-
 clang/lib/CodeGen/CodeGenFunction.h           |  14 +-
 clang/lib/Sema/SemaOpenMP.cpp                 | 137 +++++--
 .../OpenMP/structured-bindings-codegen.cpp    |  12 +-
 ...ed-bindings-target-map-different-types.cpp | 344 ++++++++++++++++++
 8 files changed, 535 insertions(+), 44 deletions(-)
 create mode 100644 clang/test/OpenMP/structured-bindings-target-map-different-types.cpp

diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h
index cc4b4ff9db273..2aa010e617036 100644
--- a/clang/include/clang/AST/DeclCXX.h
+++ b/clang/include/clang/AST/DeclCXX.h
@@ -4315,6 +4315,10 @@ class DecompositionDecl final
 
   void printName(raw_ostream &OS, const PrintingPolicy &Policy) const override;
 
+  /// If this decomposition was initialized from a variable (e.g.
+  /// `auto [a, b] = p`), return that variable. Otherwise return nullptr.
+  const VarDecl *getOriginalVar() const;
+
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == Decomposition; }
 };
diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp
index fc8a15287f438..181e5fb235bf2 100644
--- a/clang/lib/AST/DeclCXX.cpp
+++ b/clang/lib/AST/DeclCXX.cpp
@@ -3764,6 +3764,23 @@ void DecompositionDecl::printName(llvm::raw_ostream &OS,
   OS << ']';
 }
 
+const VarDecl *DecompositionDecl::getOriginalVar() const {
+  const Expr *Init = getInit();
+  if (!Init)
+    return nullptr;
+  const Expr *Stripped = Init->IgnoreParenImpCasts();
+  if (const auto *DRE = dyn_cast<DeclRefExpr>(Stripped))
+    return dyn_cast<VarDecl>(DRE->getDecl());
+  // Only a copy/move construction forwards an existing variable; any other
+  // one-argument constructor builds an unrelated object from its argument.
+  const auto *CE = dyn_cast<CXXConstructExpr>(Stripped);
+  if (!CE || !CE->getConstructor()->isCopyOrMoveConstructor())
+    return nullptr;
+  const Expr *Arg = CE->getArg(0)->IgnoreParenImpCasts();
+  const auto *ArgDRE = dyn_cast<DeclRefExpr>(Arg);
+  return ArgDRE ? dyn_cast<VarDecl>(ArgDRE->getDecl()) : nullptr;
+}
+
 void MSPropertyDecl::anchor() {}
 
 MSPropertyDecl *MSPropertyDecl::Create(ASTContext &C, DeclContext *DC,
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 4010663685118..6b349123eaf4f 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3610,13 +3610,25 @@ LValue CodeGenFunction::EmitOMPCapturedBindingLValue(const BindingDecl *BD) {
       It != LocalDeclMap.end())
     return MakeAddrLValue(It->second, BD->getType());
 
-  auto *DD = cast<VarDecl>(BD->getDecomposedDecl());
+  const auto *DD = cast<VarDecl>(BD->getDecomposedDecl());
+
+  // Check if the original variable (what DD decomposes) has been mapped.
+  // If so, use the original variable instead of DD to avoid capturing DD.
+  const VarDecl *TargetDecl = DD;
+  if (const auto *DecompDecl = dyn_cast<DecompositionDecl>(DD)) {
+    if (const VarDecl *OrigVar = DecompDecl->getOriginalVar()) {
+      auto It = LocalDeclMap.find(OrigVar->getCanonicalDecl());
+      if (It != LocalDeclMap.end())
+        // Original variable is mapped, use it instead.
+        TargetDecl = OrigVar;
+    }
+  }
   Expr *BindingExpr = BD->getBinding()->IgnoreImplicit();
   // Use getNonReferenceType() because we need the actual object type, not the
   // reference type. DeclRefExpr with VK_LValue requires a non-reference type
   // (AST invariant). EmitDeclRefLValue will load any reference for us.
-  QualType DREType = DD->getType().getNonReferenceType();
-  DeclRefExpr DRE(getContext(), DD,
+  QualType DREType = TargetDecl->getType().getNonReferenceType();
+  DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(TargetDecl),
                   /*RefersToEnclosingVariableOrCapture=*/true, DREType,
                   VK_LValue, SourceLocation());
   LValue BaseLVal = EmitDeclRefLValue(&DRE);
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 2f7a90a6f18b9..975e44b6b628e 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1880,7 +1880,11 @@ void CGOpenMPRuntime::registerVTable(const OMPExecutableDirective &D) {
 
     const VarDecl *VD = nullptr;
     if (auto *DRE = dyn_cast<DeclRefExpr>(E)) {
-      VD = cast<VarDecl>(DRE->getDecl());
+      // Handle BindingDecls by redirecting to their DecompositionDecl.
+      if (auto *BD = dyn_cast<BindingDecl>(DRE->getDecl()))
+        VD = cast<VarDecl>(BD->getDecomposedDecl());
+      else
+        VD = cast<VarDecl>(DRE->getDecl());
     } else if (auto *MRE = dyn_cast<MemberExpr>(E)) {
       if (auto *BaseDRE = dyn_cast<DeclRefExpr>(MRE->getBase())) {
         if (auto *BaseVD = dyn_cast<VarDecl>(BaseDRE->getDecl()))
@@ -10223,7 +10227,29 @@ class MappableExprsHandler {
   /// record field declaration \a RI and captured value \a CV.
   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                               const FieldDecl &RI, llvm::Value *CV,
-                              MapCombinedInfoTy &CombinedInfo) const {
+                              MapCombinedInfoTy &CombinedInfo,
+                              ArrayRef<MapData> DeclComponentLists) const {
+    // Check if this is a DecompositionDecl whose original variable has been
+    // explicitly mapped. If so, skip this default mapping to avoid redundancy.
+    if (CI.capturesVariable() || CI.capturesVariableByCopy()) {
+      const VarDecl *VD = CI.getCapturedVar();
+      if (auto *DD = dyn_cast<DecompositionDecl>(VD)) {
+        if (const VarDecl *OrigVar = DD->getOriginalVar()) {
+          // Check if the original variable has been explicitly mapped.
+          for (const MapData &L : DeclComponentLists) {
+            OMPClauseMappableExprCommon::MappableExprComponentListRef
+                Components = std::get<0>(L);
+            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
+                 Components) {
+              if (MC.getAssociatedDeclaration() == OrigVar)
+                // Original variable is explicitly mapped, skip this default
+                // map.
+                return;
+            }
+          }
+        }
+      }
+    }
     bool IsImplicit = true;
     // Do the default mapping.
     if (CI.capturesThis()) {
@@ -10790,7 +10816,8 @@ static void genMapInfoForCaptures(
       // the base-variable, or attach pointer.
       if (DeclComponentLists.empty() ||
           (!HasEntryWithCVAsAttachPtr && !HasEntryWithoutAttachPtr))
-        MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
+        MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo,
+                                         DeclComponentLists);
 
       // If we have any information in the map clause, we use it, otherwise we
       // just do a default mapping.
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index e94837e5fd5d6..266538d3ea065 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -1223,8 +1223,7 @@ class CodeGenFunction : public CodeGenTypeCache {
     OMPMapVars MappedVars;
     OMPPrivateScope(const OMPPrivateScope &) = delete;
     void operator=(const OMPPrivateScope &) = delete;
-    llvm::SmallVector<std::pair<const BindingDecl *, std::optional<Address>>, 4>
-        BindingChanges;
+    SmallVector<std::pair<const BindingDecl *, Address>, 4> BindingChanges;
 
   public:
     /// Enter a new OpenMP private scope.
@@ -1240,8 +1239,8 @@ class CodeGenFunction : public CodeGenTypeCache {
       if (const auto *BD = dyn_cast<BindingDecl>(LocalVD->getCanonicalDecl())) {
         auto It = CGF.OMPPrivatizedBindings.find(BD);
         BindingChanges.emplace_back(BD, It != CGF.OMPPrivatizedBindings.end()
-                                            ? std::make_optional(It->second)
-                                            : std::nullopt);
+                                            ? It->second
+                                            : Address::invalid());
       }
       return MappedVars.setVarAddr(CGF, LocalVD, Addr);
     }
@@ -1266,9 +1265,12 @@ class CodeGenFunction : public CodeGenTypeCache {
       if (PerformCleanup)
         ForceCleanup();
       for (auto &Change : BindingChanges) {
-        if (Change.second.has_value()) {
+        if (Change.second.isValid()) {
           auto It = CGF.OMPPrivatizedBindings.find(Change.first);
-          It->second = CGF.OMPPrivatizedBindings.at(Change.first);
+          if (It != CGF.OMPPrivatizedBindings.end())
+            It->second = Change.second;
+          else
+            CGF.OMPPrivatizedBindings.insert({Change.first, Change.second});
         } else {
           CGF.OMPPrivatizedBindings.erase(Change.first);
         }
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index ff87e674dce36..65543372f66e9 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -4149,30 +4149,45 @@ class DSAAttrChecker final : public StmtVisitor<DSAAttrChecker, void> {
 
       if (isOpenMPTargetExecutionDirective(DKind) &&
           !Stack->isLoopControlVariable(VD).first) {
-        if (!Stack->checkMappableExprComponentListsForDecl(
-                VD, /*CurrentRegionOnly=*/true,
-                [this](OMPClauseMappableExprCommon::MappableExprComponentListRef
-                           StackComponents,
-                       OpenMPClauseKind) {
-                  if (SemaRef.LangOpts.OpenMP >= 50)
-                    return !StackComponents.empty();
-                  // Variable is used if it has been marked as an array, array
-                  // section, array shaping or the variable itself.
-                  return StackComponents.size() == 1 ||
-                         llvm::all_of(
-                             llvm::drop_begin(llvm::reverse(StackComponents)),
-                             [](const OMPClauseMappableExprCommon::
-                                    MappableComponent &MC) {
-                               return MC.getAssociatedDeclaration() ==
-                                          nullptr &&
-                                      (isa<ArraySectionExpr>(
-                                           MC.getAssociatedExpression()) ||
-                                       isa<OMPArrayShapingExpr>(
-                                           MC.getAssociatedExpression()) ||
-                                       isa<ArraySubscriptExpr>(
-                                           MC.getAssociatedExpression()));
-                             });
-                })) {
+        // Check if VD is already mapped. For DecompositionDecls, also check if
+        // the original variable they decompose has been mapped (via BindingDecl
+        // map clauses).
+        bool AlreadyMapped = Stack->checkMappableExprComponentListsForDecl(
+            VD, /*CurrentRegionOnly=*/true, [this](auto StackComponents, auto) {
+          if (SemaRef.LangOpts.OpenMP >= 50)
+            return !StackComponents.empty();
+          // Variable is used if it has been marked as an array, array
+          // section, array shaping or the variable itself.
+          return StackComponents.size() == 1 ||
+                 llvm::all_of(llvm::drop_begin(llvm::reverse(StackComponents)),
+                              [](const auto &MC) {
+                                return MC.getAssociatedDeclaration() ==
+                                           nullptr &&
+                                       (isa<ArraySectionExpr>(
+                                            MC.getAssociatedExpression()) ||
+                                        isa<OMPArrayShapingExpr>(
+                                            MC.getAssociatedExpression()) ||
+                                        isa<ArraySubscriptExpr>(
+                                            MC.getAssociatedExpression()));
+                              });
+            });
+
+        // For DecompositionDecls, check if the original variable has been
+        // mapped.
+        if (!AlreadyMapped) {
+          if (const auto *DD = dyn_cast<DecompositionDecl>(VD)) {
+            if (const VarDecl *OrigVar = DD->getOriginalVar()) {
+              AlreadyMapped = Stack->checkMappableExprComponentListsForDecl(
+                  OrigVar, /*CurrentRegionOnly=*/true,
+                  [this](auto StackComponents, auto) {
+                    if (SemaRef.LangOpts.OpenMP >= 50)
+                      return !StackComponents.empty();
+                    return StackComponents.size() == 1;
+                  });
+            }
+          }
+        }
+        if (!AlreadyMapped) {
           bool IsFirstprivate = false;
           // By default lambdas are captured as firstprivates.
           if (const auto *RD =
@@ -22603,14 +22618,84 @@ class MapBaseChecker final : public StmtVisitor<MapBaseChecker, bool> {
 
 public:
   bool VisitDeclRefExpr(DeclRefExpr *DRE) {
-    if (!isa<VarDecl>(DRE->getDecl())) {
+    ValueDecl *D = DRE->getDecl();
+    Expr *E = DRE;
+
+    // Map a BindingDecl as a member access on the variable the decomposition
+    // was initialized from. When the user writes:
+    //   auto [a, b] = p;
+    //   #pragma omp target map(tofrom:a) map(to:b)
+    // the clauses are handled as if they were written:
+    //   #pragma omp target map(tofrom:p.x) map(to:p.y)
+    // This avoids conflicts when different bindings have different map types.
+    if (auto *BD = dyn_cast<BindingDecl>(D)) {
+      auto *DD = cast<DecompositionDecl>(BD->getDecomposedDecl());
+      Expr *BindingExpr = BD->getBinding();
+
+      // A MemberExpr binding indicates a struct/class decomposition.
+      if (auto *ME = dyn_cast_or_null<MemberExpr>(BindingExpr)) {
+
+        // Find the variable the decomposition was initialized from, if any.
+        if (const VarDecl *OrigVar = DD->getOriginalVar()) {
+
+          // Build a reference to OrigVar to use as the member-access base,
+          // so that map(a) is treated as map(p.x).
+          DeclarationNameInfo BaseNameInfo(OrigVar->getDeclName(),
+                                           DRE->getLocation());
+          Expr *BaseExpr = DeclRefExpr::Create(
+              SemaRef.Context, DRE->getQualifierLoc(),
+              DRE->getTemplateKeywordLoc(), const_cast<VarDecl *>(OrigVar),
+              /*RefersToEnclosingVariableOrCapture=*/false, BaseNameInfo,
+              OrigVar->getType(), DRE->getValueKind(), nullptr,
+              /*TemplateArgs=*/nullptr, DRE->isNonOdrUse());
+
+          // Re-create the binding's member access on that base: base.member
+          E = MemberExpr::Create(
+              SemaRef.Context, BaseExpr, /*IsArrow=*/false,
+              ME->getOperatorLoc(), ME->getQualifierLoc(),
+              ME->getTemplateKeywordLoc(), ME->getMemberDecl(),
+              ME->getFoundDecl(), ME->getMemberNameInfo(),
+              /*TemplateArgs=*/nullptr, ME->getType(), ME->getValueKind(),
+              ME->getObjectKind(), ME->isNonOdrUse());
+
+          // Visit the synthesized member expression instead of the binding,
+          // so the mapping is recorded at field level.
+          return Visit(E);
+        }
+      }
+
+      // Fallback: redirect to the DecompositionDecl for non-member bindings
+      // (arrays, tuples) or when no original variable is found.
+      D = DD;
+      DeclarationNameInfo NameInfo(D->getDeclName(), DRE->getLocation());
+      E = DeclRefExpr::Create(
+          SemaRef.Context, DRE->getQualifierLoc(),
+          DRE->getTemplateKeywordLoc(), DD,
+          /*RefersToEnclosingVariableOrCapture=*/false, NameInfo,
+          D->getType(), DRE->getValueKind(), DRE->getFoundDecl(),
+          /*TemplateArgs=*/nullptr, DRE->isNonOdrUse());
+    }
+    // Handle DecompositionDecl directly (implicit captures).
+    else if (auto *DD = dyn_cast<DecompositionDecl>(D)) {
+      if (const VarDecl *OrigVar = DD->getOriginalVar()) {
+        D = const_cast<VarDecl *>(OrigVar);
+        DeclarationNameInfo NameInfo(D->getDeclName(), DRE->getLocation());
+        E = DeclRefExpr::Create(SemaRef.Context, DRE->getQualifierLoc(),
+                                DRE->getTemplateKeywordLoc(), D,
+                                /*RefersToEnclosingVariableOrCapture=*/false,
+                                NameInfo, D->getType(), DRE->getValueKind(),
+                                DRE->getFoundDecl(),
+                                /*TemplateArgs=*/nullptr, DRE->isNonOdrUse());
+      }
+      // If we can't find the original, keep the DecompositionDecl as-is.
+    } else if (!isa<VarDecl>(D)) {
       emitErrorMsg();
       return false;
     }
     assert(!RelevantExpr && "RelevantExpr is expected to be nullptr");
     RelevantExpr = DRE;
     // Record the component.
-    Components.emplace_back(DRE, DRE->getDecl(), IsNonContiguous);
+    Components.emplace_back(E, D, IsNonContiguous);
     return true;
   }
 
diff --git a/clang/test/OpenMP/structured-bindings-codegen.cpp b/clang/test/OpenMP/structured-bindings-codegen.cpp
index 6fc24fa4e21b9..bb17764947902 100644
--- a/clang/test/OpenMP/structured-bindings-codegen.cpp
+++ b/clang/test/OpenMP/structured-bindings-codegen.cpp
@@ -927,9 +927,9 @@ void test_lambda_implicit_capture() {
 // CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4:![0-9]+]], !align [[META5:![0-9]+]]
 // CHECK:    [[TMP2:%.*]] = load ptr, ptr [[P_ADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P1:%.*]], ptr align 4 [[TMP2]], i64 8, i1 false)
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[P1]], i32 0, i32 0
 // CHECK:    [[TMP3:%.*]] = load i32, ptr [[X]], align 4
-// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[P1]], i32 0, i32 1
 // CHECK:    [[TMP4:%.*]] = load i32, ptr [[Y]], align 4
 // CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP4]]
 // CHECK:    store i32 [[ADD]], ptr [[SUM:%.*]], align 4
@@ -2120,9 +2120,9 @@ void test_lambda_implicit_capture() {
 // CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    [[TMP4:%.*]] = load ptr, ptr [[P1_ADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P12:%.*]], ptr align 4 [[TMP4]], i64 8, i1 false)
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[P12]], i32 0, i32 0
 // CHECK:    [[TMP5:%.*]] = load i32, ptr [[X]], align 4
-// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP2]], i32 0, i32 1
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[P12]], i32 0, i32 1
 // CHECK:    [[TMP6:%.*]] = load i32, ptr [[Y]], align 4
 // CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]]
 // CHECK:    [[X3:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP3]], i32 0, i32 0
@@ -2330,9 +2330,9 @@ void test_lambda_implicit_capture() {
 // CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP2]], 1
 // CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4, !llvm.access.group [[ACC_GRP73]]
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[P]], i32 0, i32 0
 // CHECK:    [[TMP3:%.*]] = load i32, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP73]]
-// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 1
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[P]], i32 0, i32 1
 // CHECK:    [[TMP4:%.*]] = load i32, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP73]]
 // CHECK:    [[ADD1:%.*]] = add nsw i32 [[TMP3]], [[TMP4]]
 // CHECK:    [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP73]]
diff --git a/clang/test/OpenMP/structured-bindings-target-map-different-types.cpp b/clang/test/OpenMP/structured-bindings-target-map-different-types.cpp
new file mode 100644
index 0000000000000..0243b95690fd1
--- /dev/null
+++ b/clang/test/OpenMP/structured-bindings-target-map-different-types.cpp
@@ -0,0 +1,344 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --filter-out-after "getelem.*kernel" --filter-out "= alloca.*" --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --global-value-regex "\.offload_.*" --global-hex-value-regex ".offload_maptypes.*" --version 6
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -std=c++20 \
+// RUN: -fopenmp-targets=x86_64-unknown-unknown \
+// RUN: -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+
+// expected-no-diagnostics
+
+struct Point { int x, y; };
+
+// Test that different map clauses on bindings from the same decomposition
+// result in consistent capture behavior. The DecompositionDecl is captured
+// once (due to deduplication), and the capture kind should be determined
+// consistently regardless of which binding's map clause is processed first.
+
+
+void test_different_map_type() {
+  Point p{1, 2};
+  auto [a, b] = p;
+#pragma omp target map(tofrom:a) map(to:b)
+  {
+    a = a + 1;
+    b = b + 2;
+  }
+}
+
+void test_same_map_type_different() {
+  Point p{1, 2};
+  auto [a, b] = p;
+
+#pragma omp target map(tofrom:a) map(tofrom:b)
+  {
+    a = a + 1;
+    b = b + 2;
+  }
+}
+
+void test_one_binding_mapped() {
+  Point p{1, 2};
+  auto [a, b] = p;
+
+#pragma omp target map(tofrom:a)
+  {
+    a = a + 1;
+    b = b + 2;
+  }
+}
+
+void test_mixed_map_types_three_way() {
+  struct Triple { int x, y, z; };
+  Triple t{1, 2, 3};
+  auto [a, b, c] = t;
+
+#pragma omp target map(tofrom:a) map(to:b) map(from:c)
+  {
+    a = a + 1;
+    b = b + 2;
+    c = c + 3;
+  }
+}
+
+//.
+// CHECK: @.offload_sizes = private unnamed_addr constant [5 x i64] [i64 8, i64 0, i64 4, i64 4, i64 0]
+// CHECK: @.offload_maptypes = private unnamed_addr constant [5 x i64] [i64 [[#0x320]], i64 [[#0x0]], i64 [[#0x2000000000003]], i64 [[#0x2000000000001]], i64 [[#0x120]]]
+// CHECK: @.offload_sizes.1 = private unnamed_addr constant [5 x i64] [i64 8, i64 0, i64 4, i64 4, i64 0]
+// CHECK: @.offload_maptypes.2 = private unnamed_addr constant [5 x i64] [i64 [[#0x320]], i64 [[#0x0]], i64 [[#0x2000000000003]], i64 [[#0x2000000000003]], i64 [[#0x120]]]
+// CHECK: @.offload_sizes.3 = private unnamed_addr constant [3 x i64] [i64 8, i64 4, i64 0]
+// CHECK: @.offload_maptypes.4 = private unnamed_addr constant [3 x i64] [i64 [[#0x320]], i64 [[#0x3]], i64 [[#0x120]]]
+// CHECK: @.offload_sizes.5 = private unnamed_addr constant [6 x i64] [i64 12, i64 0, i64 4, i64 4, i64 4, i64 0]
+// CHECK: @.offload_maptypes.6 = private unnamed_addr constant [6 x i64] [i64 [[#0x320]], i64 [[#0x0]], i64 [[#0x2000000000003]], i64 [[#0x2000000000001]], i64 [[#0x2000000000002]], i64 [[#0x120]]]
+//.
+// CHECK-LABEL: define dso_local void @_Z23test_different_map_typev(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z23test_different_map_typev.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    [[TMP1:%.*]] = load [[STRUCT_POINT:%.*]], ptr [[TMP0]], align 4
+// CHECK:    store [[STRUCT_POINT]] [[TMP1]], ptr [[DOTCASTED:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = load i64, ptr [[DOTCASTED]], align 8
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[P]], i32 0, i32 0
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[P]], i32 0, i32 1
+// CHECK:    [[TMP3:%.*]] = getelementptr i32, ptr [[Y]], i32 1
+// CHECK:    [[TMP4:%.*]] = ptrtoaddr ptr [[TMP3]] to i64
+// CHECK:    [[TMP5:%.*]] = ptrtoaddr ptr [[X]] to i64
+// CHECK:    [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES:%.*]], ptr align 8 @.offload_sizes, i64 40, i1 false)
+// CHECK:    [[TMP7:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS:%.*]], i32 0, i32 0
+// CHECK:    store i64 [[TMP2]], ptr [[TMP7]], align 8
+// CHECK:    [[TMP8:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS:%.*]], i32 0, i32 0
+// CHECK:    store i64 [[TMP2]], ptr [[TMP8]], align 8
+// CHECK:    [[TMP9:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS:%.*]], i64 0, i64 0
+// CHECK:    store ptr null, ptr [[TMP9]], align 8
+// CHECK:    [[TMP10:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK:    store ptr [[P]], ptr [[TMP10]], align 8
+// CHECK:    [[TMP11:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK:    store ptr [[X]], ptr [[TMP11]], align 8
+// CHECK:    [[TMP12:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 1
+// CHECK:    store i64 [[TMP6]], ptr [[TMP12]], align 8
+// CHECK:    [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK:    store ptr null, ptr [[TMP13]], align 8
+// CHECK:    [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK:    store ptr [[P]], ptr [[TMP14]], align 8
+// CHECK:    [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK:    store ptr [[X]], ptr [[TMP15]], align 8
+// CHECK:    [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK:    store ptr null, ptr [[TMP16]], align 8
+// CHECK:    [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK:    store ptr [[P]], ptr [[TMP17]], align 8
+// CHECK:    [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
+// CHECK:    store ptr [[Y]], ptr [[TMP18]], align 8
+// CHECK:    [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK:    store ptr null, ptr [[TMP19]], align 8
+// CHECK:    [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK:    store ptr null, ptr [[TMP20]], align 8
+// CHECK:    [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK:    store ptr null, ptr [[TMP21]], align 8
+// CHECK:    [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK:    store ptr null, ptr [[TMP22]], align 8
+// CHECK:    [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK:    [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK:    [[TMP25:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK:    [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], ptr [[KERNEL_ARGS:%.*]], i32 0, i32 0
+//
+//
+// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23test_different_map_typev_l19(
+// CHECK-SAME: i64 noundef [[TMP0:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store i64 [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR:%.*]], align 8
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[DOTADDR]], i32 0, i32 0
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
+// CHECK:    [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[DOTADDR]], i32 0, i32 0
+// CHECK:    store i32 [[ADD]], ptr [[X1]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[DOTADDR]], i32 0, i32 1
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD2:%.*]] = add nsw i32 [[TMP2]], 2
+// CHECK:    [[Y3:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[DOTADDR]], i32 0, i32 1
+// CHECK:    store i32 [[ADD2]], ptr [[Y3]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z28test_same_map_type_differentv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z28test_same_map_type_differentv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    [[TMP1:%.*]] = load [[STRUCT_POINT:%.*]], ptr [[TMP0]], align 4
+// CHECK:    store [[STRUCT_POINT]] [[TMP1]], ptr [[DOTCASTED:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = load i64, ptr [[DOTCASTED]], align 8
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[P]], i32 0, i32 0
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[P]], i32 0, i32 1
+// CHECK:    [[TMP3:%.*]] = getelementptr i32, ptr [[Y]], i32 1
+// CHECK:    [[TMP4:%.*]] = ptrtoaddr ptr [[TMP3]] to i64
+// CHECK:    [[TMP5:%.*]] = ptrtoaddr ptr [[X]] to i64
+// CHECK:    [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES:%.*]], ptr align 8 @.offload_sizes.1, i64 40, i1 false)
+// CHECK:    [[TMP7:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS:%.*]], i32 0, i32 0
+// CHECK:    store i64 [[TMP2]], ptr [[TMP7]], align 8
+// CHECK:    [[TMP8:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS:%.*]], i32 0, i32 0
+// CHECK:    store i64 [[TMP2]], ptr [[TMP8]], align 8
+// CHECK:    [[TMP9:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS:%.*]], i64 0, i64 0
+// CHECK:    store ptr null, ptr [[TMP9]], align 8
+// CHECK:    [[TMP10:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK:    store ptr [[P]], ptr [[TMP10]], align 8
+// CHECK:    [[TMP11:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK:    store ptr [[X]], ptr [[TMP11]], align 8
+// CHECK:    [[TMP12:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 1
+// CHECK:    store i64 [[TMP6]], ptr [[TMP12]], align 8
+// CHECK:    [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK:    store ptr null, ptr [[TMP13]], align 8
+// CHECK:    [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK:    store ptr [[P]], ptr [[TMP14]], align 8
+// CHECK:    [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK:    store ptr [[X]], ptr [[TMP15]], align 8
+// CHECK:    [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK:    store ptr null, ptr [[TMP16]], align 8
+// CHECK:    [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK:    store ptr [[P]], ptr [[TMP17]], align 8
+// CHECK:    [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
+// CHECK:    store ptr [[Y]], ptr [[TMP18]], align 8
+// CHECK:    [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK:    store ptr null, ptr [[TMP19]], align 8
+// CHECK:    [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK:    store ptr null, ptr [[TMP20]], align 8
+// CHECK:    [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK:    store ptr null, ptr [[TMP21]], align 8
+// CHECK:    [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK:    store ptr null, ptr [[TMP22]], align 8
+// CHECK:    [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK:    [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK:    [[TMP25:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK:    [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], ptr [[KERNEL_ARGS:%.*]], i32 0, i32 0
+//
+//
+// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28test_same_map_type_differentv_l30(
+// CHECK-SAME: i64 noundef [[TMP0:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store i64 [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR:%.*]], align 8
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[DOTADDR]], i32 0, i32 0
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
+// CHECK:    [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[DOTADDR]], i32 0, i32 0
+// CHECK:    store i32 [[ADD]], ptr [[X1]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[DOTADDR]], i32 0, i32 1
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD2:%.*]] = add nsw i32 [[TMP2]], 2
+// CHECK:    [[Y3:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[DOTADDR]], i32 0, i32 1
+// CHECK:    store i32 [[ADD2]], ptr [[Y3]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z23test_one_binding_mappedv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z23test_one_binding_mappedv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    [[TMP1:%.*]] = load [[STRUCT_POINT:%.*]], ptr [[TMP0]], align 4
+// CHECK:    store [[STRUCT_POINT]] [[TMP1]], ptr [[DOTCASTED:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = load i64, ptr [[DOTCASTED]], align 8
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[P]], i32 0, i32 0
+// CHECK:    [[TMP3:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS:%.*]], i32 0, i32 0
+// CHECK:    store i64 [[TMP2]], ptr [[TMP3]], align 8
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS:%.*]], i32 0, i32 0
+// CHECK:    store i64 [[TMP2]], ptr [[TMP4]], align 8
+// CHECK:    [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS:%.*]], i64 0, i64 0
+// CHECK:    store ptr null, ptr [[TMP5]], align 8
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK:    store ptr [[P]], ptr [[TMP6]], align 8
+// CHECK:    [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK:    store ptr [[X]], ptr [[TMP7]], align 8
+// CHECK:    [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK:    store ptr null, ptr [[TMP8]], align 8
+// CHECK:    [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK:    store ptr null, ptr [[TMP9]], align 8
+// CHECK:    [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK:    store ptr null, ptr [[TMP10]], align 8
+// CHECK:    [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK:    store ptr null, ptr [[TMP11]], align 8
+// CHECK:    [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK:    [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK:    [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], ptr [[KERNEL_ARGS:%.*]], i32 0, i32 0
+//
+//
+// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23test_one_binding_mappedv_l41(
+// CHECK-SAME: i64 noundef [[TMP0:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store i64 [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR:%.*]], align 8
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[DOTADDR]], i32 0, i32 0
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
+// CHECK:    [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[DOTADDR]], i32 0, i32 0
+// CHECK:    store i32 [[ADD]], ptr [[X1]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[DOTADDR]], i32 0, i32 1
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD2:%.*]] = add nsw i32 [[TMP2]], 2
+// CHECK:    [[Y3:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[DOTADDR]], i32 0, i32 1
+// CHECK:    store i32 [[ADD2]], ptr [[Y3]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z30test_mixed_map_types_three_wayv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[T:%.*]], ptr align 4 @__const._Z30test_mixed_map_types_three_wayv.t, i64 12, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[T]], i64 12, i1 false)
+// CHECK:    [[TMP1:%.*]] = load [[STRUCT_TRIPLE:%.*]], ptr [[TMP0]], align 4
+// CHECK:    store [[STRUCT_TRIPLE]] [[TMP1]], ptr [[DOTCASTED:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = load i64, ptr [[DOTCASTED]], align 8
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TRIPLE]], ptr [[T]], i32 0, i32 0
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TRIPLE]], ptr [[T]], i32 0, i32 1
+// CHECK:    [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TRIPLE]], ptr [[T]], i32 0, i32 2
+// CHECK:    [[TMP3:%.*]] = getelementptr i32, ptr [[Z]], i32 1
+// CHECK:    [[TMP4:%.*]] = ptrtoaddr ptr [[TMP3]] to i64
+// CHECK:    [[TMP5:%.*]] = ptrtoaddr ptr [[X]] to i64
+// CHECK:    [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES:%.*]], ptr align 8 @.offload_sizes.5, i64 48, i1 false)
+// CHECK:    [[TMP7:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS:%.*]], i32 0, i32 0
+// CHECK:    store i64 [[TMP2]], ptr [[TMP7]], align 8
+// CHECK:    [[TMP8:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS:%.*]], i32 0, i32 0
+// CHECK:    store i64 [[TMP2]], ptr [[TMP8]], align 8
+// CHECK:    [[TMP9:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_MAPPERS:%.*]], i64 0, i64 0
+// CHECK:    store ptr null, ptr [[TMP9]], align 8
+// CHECK:    [[TMP10:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK:    store ptr [[T]], ptr [[TMP10]], align 8
+// CHECK:    [[TMP11:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK:    store ptr [[X]], ptr [[TMP11]], align 8
+// CHECK:    [[TMP12:%.*]] = getelementptr inbounds [6 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 1
+// CHECK:    store i64 [[TMP6]], ptr [[TMP12]], align 8
+// CHECK:    [[TMP13:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK:    store ptr null, ptr [[TMP13]], align 8
+// CHECK:    [[TMP14:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK:    store ptr [[T]], ptr [[TMP14]], align 8
+// CHECK:    [[TMP15:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK:    store ptr [[X]], ptr [[TMP15]], align 8
+// CHECK:    [[TMP16:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK:    store ptr null, ptr [[TMP16]], align 8
+// CHECK:    [[TMP17:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK:    store ptr [[T]], ptr [[TMP17]], align 8
+// CHECK:    [[TMP18:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
+// CHECK:    store ptr [[Y]], ptr [[TMP18]], align 8
+// CHECK:    [[TMP19:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK:    store ptr null, ptr [[TMP19]], align 8
+// CHECK:    [[TMP20:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK:    store ptr [[T]], ptr [[TMP20]], align 8
+// CHECK:    [[TMP21:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK:    store ptr [[Z]], ptr [[TMP21]], align 8
+// CHECK:    [[TMP22:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK:    store ptr null, ptr [[TMP22]], align 8
+// CHECK:    [[TMP23:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
+// CHECK:    store ptr null, ptr [[TMP23]], align 8
+// CHECK:    [[TMP24:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5
+// CHECK:    store ptr null, ptr [[TMP24]], align 8
+// CHECK:    [[TMP25:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5
+// CHECK:    store ptr null, ptr [[TMP25]], align 8
+// CHECK:    [[TMP26:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK:    [[TMP27:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK:    [[TMP28:%.*]] = getelementptr inbounds [6 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK:    [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], ptr [[KERNEL_ARGS:%.*]], i32 0, i32 0
+//
+//
+// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_mixed_map_types_three_wayv_l53(
+// CHECK-SAME: i64 noundef [[TMP0:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store i64 [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR:%.*]], align 8
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TRIPLE:%.*]], ptr [[DOTADDR]], i32 0, i32 0
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
+// CHECK:    [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_TRIPLE]], ptr [[DOTADDR]], i32 0, i32 0
+// CHECK:    store i32 [[ADD]], ptr [[X1]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TRIPLE]], ptr [[DOTADDR]], i32 0, i32 1
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD2:%.*]] = add nsw i32 [[TMP2]], 2
+// CHECK:    [[Y3:%.*]] = getelementptr inbounds nuw [[STRUCT_TRIPLE]], ptr [[DOTADDR]], i32 0, i32 1
+// CHECK:    store i32 [[ADD2]], ptr [[Y3]], align 4
+// CHECK:    [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TRIPLE]], ptr [[DOTADDR]], i32 0, i32 2
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[Z]], align 4
+// CHECK:    [[ADD4:%.*]] = add nsw i32 [[TMP3]], 3
+// CHECK:    [[Z5:%.*]] = getelementptr inbounds nuw [[STRUCT_TRIPLE]], ptr [[DOTADDR]], i32 0, i32 2
+// CHECK:    store i32 [[ADD4]], ptr [[Z5]], align 4
+// CHECK:    ret void
+//

>From 8d5011b399f7a45dbadba89633d84dc39652f351 Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Fri, 19 Jun 2026 09:39:48 -0700
Subject: [PATCH 43/45] Fix format

---
 clang/include/clang/AST/DeclCXX.h |  4 +--
 clang/lib/Sema/SemaOpenMP.cpp     | 49 ++++++++++++++++---------------
 2 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h
index 2aa010e617036..641a756db5387 100644
--- a/clang/include/clang/AST/DeclCXX.h
+++ b/clang/include/clang/AST/DeclCXX.h
@@ -4315,8 +4315,8 @@ class DecompositionDecl final
 
   void printName(raw_ostream &OS, const PrintingPolicy &Policy) const override;
 
-  /// If this decomposition was initialized from a variable (e.g., auto [a,b] = p),
-  /// return that variable. Otherwise return nullptr.
+  /// If this decomposition was initialized from a variable (e.g., auto [a,b] =
+  /// p), return that variable. Otherwise return nullptr.
   const VarDecl *getOriginalVar() const;
 
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 65543372f66e9..bc7c96322198d 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -4154,22 +4154,22 @@ class DSAAttrChecker final : public StmtVisitor<DSAAttrChecker, void> {
         // map clauses).
         bool AlreadyMapped = Stack->checkMappableExprComponentListsForDecl(
             VD, /*CurrentRegionOnly=*/true, [this](auto StackComponents, auto) {
-          if (SemaRef.LangOpts.OpenMP >= 50)
-            return !StackComponents.empty();
-          // Variable is used if it has been marked as an array, array
-          // section, array shaping or the variable itself.
-          return StackComponents.size() == 1 ||
-                 llvm::all_of(llvm::drop_begin(llvm::reverse(StackComponents)),
-                              [](const auto &MC) {
-                                return MC.getAssociatedDeclaration() ==
-                                           nullptr &&
-                                       (isa<ArraySectionExpr>(
-                                            MC.getAssociatedExpression()) ||
-                                        isa<OMPArrayShapingExpr>(
-                                            MC.getAssociatedExpression()) ||
-                                        isa<ArraySubscriptExpr>(
-                                            MC.getAssociatedExpression()));
-                              });
+              if (SemaRef.LangOpts.OpenMP >= 50)
+                return !StackComponents.empty();
+              // Variable is used if it has been marked as an array, array
+              // section, array shaping or the variable itself.
+              return StackComponents.size() == 1 ||
+                     llvm::all_of(
+                         llvm::drop_begin(llvm::reverse(StackComponents)),
+                         [](const auto &MC) {
+                           return MC.getAssociatedDeclaration() == nullptr &&
+                                  (isa<ArraySectionExpr>(
+                                       MC.getAssociatedExpression()) ||
+                                   isa<OMPArrayShapingExpr>(
+                                       MC.getAssociatedExpression()) ||
+                                   isa<ArraySubscriptExpr>(
+                                       MC.getAssociatedExpression()));
+                         });
             });
 
         // For DecompositionDecls, check if the original variable has been
@@ -22632,7 +22632,8 @@ class MapBaseChecker final : public StmtVisitor<MapBaseChecker, bool> {
       auto *DD = cast<DecompositionDecl>(BD->getDecomposedDecl());
       Expr *BindingExpr = BD->getBinding();
 
-      // Check if the binding is a member expression (struct/class decomposition)
+      // Check if the binding is a member expression (struct/class
+      // decomposition).
       if (auto *ME = dyn_cast_or_null<MemberExpr>(BindingExpr)) {
 
         // Get the original variable that the decomposition was initialized from
@@ -22665,15 +22666,15 @@ class MapBaseChecker final : public StmtVisitor<MapBaseChecker, bool> {
       }
 
       // Fallback: redirect to DecompositionDecl for non-struct bindings
-      // (arrays, tuples)
+      // (arrays, tuples).
       D = DD;
       DeclarationNameInfo NameInfo(D->getDeclName(), DRE->getLocation());
-      E = DeclRefExpr::Create(
-          SemaRef.Context, DRE->getQualifierLoc(),
-          DRE->getTemplateKeywordLoc(), DD,
-          /*RefersToEnclosingVariableOrCapture=*/false, NameInfo,
-          D->getType(), DRE->getValueKind(), DRE->getFoundDecl(),
-          /*TemplateArgs=*/nullptr, DRE->isNonOdrUse());
+      E = DeclRefExpr::Create(SemaRef.Context, DRE->getQualifierLoc(),
+                              DRE->getTemplateKeywordLoc(), DD,
+                              /*RefersToEnclosingVariableOrCapture=*/false,
+                              NameInfo, D->getType(), DRE->getValueKind(),
+                              DRE->getFoundDecl(),
+                              /*TemplateArgs=*/nullptr, DRE->isNonOdrUse());
     }
     // Handle DecompositionDecl directly (implicit captures).
     else if (auto *DD = dyn_cast<DecompositionDecl>(D)) {

>From c3f37dba98212721a59e593c9da7037388682f20 Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Thu, 25 Jun 2026 08:59:13 -0700
Subject: [PATCH 44/45] Addressed review comments

---
 .../clang/Basic/DiagnosticSemaKinds.td        |   8 +
 clang/lib/CodeGen/CGExpr.cpp                  |  59 +-
 clang/lib/Sema/SemaOpenMP.cpp                 | 143 +++-
 .../OpenMP/structured-bindings-codegen.cpp    | 648 ++++--------------
 .../OpenMP/structured-bindings-messages.cpp   |  83 ++-
 5 files changed, 391 insertions(+), 550 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 3df6cf614200e..4f6d0e3c60670 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12684,6 +12684,14 @@ def note_omp_flush_order_clause_here : Note<
   "memory order clause '%0' is specified here">;
 def err_omp_non_lvalue_in_map_or_motion_clauses: Error<
   "expected addressable lvalue in '%0' clause">;
+def err_omp_unsupported_structured_binding_init : Error<
+  "mapping of structured binding initialized from %select{function call|"
+  "initializer list|temporary object|move expression}0 is not supported">;
+def err_omp_bindings_from_same_decomposition_with_different_dsa : Error<
+  "bindings from the same structured binding declaration cannot have "
+  "different data-sharing attributes">;
+def note_omp_previous_dsa_for_binding : Note<
+  "previous binding from the same declaration has '%0' attribute here">;
 def err_omp_var_expected : Error<
   "expected variable of the '%0' type%select{|, not %2}1">;
 def err_omp_non_pointer_type_array_shaping_base : Error<
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 6b349123eaf4f..dc5f11d44b46f 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3623,7 +3623,7 @@ LValue CodeGenFunction::EmitOMPCapturedBindingLValue(const BindingDecl *BD) {
         TargetDecl = OrigVar;
     }
   }
-  Expr *BindingExpr = BD->getBinding()->IgnoreImplicit();
+
   // Use getNonReferenceType() because we need the actual object type, not the
   // reference type. DeclRefExpr with VK_LValue requires a non-reference type
   // (AST invariant). EmitDeclRefLValue will load any reference for us.
@@ -3632,35 +3632,36 @@ LValue CodeGenFunction::EmitOMPCapturedBindingLValue(const BindingDecl *BD) {
                   /*RefersToEnclosingVariableOrCapture=*/true, DREType,
                   VK_LValue, SourceLocation());
   LValue BaseLVal = EmitDeclRefLValue(&DRE);
-  QualType CanonType = DREType.getCanonicalType();
-  Address Addr = BaseLVal.getAddress();
-  llvm::Type *ExpectedTy = CGM.getTypes().ConvertTypeForMem(CanonType);
-  if (auto *ASE = dyn_cast<ArraySubscriptExpr>(BindingExpr)) {
-    if (Addr.getElementType() != ExpectedTy)
-      Addr = Addr.withElementType(ExpectedTy);
-
-    Expr::EvalResult Result;
-    [[maybe_unused]] bool Success =
-        ASE->getIdx()->EvaluateAsInt(Result, getContext());
-    assert(Success && "Expected constant integer index for array subscript");
-    uint64_t Idx = Result.Val.getInt().getZExtValue();
-    Address EltAddr = Builder.CreateConstArrayGEP(Addr, Idx);
-    return MakeAddrLValue(EltAddr, BD->getType(), BaseLVal.getBaseInfo(),
-                          CGM.getTBAAInfoForSubobject(BaseLVal, BD->getType()));
-  }
-
-  if (auto *ME = dyn_cast<MemberExpr>(BindingExpr)) {
-    if (Addr.getElementType() != ExpectedTy) {
-      Addr = Addr.withElementType(ExpectedTy);
-      BaseLVal = MakeAddrLValue(Addr, CanonType, BaseLVal.getBaseInfo(),
-                                BaseLVal.getTBAAInfo());
-    }
-    return EmitLValueForField(BaseLVal, cast<FieldDecl>(ME->getMemberDecl()));
-  }
 
-  // Sema ensures tuple-like bindings are rejected earlier, so this path
-  // should never be reached.
-  llvm_unreachable("Unexpected structured binding type in OpenMP");
+  // Ensure the Address has the correct element type for DD's type.
+  // EmitDeclRefLValue might return an address with a different element type
+  // if TargetDecl != DD or if reference unwrapping occurred.
+  Address BaseAddr = BaseLVal.getAddress();
+  QualType DDType = DD->getType();
+  llvm::Type *ExpectedTy = CGM.getTypes().ConvertTypeForMem(DDType);
+  if (BaseAddr.getElementType() != ExpectedTy)
+    BaseAddr = BaseAddr.withElementType(ExpectedTy);
+
+  // Now emit the binding expression (array subscript, member access, etc.)
+  // by temporarily installing the decomposed storage address, then routing
+  // through EmitLValue for the binding expression.
+  Expr *BindingExpr = BD->getBinding();
+  auto DDIt = LocalDeclMap.find(DD);
+  bool DDWasMapped = DDIt != LocalDeclMap.end();
+  Address SavedAddr = DDWasMapped ? DDIt->second : Address::invalid();
+  if (DDWasMapped)
+    DDIt->second = BaseAddr;
+  else
+    LocalDeclMap.insert({DD, BaseAddr});
+  LValue Result = EmitLValue(BindingExpr);
+  if (DDWasMapped) {
+    auto RestoreIt = LocalDeclMap.find(DD);
+    assert(RestoreIt != LocalDeclMap.end() && "DD should still be in map");
+    RestoreIt->second = SavedAddr;
+  } else {
+    LocalDeclMap.erase(DD);
+  }
+  return Result;
 }
 
 LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index bc7c96322198d..dd2e58dc0f937 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -177,6 +177,9 @@ class DSAStackTy {
     UsedRefMapTy NontemporalMap;
     MappedExprComponentsTy MappedExprComponents;
     LoopControlVariablesMapTy LCVMap;
+    /// Track DecompositionDecls and their data-sharing attributes to detect
+    /// conflicting clauses on bindings from the same decomposition.
+    llvm::SmallDenseMap<const DecompositionDecl *, DSAInfo, 4> DecompositionDSA;
     DefaultDataSharingAttributes DefaultAttr = DSA_unspecified;
     SourceLocation DefaultAttrLoc;
     DefaultDataSharingVCAttributes DefaultVCAttr = DSA_VC_all;
@@ -567,6 +570,11 @@ class DSAStackTy {
               DeclRefExpr *PrivateCopy = nullptr, unsigned Modifier = 0,
               bool AppliedToPointee = false);
 
+  /// Check if bindings from the same DecompositionDecl have conflicting DSA.
+  /// Returns the conflicting DSAInfo if found, nullptr otherwise.
+  const DSAInfo *hasConflictingDecompositionDSA(const DecompositionDecl *DD,
+                                                OpenMPClauseKind A) const;
+
   /// Adds additional information for the reduction items with the reduction id
   /// represented as an operator.
   void addTaskgroupReductionData(const ValueDecl *D, SourceRange SR,
@@ -1582,9 +1590,43 @@ void DSAStackTy::addDSA(const ValueDecl *D, const Expr *E, OpenMPClauseKind A,
       Data.PrivateCopy = nullptr;
       Data.AppliedToPointee = AppliedToPointee;
     }
+    // Track DecompositionDecls for binding conflict detection.
+    if (const auto *BD = dyn_cast<BindingDecl>(D)) {
+      if (const auto *DD =
+              dyn_cast<DecompositionDecl>(BD->getDecomposedDecl())) {
+        DSAInfo &DDData = getTopOfStack().DecompositionDSA[DD];
+        if (DDData.Attributes == OMPC_unknown) {
+          // First binding from this decomposition.
+          DDData.Attributes = A;
+          DDData.RefExpr.setPointerAndInt(E, IsLastprivate);
+          DDData.Modifier = Modifier;
+        }
+      }
+    }
   }
 }
 
+const DSAStackTy::DSAInfo *
+DSAStackTy::hasConflictingDecompositionDSA(const DecompositionDecl *DD,
+                                           OpenMPClauseKind A) const {
+  if (isStackEmpty())
+    return nullptr;
+
+  auto It = getTopOfStack().DecompositionDSA.find(DD);
+  if (It == getTopOfStack().DecompositionDSA.end())
+    return nullptr;
+
+  const DSAInfo &ExistingDSA = It->second;
+
+  // Check if the new attribute conflicts with the existing one.
+  // Allow firstprivate + lastprivate on the same decomposition.
+  if (ExistingDSA.Attributes != A &&
+      !(A == OMPC_firstprivate && ExistingDSA.Attributes == OMPC_lastprivate) &&
+      !(A == OMPC_lastprivate && ExistingDSA.Attributes == OMPC_firstprivate))
+    return &ExistingDSA;
+  return nullptr;
+}
+
 /// Build a variable declaration for OpenMP loop iteration variable.
 static VarDecl *buildVarDecl(Sema &SemaRef, SourceLocation Loc, QualType Type,
                              StringRef Name, const AttrVec *Attrs = nullptr,
@@ -3887,6 +3929,35 @@ static void reportOriginalDsa(Sema &SemaRef, const DSAStackTy *Stack,
   }
 }
 
+/// Check for conflicting data-sharing attributes on bindings from the same
+/// structured binding declaration. Returns true if a conflict was found and
+/// diagnosed.
+static bool checkDecompositionDSAConflict(Sema &SemaRef, DSAStackTy *Stack,
+                                          const ValueDecl *D,
+                                          SourceLocation ELoc,
+                                          OpenMPClauseKind NewDSA) {
+  const auto *BD = dyn_cast<BindingDecl>(D);
+  if (!BD)
+    return false;
+
+  const auto *DD = dyn_cast<DecompositionDecl>(BD->getDecomposedDecl());
+  if (!DD)
+    return false;
+
+  if (const auto *ConflictDSA =
+          Stack->hasConflictingDecompositionDSA(DD, NewDSA)) {
+    SemaRef.Diag(
+        ELoc,
+        diag::err_omp_bindings_from_same_decomposition_with_different_dsa);
+    if (ConflictDSA->RefExpr.getPointer())
+      SemaRef.Diag(ConflictDSA->RefExpr.getPointer()->getExprLoc(),
+                   diag::note_omp_previous_dsa_for_binding)
+          << getOpenMPClauseName(ConflictDSA->Attributes);
+    return true;
+  }
+  return false;
+}
+
 static OpenMPMapClauseKind
 getMapClauseKindFromModifier(OpenMPDefaultmapClauseModifier M,
                              bool IsAggregateOrDeclareTarget,
@@ -19533,8 +19604,12 @@ OMPClause *SemaOpenMP::ActOnOpenMPPrivateClause(ArrayRef<Expr *> VarList,
       else
         Ref = buildCapture(SemaRef, D, SimpleRefExpr, /*WithInit=*/false);
     }
-    if (!IsImplicitClause)
+    if (!IsImplicitClause) {
+      if (checkDecompositionDSAConflict(SemaRef, DSAStack, D, ELoc,
+                                        OMPC_private))
+        continue;
       DSAStack->addDSA(D, RefExpr->IgnoreParens(), OMPC_private, Ref);
+    }
     Vars.push_back(
         (VD || IsBindingDecl || SemaRef.CurContext->isDependentContext())
             ? RefExpr->IgnoreParens()
@@ -20008,6 +20083,9 @@ OMPClause *SemaOpenMP::ActOnOpenMPLastprivateClause(
             SemaRef.IgnoredValueConversions(PostUpdateRes.get()).get());
       }
     }
+    if (checkDecompositionDSAConflict(SemaRef, DSAStack, D, ELoc,
+                                      OMPC_lastprivate))
+      continue;
     DSAStack->addDSA(D, RefExpr->IgnoreParens(), OMPC_lastprivate, Ref);
     bool IsBindingDecl = isa<BindingDecl>(D);
     Vars.push_back(
@@ -20070,6 +20148,8 @@ OMPClause *SemaOpenMP::ActOnOpenMPSharedClause(ArrayRef<Expr *> VarList,
     if (!VD && isOpenMPCapturedDecl(D) &&
         !SemaRef.CurContext->isDependentContext())
       Ref = buildCapture(SemaRef, D, SimpleRefExpr, /*WithInit=*/true);
+    if (checkDecompositionDSAConflict(SemaRef, DSAStack, D, ELoc, OMPC_shared))
+      continue;
     DSAStack->addDSA(D, RefExpr->IgnoreParens(), OMPC_shared, Ref);
     Vars.push_back((VD || !Ref || SemaRef.CurContext->isDependentContext())
                        ? RefExpr->IgnoreParens()
@@ -21284,6 +21364,8 @@ static bool actOnOMPReductionKindClause(
     // correct analysis of in_reduction clauses.
     if (CurrDir == OMPD_taskgroup && ClauseKind == OMPC_task_reduction)
       Modifier = OMPC_REDUCTION_task;
+    if (checkDecompositionDSAConflict(S, Stack, D, ELoc, OMPC_reduction))
+      continue;
     Stack->addDSA(D, RefExpr->IgnoreParens(), OMPC_reduction, Ref, Modifier,
                   ASE || OASE);
     if (Modifier == OMPC_REDUCTION_task &&
@@ -21541,6 +21623,8 @@ OMPClause *SemaOpenMP::ActOnOpenMPLinearClause(
         /*DirectInit=*/false);
     DeclRefExpr *InitRef = buildDeclRefExpr(SemaRef, Init, Type, ELoc);
 
+    if (checkDecompositionDSAConflict(SemaRef, DSAStack, D, ELoc, OMPC_linear))
+      continue;
     DSAStack->addDSA(D, RefExpr->IgnoreParens(), OMPC_linear, Ref);
     Vars.push_back(
         (VD || IsBindingDecl || SemaRef.CurContext->isDependentContext())
@@ -22662,6 +22746,8 @@ class MapBaseChecker final : public StmtVisitor<MapBaseChecker, bool> {
           // Now process this as a member expression, which will properly
           // handle the field-level mapping
           return Visit(E);
+        } else {
+          return false;
         }
       }
 
@@ -22687,8 +22773,9 @@ class MapBaseChecker final : public StmtVisitor<MapBaseChecker, bool> {
                                 NameInfo, D->getType(), DRE->getValueKind(),
                                 DRE->getFoundDecl(),
                                 /*TemplateArgs=*/nullptr, DRE->isNonOdrUse());
-      }
-      // If we can't find the original, keep the DecompositionDecl as-is.
+        } else {
+          return false;
+        }
     } else if (!isa<VarDecl>(D)) {
       emitErrorMsg();
       return false;
@@ -23668,6 +23755,14 @@ static void checkMappableExpressionList(
 
     Expr *SimpleExpr = RE->IgnoreParenCasts();
 
+    // Skip entries with unnamed decls (may occur for transformed expressions).
+    if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleExpr)) {
+      if (const auto *D = dyn_cast<NamedDecl>(DRE->getDecl())) {
+        if (!D->getDeclName())
+          continue;
+      }
+    }
+
     if (!RE->isLValue()) {
       if (SemaRef.getLangOpts().OpenMP < 50) {
         SemaRef.Diag(
@@ -23680,6 +23775,48 @@ static void checkMappableExpressionList(
       continue;
     }
 
+    // Check for unsupported structured bindings early.
+    if (!NoDiagnose) { 
+      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleExpr)) {
+        const DecompositionDecl *DD = nullptr;
+        if (const auto *BD = dyn_cast<BindingDecl>(DRE->getDecl())) {
+          DD = cast<DecompositionDecl>(BD->getDecomposedDecl());
+        } else if (const auto *D =
+                       dyn_cast<DecompositionDecl>(DRE->getDecl())) {
+          DD = D;
+        }
+        if (DD && !DD->getOriginalVar()) {
+          const Expr *Init = DD->getInit();
+          unsigned DiagKind = 3;
+          if (Init) {
+            const Expr *Stripped = Init->IgnoreParenImpCasts();
+            if (isa<CallExpr>(Stripped)) {
+              DiagKind = 0;
+            } else if (const auto *CCE = dyn_cast<CXXConstructExpr>(Stripped)) {
+              if (CCE->getNumArgs() == 1) {
+                const Expr *Arg = CCE->getArg(0)->IgnoreParenImpCasts();
+                if (isa<CallExpr>(Arg))
+                  DiagKind = 3;
+                else
+                  DiagKind = 2;
+              }
+            } else if (isa<InitListExpr>(Stripped) ||
+                       isa<CXXStdInitializerListExpr>(Stripped)) {
+              DiagKind = 1;
+            } else if (const auto *FCE =
+                           dyn_cast<CXXFunctionalCastExpr>(Stripped)) {
+              if (isa<InitListExpr>(FCE->getSubExpr()->IgnoreParenImpCasts()))
+                DiagKind = 1;
+            } else if (isa<MaterializeTemporaryExpr>(Init) ||
+                       isa<CXXBindTemporaryExpr>(Init))
+              DiagKind = 2;
+          }
+          SemaRef.Diag(ELoc, diag::err_omp_unsupported_structured_binding_init)
+              << DiagKind;
+          continue;
+        }
+      }
+    }
     OMPClauseMappableExprCommon::MappableExprComponentList CurComponents;
     ValueDecl *CurDeclaration = nullptr;
 
diff --git a/clang/test/OpenMP/structured-bindings-codegen.cpp b/clang/test/OpenMP/structured-bindings-codegen.cpp
index bb17764947902..1510c98704886 100644
--- a/clang/test/OpenMP/structured-bindings-codegen.cpp
+++ b/clang/test/OpenMP/structured-bindings-codegen.cpp
@@ -382,15 +382,6 @@ void test_firstprivate_individual_bindings() {
   }
 }
 
-void test_mixed_dsa() {
-  Point p{1, 2};
-  auto [a, b] = p;
-#pragma omp parallel firstprivate(a) shared(b)
-  {
-    int result = a + b;
-  }
-}
-
 void test_static_bindings() {
   static auto [a, b] = Point{1, 2};
 #pragma omp parallel
@@ -496,18 +487,6 @@ void test_lastprivate_binding() {
   use(a);
 }
 
-void test_mixed_linear_private() {
-  Point p{1, 2};
-  auto [a, b] = p;
-
-#pragma omp simd linear(a:2) private(b)
-  for (int i = 0; i < 10; ++i) {
-    a += 2;
-    b = i;
-    use(a + b);
-  }
-}
-
 void test_lastprivate_conditional() {
   Point p{0, 0};
   auto [a, b] = p;
@@ -636,17 +615,6 @@ void test_firstprivate_array_bindings_sb() {
   }
 }
 
-void test_firstprivate_mixed_with_shared_sb() {
-  Pair p{NonTrivialCopy(10), NonTrivialCopy(20)};
-  auto [a, b] = p;
-
-#pragma omp parallel firstprivate(a) shared(b)
-  {
-    (void)a.value;
-    (void)b.value;
-  }
-}
-
 struct Inner {
   int val;
   Inner(int v) : val(v) {}
@@ -764,24 +732,6 @@ void test_lambda_implicit_capture() {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z24test_target_implicit_mapv.p, i64 8, i1 false)
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
-// CHECK:    [[TMP1:%.*]] = load [[STRUCT_POINT:%.*]], ptr [[TMP0]], align 4
-// CHECK:    store [[STRUCT_POINT]] [[TMP1]], ptr [[DOTCASTED:%.*]], align 4
-// CHECK:    [[TMP2:%.*]] = load i64, ptr [[DOTCASTED]], align 8
-// CHECK:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z24test_target_implicit_mapv_l30(i64 [[TMP2]], ptr null) #[[ATTR3]]
-// CHECK:    ret void
-//
-//
-// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z24test_target_implicit_mapv_l30(
-// CHECK-SAME: i64 noundef [[TMP0:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR2]] {
-// CHECK:  [[ENTRY:.*:]]
-// CHECK:    store i64 [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR:%.*]], align 8
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[DOTADDR]], i32 0, i32 0
-// CHECK:    [[TMP1:%.*]] = load i32, ptr [[X]], align 4
-// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[DOTADDR]], i32 0, i32 1
-// CHECK:    [[TMP2:%.*]] = load i32, ptr [[Y]], align 4
-// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
-// CHECK:    store i32 [[ADD]], ptr [[SUM:%.*]], align 4
 // CHECK:    ret void
 //
 //
@@ -790,37 +740,6 @@ void test_lambda_implicit_capture() {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z20test_target_parallelv.p, i64 8, i1 false)
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
-// CHECK:    [[TMP1:%.*]] = load [[STRUCT_POINT:%.*]], ptr [[TMP0]], align 4
-// CHECK:    store [[STRUCT_POINT]] [[TMP1]], ptr [[DOTCASTED:%.*]], align 4
-// CHECK:    [[TMP2:%.*]] = load i64, ptr [[DOTCASTED]], align 8
-// CHECK:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20test_target_parallelv_l40(i64 [[TMP2]], ptr null) #[[ATTR3]]
-// CHECK:    ret void
-//
-//
-// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20test_target_parallelv_l40(
-// CHECK-SAME: i64 noundef [[TMP0:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR2]] {
-// CHECK:  [[ENTRY:.*:]]
-// CHECK:    store i64 [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load i32, ptr [[DOTADDR]], align 4
-// CHECK:    store i32 [[TMP1]], ptr [[DOTCASTED:%.*]], align 4
-// CHECK:    [[TMP2:%.*]] = load i64, ptr [[DOTCASTED]], align 8
-// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20test_target_parallelv_l40.omp_outlined, i64 [[TMP2]])
-// CHECK:    ret void
-//
-//
-// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20test_target_parallelv_l40.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[TMP0:%.*]]) #[[ATTR2]] {
-// CHECK:  [[ENTRY:.*:]]
-// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
-// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
-// CHECK:    store i64 [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[DOTADDR]], i32 0, i32 0
-// CHECK:    [[TMP1:%.*]] = load i32, ptr [[X]], align 4
-// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[DOTADDR]], i32 0, i32 1
-// CHECK:    [[TMP2:%.*]] = load i32, ptr [[Y]], align 4
-// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
-// CHECK:    store i32 [[ADD]], ptr [[SUM:%.*]], align 4
 // CHECK:    ret void
 //
 //
@@ -829,82 +748,6 @@ void test_lambda_implicit_capture() {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z24test_target_parallel_forv.p, i64 8, i1 false)
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
-// CHECK:    [[TMP1:%.*]] = load [[STRUCT_POINT:%.*]], ptr [[TMP0]], align 4
-// CHECK:    store [[STRUCT_POINT]] [[TMP1]], ptr [[DOTCASTED:%.*]], align 4
-// CHECK:    [[TMP2:%.*]] = load i64, ptr [[DOTCASTED]], align 8
-// CHECK:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z24test_target_parallel_forv_l50(i64 [[TMP2]], ptr null) #[[ATTR3]]
-// CHECK:    ret void
-//
-//
-// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z24test_target_parallel_forv_l50(
-// CHECK-SAME: i64 noundef [[TMP0:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR2]] {
-// CHECK:  [[ENTRY:.*:]]
-// CHECK:    store i64 [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load i32, ptr [[DOTADDR]], align 4
-// CHECK:    store i32 [[TMP1]], ptr [[DOTCASTED:%.*]], align 4
-// CHECK:    [[TMP2:%.*]] = load i64, ptr [[DOTCASTED]], align 8
-// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z24test_target_parallel_forv_l50.omp_outlined, i64 [[TMP2]])
-// CHECK:    ret void
-//
-//
-// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z24test_target_parallel_forv_l50.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[TMP0:%.*]]) #[[ATTR2]] {
-// CHECK:  [[ENTRY:.*:]]
-// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
-// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
-// CHECK:    store i64 [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
-// CHECK:    store i32 9, ptr [[DOTOMP_UB:%.*]], align 4
-// CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
-// CHECK:    store i32 0, ptr [[DOTOMP_IS_LAST:%.*]], align 4
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK:    [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9
-// CHECK:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
-// CHECK:       [[COND_TRUE]]:
-// CHECK:    br label %[[COND_END:.*]]
-// CHECK:       [[COND_FALSE]]:
-// CHECK:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK:    br label %[[COND_END]]
-// CHECK:       [[COND_END]]:
-// CHECK:    [[COND:%.*]] = phi i32 [ 9, %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
-// CHECK:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK:    store i32 [[TMP5]], ptr [[DOTOMP_IV:%.*]], align 4
-// CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
-// CHECK:       [[OMP_INNER_FOR_COND]]:
-// CHECK:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK:    [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
-// CHECK:    br i1 [[CMP1]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
-// CHECK:       [[OMP_INNER_FOR_BODY]]:
-// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
-// CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[DOTADDR]], i32 0, i32 0
-// CHECK:    [[TMP9:%.*]] = load i32, ptr [[X]], align 4
-// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[DOTADDR]], i32 0, i32 1
-// CHECK:    [[TMP10:%.*]] = load i32, ptr [[Y]], align 4
-// CHECK:    [[ADD2:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
-// CHECK:    [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK:    [[ADD3:%.*]] = add nsw i32 [[ADD2]], [[TMP11]]
-// CHECK:    store i32 [[ADD3]], ptr [[RESULT:%.*]], align 4
-// CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
-// CHECK:       [[OMP_BODY_CONTINUE]]:
-// CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
-// CHECK:       [[OMP_INNER_FOR_INC]]:
-// CHECK:    [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1
-// CHECK:    store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
-// CHECK:    br label %[[OMP_INNER_FOR_COND]]
-// CHECK:       [[OMP_INNER_FOR_END]]:
-// CHECK:    br label %[[OMP_LOOP_EXIT:.*]]
-// CHECK:       [[OMP_LOOP_EXIT]]:
-// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
 // CHECK:    ret void
 //
 //
@@ -913,7 +756,7 @@ void test_lambda_implicit_capture() {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z21test_firstprivate_dsav.p, i64 8, i1 false)
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
-// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @_Z21test_firstprivate_dsav.omp_outlined, ptr [[TMP0]], ptr [[P]])
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @_Z21test_firstprivate_dsav.omp_outlined, ptr [[TMP0]], ptr [[P]])
 // CHECK:    ret void
 //
 //
@@ -924,8 +767,8 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
 // CHECK:    store ptr [[P]], ptr [[P_ADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4:![0-9]+]], !align [[META5:![0-9]+]]
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[P_ADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2:![0-9]+]], !align [[META3:![0-9]+]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[P_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P1:%.*]], ptr align 4 [[TMP2]], i64 8, i1 false)
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[P1]], i32 0, i32 0
 // CHECK:    [[TMP3:%.*]] = load i32, ptr [[X]], align 4
@@ -951,7 +794,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
 // CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
 // CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
@@ -978,8 +821,8 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[SUM]], ptr [[SUM_ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8, !nonnull [[META4]], !align [[META5]]
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
 // CHECK:    store i32 9, ptr [[DOTOMP_UB:%.*]], align 4
 // CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
@@ -987,7 +830,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store i32 0, ptr [[SUM1:%.*]], align 4
 // CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
 // CHECK:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 // CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9
 // CHECK:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
@@ -1087,7 +930,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
 // CHECK:    store i32 9, ptr [[DOTOMP_UB:%.*]], align 4
 // CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
@@ -1157,7 +1000,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
 // CHECK:    store i32 9, ptr [[DOTOMP_UB:%.*]], align 4
 // CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
@@ -1224,82 +1067,6 @@ void test_lambda_implicit_capture() {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z28test_target_teams_distributev.p, i64 8, i1 false)
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
-// CHECK:    [[TMP1:%.*]] = load [[STRUCT_POINT:%.*]], ptr [[TMP0]], align 4
-// CHECK:    store [[STRUCT_POINT]] [[TMP1]], ptr [[DOTCASTED:%.*]], align 4
-// CHECK:    [[TMP2:%.*]] = load i64, ptr [[DOTCASTED]], align 8
-// CHECK:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28test_target_teams_distributev_l111(i64 [[TMP2]], ptr null) #[[ATTR3]]
-// CHECK:    ret void
-//
-//
-// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28test_target_teams_distributev_l111(
-// CHECK-SAME: i64 noundef [[TMP0:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR2]] {
-// CHECK:  [[ENTRY:.*:]]
-// CHECK:    store i64 [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load i32, ptr [[DOTADDR]], align 4
-// CHECK:    store i32 [[TMP1]], ptr [[DOTCASTED:%.*]], align 4
-// CHECK:    [[TMP2:%.*]] = load i64, ptr [[DOTCASTED]], align 8
-// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28test_target_teams_distributev_l111.omp_outlined, i64 [[TMP2]])
-// CHECK:    ret void
-//
-//
-// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28test_target_teams_distributev_l111.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[TMP0:%.*]]) #[[ATTR2]] {
-// CHECK:  [[ENTRY:.*:]]
-// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
-// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
-// CHECK:    store i64 [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
-// CHECK:    store i32 9, ptr [[DOTOMP_UB:%.*]], align 4
-// CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
-// CHECK:    store i32 0, ptr [[DOTOMP_IS_LAST:%.*]], align 4
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB4:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK:    [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9
-// CHECK:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
-// CHECK:       [[COND_TRUE]]:
-// CHECK:    br label %[[COND_END:.*]]
-// CHECK:       [[COND_FALSE]]:
-// CHECK:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK:    br label %[[COND_END]]
-// CHECK:       [[COND_END]]:
-// CHECK:    [[COND:%.*]] = phi i32 [ 9, %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ]
-// CHECK:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK:    store i32 [[TMP5]], ptr [[DOTOMP_IV:%.*]], align 4
-// CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
-// CHECK:       [[OMP_INNER_FOR_COND]]:
-// CHECK:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK:    [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
-// CHECK:    br i1 [[CMP1]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
-// CHECK:       [[OMP_INNER_FOR_BODY]]:
-// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
-// CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[DOTADDR]], i32 0, i32 0
-// CHECK:    [[TMP9:%.*]] = load i32, ptr [[X]], align 4
-// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[DOTADDR]], i32 0, i32 1
-// CHECK:    [[TMP10:%.*]] = load i32, ptr [[Y]], align 4
-// CHECK:    [[ADD2:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
-// CHECK:    [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK:    [[ADD3:%.*]] = add nsw i32 [[ADD2]], [[TMP11]]
-// CHECK:    store i32 [[ADD3]], ptr [[RESULT:%.*]], align 4
-// CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
-// CHECK:       [[OMP_BODY_CONTINUE]]:
-// CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
-// CHECK:       [[OMP_INNER_FOR_INC]]:
-// CHECK:    [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK:    [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1
-// CHECK:    store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
-// CHECK:    br label %[[OMP_INNER_FOR_COND]]
-// CHECK:       [[OMP_INNER_FOR_END]]:
-// CHECK:    br label %[[OMP_LOOP_EXIT:.*]]
-// CHECK:       [[OMP_LOOP_EXIT]]:
-// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB4]], i32 [[TMP2]])
 // CHECK:    ret void
 //
 //
@@ -1342,9 +1109,9 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I:%.*]], align 8, !noalias [[META19]]
 // CHECK:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I:%.*]], align 8, !noalias [[META19]]
 // CHECK:    [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META19]]
-// CHECK:    [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
-// CHECK:    [[TMP11:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP11:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    [[Y_I:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP11]], i32 0, i32 1
 // CHECK:    [[TMP12:%.*]] = load i32, ptr [[Y_I]], align 4
 // CHECK:    [[ADD_I:%.*]] = add nsw i32 [[TMP10]], [[TMP12]]
@@ -1411,9 +1178,9 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I:%.*]], align 8, !noalias [[META29]]
 // CHECK:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I:%.*]], align 8, !noalias [[META29]]
 // CHECK:    [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META29]]
-// CHECK:    [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
-// CHECK:    [[TMP11:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP11:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    [[Y_I:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP11]], i32 0, i32 1
 // CHECK:    [[TMP12:%.*]] = load i32, ptr [[Y_I]], align 4
 // CHECK:    [[ADD_I:%.*]] = add nsw i32 [[TMP10]], [[TMP12]]
@@ -1490,7 +1257,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    [[TMP19:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias [[META41]]
 // CHECK:    [[CONV_I:%.*]] = trunc i64 [[TMP19]] to i32
 // CHECK:    store i32 [[CONV_I]], ptr [[DOTOMP_IV_I:%.*]], align 4, !noalias [[META41]]
-// CHECK:    [[TMP20:%.*]] = load ptr, ptr [[TMP18]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP20:%.*]] = load ptr, ptr [[TMP18]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    br label %[[OMP_INNER_FOR_COND_I:.*]]
 // CHECK:       [[OMP_INNER_FOR_COND_I]]:
 // CHECK:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META41]]
@@ -1557,20 +1324,6 @@ void test_lambda_implicit_capture() {
 // CHECK:    ret i32 [[TMP1]]
 //
 //
-// CHECK-LABEL: define linkonce_odr noundef i32 @_Z20test_template_targetI5PointEiT_(
-// CHECK-SAME: i64 [[P_COERCE:%.*]]) #[[ATTR0]] comdat {
-// CHECK:  [[ENTRY:.*:]]
-// CHECK:    store i64 [[P_COERCE]], ptr [[P:%.*]], align 4
-// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
-// CHECK:    store i32 0, ptr [[RESULT:%.*]], align 4
-// CHECK:    [[TMP1:%.*]] = load [[STRUCT_POINT:%.*]], ptr [[TMP0]], align 4
-// CHECK:    store [[STRUCT_POINT]] [[TMP1]], ptr [[DOTCASTED:%.*]], align 4
-// CHECK:    [[TMP2:%.*]] = load i64, ptr [[DOTCASTED]], align 8
-// CHECK:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20test_template_targetI5PointEiT__l164(ptr [[RESULT]], i64 [[TMP2]], ptr null) #[[ATTR3]]
-// CHECK:    [[TMP3:%.*]] = load i32, ptr [[RESULT]], align 4
-// CHECK:    ret i32 [[TMP3]]
-//
-//
 // CHECK-LABEL: define linkonce_odr noundef i32 @_Z18test_template_taskI5PointEiT_(
 // CHECK-SAME: i64 [[P_COERCE:%.*]]) #[[ATTR0]] comdat {
 // CHECK:  [[ENTRY:.*:]]
@@ -1615,8 +1368,8 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[RESULT]], ptr [[RESULT_ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR]], align 8, !nonnull [[META4]], !align [[META5]]
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    store i32 0, ptr [[RESULT1:%.*]], align 4
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
 // CHECK:    [[TMP3:%.*]] = load i32, ptr [[X]], align 4
@@ -1666,22 +1419,6 @@ void test_lambda_implicit_capture() {
 // CHECK:    ret void
 //
 //
-// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20test_template_targetI5PointEiT__l164(
-// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[RESULT:%.*]], i64 noundef [[TMP0:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR2]] {
-// CHECK:  [[ENTRY:.*:]]
-// CHECK:    store ptr [[RESULT]], ptr [[RESULT_ADDR:%.*]], align 8
-// CHECK:    store i64 [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR]], align 8, !nonnull [[META4]], !align [[META5]]
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[DOTADDR]], i32 0, i32 0
-// CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
-// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[DOTADDR]], i32 0, i32 1
-// CHECK:    [[TMP3:%.*]] = load i32, ptr [[Y]], align 4
-// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
-// CHECK:    store i32 [[ADD]], ptr [[TMP1]], align 4
-// CHECK:    ret void
-//
-//
 // CHECK-LABEL: define internal noundef i32 @.omp_task_entry..6(
 // CHECK-SAME: i32 noundef [[TMP0:%.*]], ptr noalias noundef [[TMP1:%.*]]) #[[ATTR4]] {
 // CHECK:  [[ENTRY:.*:]]
@@ -1705,14 +1442,14 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I:%.*]], align 8, !noalias [[META51]]
 // CHECK:    [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META51]]
 // CHECK:    [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_6:%.*]], ptr [[TMP8]], i32 0, i32 1
-// CHECK:    [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
 // CHECK:    [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_6]], ptr [[TMP8]], i32 0, i32 1
-// CHECK:    [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    [[Y_I:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP13]], i32 0, i32 1
 // CHECK:    [[TMP14:%.*]] = load i32, ptr [[Y_I]], align 4
 // CHECK:    [[ADD_I:%.*]] = add nsw i32 [[TMP11]], [[TMP14]]
-// CHECK:    [[TMP15:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP15:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    store i32 [[ADD_I]], ptr [[TMP15]], align 4
 // CHECK:    ret i32 0
 //
@@ -1724,8 +1461,8 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[RESULT]], ptr [[RESULT_ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR]], align 8, !nonnull [[META4]], !align [[META5]]
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    store i32 0, ptr [[RESULT1:%.*]], align 4
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT3D:%.*]], ptr [[TMP2]], i32 0, i32 0
 // CHECK:    [[TMP3:%.*]] = load i32, ptr [[X]], align 4
@@ -1858,24 +1595,6 @@ void test_lambda_implicit_capture() {
 // CHECK:    [[ARRAYINIT_DONE:%.*]] = icmp eq i64 [[ARRAYINIT_NEXT]], 2
 // CHECK:    br i1 [[ARRAYINIT_DONE]], label %[[ARRAYINIT_END:.*]], label %[[ARRAYINIT_BODY]]
 // CHECK:       [[ARRAYINIT_END]]:
-// CHECK:    [[TMP3:%.*]] = load [2 x i32], ptr [[TMP0]], align 4
-// CHECK:    store [2 x i32] [[TMP3]], ptr [[DOTCASTED:%.*]], align 4
-// CHECK:    [[TMP4:%.*]] = load i64, ptr [[DOTCASTED]], align 8
-// CHECK:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17test_array_targetv_l232(i64 [[TMP4]], ptr null) #[[ATTR3]]
-// CHECK:    ret void
-//
-//
-// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17test_array_targetv_l232(
-// CHECK-SAME: i64 noundef [[TMP0:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR2]] {
-// CHECK:  [[ENTRY:.*:]]
-// CHECK:    store i64 [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = getelementptr inbounds [2 x i32], ptr [[DOTADDR]], i64 0, i64 0
-// CHECK:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK:    [[TMP3:%.*]] = getelementptr inbounds [2 x i32], ptr [[DOTADDR]], i64 0, i64 1
-// CHECK:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP4]]
-// CHECK:    store i32 [[ADD]], ptr [[SUM:%.*]], align 4
 // CHECK:    ret void
 //
 //
@@ -1929,12 +1648,12 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I:%.*]], align 8, !noalias [[META62]]
 // CHECK:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I:%.*]], align 8, !noalias [[META62]]
 // CHECK:    [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META62]]
-// CHECK:    [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
-// CHECK:    [[TMP11:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META4]], !align [[META5]]
-// CHECK:    [[TMP12:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP11]], i64 0, i64 1
-// CHECK:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
-// CHECK:    [[ADD_I:%.*]] = add nsw i32 [[TMP10]], [[TMP13]]
+// CHECK:    [[TMP11:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[ARRAYIDX1_I:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP11]], i64 0, i64 1
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX1_I]], align 4
+// CHECK:    [[ADD_I:%.*]] = add nsw i32 [[TMP10]], [[TMP12]]
 // CHECK:    store i32 [[ADD_I]], ptr [[SUM_I:%.*]], align 4, !noalias [[META62]]
 // CHECK:    ret i32 0
 //
@@ -1954,7 +1673,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 // CHECK:    call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP3]], ptr @.gomp_critical_user_.var)
@@ -1998,9 +1717,9 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I:%.*]], align 8, !noalias [[META72]]
 // CHECK:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I:%.*]], align 8, !noalias [[META72]]
 // CHECK:    [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META72]]
-// CHECK:    [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
-// CHECK:    [[TMP11:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP11:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    [[Y_I:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP11]], i32 0, i32 1
 // CHECK:    [[TMP12:%.*]] = load i32, ptr [[Y_I]], align 4
 // CHECK:    [[MUL_I:%.*]] = mul nsw i32 [[TMP10]], [[TMP12]]
@@ -2013,7 +1732,7 @@ void test_lambda_implicit_capture() {
 // CHECK:  [[ENTRY:.*:]]
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z22test_reference_bindingv.p, i64 8, i1 false)
 // CHECK:    store ptr [[P]], ptr [[TMP0:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z22test_reference_bindingv.omp_outlined, ptr [[TMP1]])
 // CHECK:    ret void
 //
@@ -2024,15 +1743,17 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    store ptr [[TMP1]], ptr [[TMP:%.*]], align 8
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META4]], !align [[META5]]
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
-// CHECK:    [[TMP3:%.*]] = load i32, ptr [[X]], align 4
-// CHECK:    [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META4]], !align [[META5]]
-// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP4]], i32 0, i32 1
-// CHECK:    [[TMP5:%.*]] = load i32, ptr [[Y]], align 4
-// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP5]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP3]], i32 0, i32 0
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP6]], i32 0, i32 1
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP7]]
 // CHECK:    store i32 [[ADD]], ptr [[SUM:%.*]], align 4
 // CHECK:    ret void
 //
@@ -2052,7 +1773,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
 // CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
 // CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
@@ -2080,8 +1801,8 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
-// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
 // CHECK:    [[TMP4:%.*]] = load i32, ptr [[X]], align 4
 // CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP2]], i32 0, i32 1
@@ -2116,9 +1837,9 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
 // CHECK:    store ptr [[P1]], ptr [[P1_ADDR:%.*]], align 8
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
-// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8, !nonnull [[META4]], !align [[META5]]
-// CHECK:    [[TMP4:%.*]] = load ptr, ptr [[P1_ADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP4:%.*]] = load ptr, ptr [[P1_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P12:%.*]], ptr align 4 [[TMP4]], i64 8, i1 false)
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[P12]], i32 0, i32 0
 // CHECK:    [[TMP5:%.*]] = load i32, ptr [[X]], align 4
@@ -2161,16 +1882,16 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
-// CHECK:    [[TMP2:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i64 0, i64 0
-// CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK:    [[TMP4:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i64 0, i64 1
-// CHECK:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
-// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP5]]
-// CHECK:    [[TMP6:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i64 0, i64 2
-// CHECK:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
-// CHECK:    [[ADD1:%.*]] = add nsw i32 [[ADD]], [[TMP7]]
-// CHECK:    store i32 [[ADD1]], ptr [[SUM:%.*]], align 4
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i64 0, i64 0
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// CHECK:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i64 0, i64 1
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
+// CHECK:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i64 0, i64 2
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
+// CHECK:    [[ADD3:%.*]] = add nsw i32 [[ADD]], [[TMP4]]
+// CHECK:    store i32 [[ADD3]], ptr [[SUM:%.*]], align 4
 // CHECK:    ret void
 //
 //
@@ -2189,7 +1910,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 // CHECK:    [[TMP4:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP3]])
@@ -2205,7 +1926,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP3]])
 // CHECK:    br label %[[OMP_IF_END]]
 // CHECK:       [[OMP_IF_END]]:
-// CHECK:    call void @__kmpc_barrier(ptr @[[GLOB5:[0-9]+]], i32 [[TMP3]])
+// CHECK:    call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP3]])
 // CHECK:    ret void
 //
 //
@@ -2224,14 +1945,14 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    store i32 0, ptr [[DOTOMP_SECTIONS_LB_:%.*]], align 4
 // CHECK:    store i32 1, ptr [[DOTOMP_SECTIONS_UB_:%.*]], align 4
 // CHECK:    store i32 1, ptr [[DOTOMP_SECTIONS_ST_:%.*]], align 4
 // CHECK:    store i32 0, ptr [[DOTOMP_SECTIONS_IL_:%.*]], align 4
 // CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB6:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
+// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB5:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
 // CHECK:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4
 // CHECK:    [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1
 // CHECK:    [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 1
@@ -2274,7 +1995,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4
 // CHECK:    br label %[[OMP_INNER_FOR_COND]]
 // CHECK:       [[OMP_INNER_FOR_END]]:
-// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB6]], i32 [[TMP3]])
+// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB5]], i32 [[TMP3]])
 // CHECK:    ret void
 //
 //
@@ -2293,7 +2014,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z20test_nested_parallelv.omp_outlined.omp_outlined, ptr [[TMP1]])
 // CHECK:    ret void
 //
@@ -2304,7 +2025,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
 // CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
 // CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
@@ -2396,35 +2117,6 @@ void test_lambda_implicit_capture() {
 // CHECK:    ret void
 //
 //
-// CHECK-LABEL: define dso_local void @_Z14test_mixed_dsav(
-// CHECK-SAME: ) #[[ATTR0]] {
-// CHECK:  [[ENTRY:.*:]]
-// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z14test_mixed_dsav.p, i64 8, i1 false)
-// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
-// CHECK:    [[TMP1:%.*]] = load [[STRUCT_POINT:%.*]], ptr [[TMP0]], align 4
-// CHECK:    store [[STRUCT_POINT]] [[TMP1]], ptr [[DOTCASTED:%.*]], align 4
-// CHECK:    [[TMP2:%.*]] = load i64, ptr [[DOTCASTED]], align 8
-// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z14test_mixed_dsav.omp_outlined, i64 [[TMP2]])
-// CHECK:    ret void
-//
-//
-// CHECK-LABEL: define internal void @_Z14test_mixed_dsav.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[TMP0:%.*]]) #[[ATTR2]] {
-// CHECK:  [[ENTRY:.*:]]
-// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
-// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
-// CHECK:    store i64 [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[DOTADDR]], i32 0, i32 0
-// CHECK:    [[TMP1:%.*]] = load i32, ptr [[X]], align 4
-// CHECK:    store i32 [[TMP1]], ptr [[A:%.*]], align 4
-// CHECK:    [[TMP2:%.*]] = load i32, ptr [[A]], align 4
-// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[DOTADDR]], i32 0, i32 1
-// CHECK:    [[TMP3:%.*]] = load i32, ptr [[Y]], align 4
-// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
-// CHECK:    store i32 [[ADD]], ptr [[RESULT:%.*]], align 4
-// CHECK:    ret void
-//
-//
 // CHECK-LABEL: define dso_local void @_Z20test_static_bindingsv(
 // CHECK-SAME: ) #[[ATTR0]] {
 // CHECK:  [[ENTRY:.*:]]
@@ -2510,7 +2202,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
 // CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
 // CHECK:    call void @_Z3usei(i32 noundef [[TMP2]])
@@ -2581,7 +2273,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
 // CHECK:    store i32 99, ptr [[DOTOMP_UB:%.*]], align 4
 // CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
@@ -2692,7 +2384,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
 // CHECK:    store i32 9, ptr [[DOTOMP_UB:%.*]], align 4
 // CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
@@ -2875,7 +2567,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
 // CHECK:    store i32 9, ptr [[DOTOMP_UB:%.*]], align 4
 // CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
@@ -2990,7 +2682,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[THIS]], ptr [[THIS_ADDR:%.*]], align 8
 // CHECK:    store ptr [[OTHER]], ptr [[OTHER_ADDR:%.*]], align 8
 // CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
-// CHECK:    [[TMP0:%.*]] = load ptr, ptr [[OTHER_ADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP0:%.*]] = load ptr, ptr [[OTHER_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    [[VALUE:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIAL:%.*]], ptr [[TMP0]], i32 0, i32 0
 // CHECK:    [[TMP1:%.*]] = load i32, ptr [[VALUE]], align 4
 // CHECK:    [[VALUE2:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIAL]], ptr [[THIS1]], i32 0, i32 0
@@ -3080,7 +2772,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
 // CHECK:    store i32 9, ptr [[DOTOMP_UB:%.*]], align 4
 // CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
@@ -3139,56 +2831,6 @@ void test_lambda_implicit_capture() {
 // CHECK:    ret void
 //
 //
-// CHECK-LABEL: define dso_local void @_Z25test_mixed_linear_privatev(
-// CHECK-SAME: ) #[[ATTR0]] {
-// CHECK:  [[ENTRY:.*:]]
-// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z25test_mixed_linear_privatev.p, i64 8, i1 false)
-// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
-// CHECK:    store i32 0, ptr [[DOTOMP_IV:%.*]], align 4
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP0]], i32 0, i32 0
-// CHECK:    [[TMP1:%.*]] = load i32, ptr [[X]], align 4
-// CHECK:    store i32 [[TMP1]], ptr [[DOTLINEAR_START:%.*]], align 4
-// CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
-// CHECK:       [[OMP_INNER_FOR_COND]]:
-// CHECK:    [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP82:![0-9]+]]
-// CHECK:    [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10
-// CHECK:    br i1 [[CMP]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
-// CHECK:       [[OMP_INNER_FOR_BODY]]:
-// CHECK:    [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP82]]
-// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
-// CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4, !llvm.access.group [[ACC_GRP82]]
-// CHECK:    [[TMP4:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP82]]
-// CHECK:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP82]]
-// CHECK:    [[MUL1:%.*]] = mul nsw i32 [[TMP5]], 2
-// CHECK:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[MUL1]]
-// CHECK:    store i32 [[ADD2]], ptr [[A:%.*]], align 4, !llvm.access.group [[ACC_GRP82]]
-// CHECK:    [[TMP6:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP82]]
-// CHECK:    [[ADD3:%.*]] = add nsw i32 [[TMP6]], 2
-// CHECK:    store i32 [[ADD3]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP82]]
-// CHECK:    [[TMP7:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP82]]
-// CHECK:    store i32 [[TMP7]], ptr [[B:%.*]], align 4, !llvm.access.group [[ACC_GRP82]]
-// CHECK:    [[TMP8:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP82]]
-// CHECK:    [[TMP9:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP82]]
-// CHECK:    [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK:    call void @_Z3usei(i32 noundef [[ADD4]]), !llvm.access.group [[ACC_GRP82]]
-// CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
-// CHECK:       [[OMP_BODY_CONTINUE]]:
-// CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
-// CHECK:       [[OMP_INNER_FOR_INC]]:
-// CHECK:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP82]]
-// CHECK:    [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK:    store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP82]]
-// CHECK:    br label %[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP83:![0-9]+]]
-// CHECK:       [[OMP_INNER_FOR_END]]:
-// CHECK:    store i32 10, ptr [[I]], align 4
-// CHECK:    [[X6:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 0
-// CHECK:    [[TMP11:%.*]] = load i32, ptr [[A]], align 4
-// CHECK:    [[X7:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 0
-// CHECK:    store i32 [[TMP11]], ptr [[X7]], align 4
-// CHECK:    ret void
-//
-//
 // CHECK-LABEL: define dso_local void @_Z28test_lastprivate_conditionalv(
 // CHECK-SAME: ) #[[ATTR0]] {
 // CHECK:  [[ENTRY:.*:]]
@@ -3209,7 +2851,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
 // CHECK:    store i32 99, ptr [[DOTOMP_UB:%.*]], align 4
 // CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
@@ -3277,7 +2919,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
 // CHECK:    [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK:    [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
-// CHECK:    call void @__kmpc_barrier(ptr @[[GLOB7:[0-9]+]], i32 [[TMP3]])
+// CHECK:    call void @__kmpc_barrier(ptr @[[GLOB6:[0-9]+]], i32 [[TMP3]])
 // CHECK:    br i1 [[TMP18]], [[DOTOMP_LASTPRIVATE_THEN:label %.*]], [[DOTOMP_LASTPRIVATE_DONE:label %.*]]
 // CHECK:       [[_OMP_LASTPRIVATE_THEN:.*:]]
 // CHECK:    [[TMP19:%.*]] = load i32, ptr [[A]], align 4
@@ -3308,7 +2950,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
 // CHECK:    store i32 99, ptr [[DOTOMP_UB:%.*]], align 4
 // CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
@@ -3481,7 +3123,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
 // CHECK:    store i32 0, ptr [[A:%.*]], align 4
 // CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P2:%.*]], ptr align 4 @"__const.<captured>.p2", i64 8, i1 false)
@@ -3577,7 +3219,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP1]], i32 0, i32 0
 // CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[X]])
 // CHECK:    [[VALUE:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY:%.*]], ptr [[A]], i32 0, i32 0
@@ -3635,11 +3277,11 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
 // CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[THIS1]], i32 0, i32 0
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    [[X2:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[TMP1]], i32 0, i32 0
 // CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[X]], ptr noundef nonnull align 4 dereferenceable(8) [[X2]])
 // CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[THIS1]], i32 0, i32 1
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    [[Y3:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[TMP2]], i32 0, i32 1
 // CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[Y]], ptr noundef nonnull align 4 dereferenceable(8) [[Y3]])
 // CHECK:    ret void
@@ -3652,12 +3294,12 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[OTHER]], ptr [[OTHER_ADDR:%.*]], align 8
 // CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK:    [[VALUE:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY:%.*]], ptr [[THIS1]], i32 0, i32 0
-// CHECK:    [[TMP0:%.*]] = load ptr, ptr [[OTHER_ADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP0:%.*]] = load ptr, ptr [[OTHER_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    [[VALUE2:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY]], ptr [[TMP0]], i32 0, i32 0
 // CHECK:    [[TMP1:%.*]] = load i32, ptr [[VALUE2]], align 4
 // CHECK:    store i32 [[TMP1]], ptr [[VALUE]], align 4
 // CHECK:    [[COPY_COUNT:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY]], ptr [[THIS1]], i32 0, i32 1
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[OTHER_ADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[OTHER_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    [[COPY_COUNT3:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY]], ptr [[TMP2]], i32 0, i32 1
 // CHECK:    [[TMP3:%.*]] = load i32, ptr [[COPY_COUNT3]], align 4
 // CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
@@ -3693,7 +3335,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[P]], i32 0, i32 1
 // CHECK:    call void @_ZN14NonTrivialCopyC1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[Y]], i32 noundef 20)
 // CHECK:    store ptr [[P]], ptr [[TMP0:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z32test_firstprivate_ref_binding_sbv.omp_outlined, ptr [[TMP1]])
 // CHECK:    call void @_ZN4PairD1Ev(ptr noundef nonnull align 4 dead_on_return(16) dereferenceable(16) [[P]]) #[[ATTR3]]
 // CHECK:    ret void
@@ -3705,10 +3347,11 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    store ptr [[TMP1]], ptr [[TMP:%.*]], align 8
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META4]], !align [[META5]]
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP2]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP3]], i32 0, i32 0
 // CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[X]])
 // CHECK:    [[VALUE:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY:%.*]], ptr [[A]], i32 0, i32 0
 // CHECK:    call void @_ZN14NonTrivialCopyD1Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[A]]) #[[ATTR3]]
@@ -3723,7 +3366,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[P]], i32 0, i32 1
 // CHECK:    call void @_ZN14NonTrivialCopyC1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[Y]], i32 noundef 20)
 // CHECK:    store ptr [[P]], ptr [[TMP0:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z38test_firstprivate_const_ref_binding_sbv.omp_outlined, ptr [[TMP1]])
 // CHECK:    call void @_ZN4PairD1Ev(ptr noundef nonnull align 4 dead_on_return(16) dereferenceable(16) [[P]]) #[[ATTR3]]
 // CHECK:    ret void
@@ -3735,10 +3378,11 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    store ptr [[TMP1]], ptr [[TMP:%.*]], align 8
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META4]], !align [[META5]]
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP2]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP3]], i32 0, i32 0
 // CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[X]])
 // CHECK:    [[VALUE:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY:%.*]], ptr [[A]], i32 0, i32 0
 // CHECK:    call void @_ZN14NonTrivialCopyD1Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[A]]) #[[ATTR3]]
@@ -3765,7 +3409,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP1]], i32 0, i32 0
 // CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[X]])
 // CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[TMP1]], i32 0, i32 1
@@ -3822,7 +3466,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META85:![0-9]+]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META82:![0-9]+]]
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIRWITHDTOR:%.*]], ptr [[TMP1]], i32 0, i32 0
 // CHECK:    call void @_ZN8WithDtorC1ERKS_(ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[X]])
 // CHECK:    [[PTR:%.*]] = getelementptr inbounds nuw [[STRUCT_WITHDTOR:%.*]], ptr [[A]], i32 0, i32 0
@@ -3881,11 +3525,11 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
 // CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIRWITHDTOR:%.*]], ptr [[THIS1]], i32 0, i32 0
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META85]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META82]]
 // CHECK:    [[X2:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIRWITHDTOR]], ptr [[TMP1]], i32 0, i32 0
 // CHECK:    call void @_ZN8WithDtorC1ERKS_(ptr noundef nonnull align 8 dereferenceable(8) [[X]], ptr noundef nonnull align 8 dereferenceable(8) [[X2]])
 // CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIRWITHDTOR]], ptr [[THIS1]], i32 0, i32 1
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META85]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META82]]
 // CHECK:    [[Y3:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIRWITHDTOR]], ptr [[TMP2]], i32 0, i32 1
 // CHECK:    call void @_ZN8WithDtorC1ERKS_(ptr noundef nonnull align 8 dereferenceable(8) [[Y]], ptr noundef nonnull align 8 dereferenceable(8) [[Y3]])
 // CHECK:    ret void
@@ -3899,7 +3543,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK:    [[PTR:%.*]] = getelementptr inbounds nuw [[STRUCT_WITHDTOR:%.*]], ptr [[THIS1]], i32 0, i32 0
 // CHECK:    [[CALL:%.*]] = call noalias noundef nonnull ptr @_Znwm(i64 noundef 4) #[[ATTR11]]
-// CHECK:    [[TMP0:%.*]] = load ptr, ptr [[OTHER_ADDR]], align 8, !nonnull [[META4]], !align [[META85]]
+// CHECK:    [[TMP0:%.*]] = load ptr, ptr [[OTHER_ADDR]], align 8, !nonnull [[META2]], !align [[META82]]
 // CHECK:    [[PTR2:%.*]] = getelementptr inbounds nuw [[STRUCT_WITHDTOR]], ptr [[TMP0]], i32 0, i32 0
 // CHECK:    [[TMP1:%.*]] = load ptr, ptr [[PTR2]], align 8
 // CHECK:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
@@ -3985,57 +3629,27 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
-// CHECK:    [[TMP2:%.*]] = getelementptr inbounds [3 x [[STRUCT_NONTRIVIALCOPY:%.*]]], ptr [[TMP1]], i64 0, i64 0
-// CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP2]])
-// CHECK:    [[TMP3:%.*]] = getelementptr inbounds [3 x [[STRUCT_NONTRIVIALCOPY]]], ptr [[TMP1]], i64 0, i64 1
-// CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP3]])
-// CHECK:    [[TMP4:%.*]] = getelementptr inbounds [3 x [[STRUCT_NONTRIVIALCOPY]]], ptr [[TMP1]], i64 0, i64 2
-// CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP4]])
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x [[STRUCT_NONTRIVIALCOPY:%.*]]], ptr [[TMP1]], i64 0, i64 0
+// CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[ARRAYIDX]])
+// CHECK:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [3 x [[STRUCT_NONTRIVIALCOPY]]], ptr [[TMP1]], i64 0, i64 1
+// CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[ARRAYIDX1]])
+// CHECK:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x [[STRUCT_NONTRIVIALCOPY]]], ptr [[TMP1]], i64 0, i64 2
+// CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[ARRAYIDX2]])
 // CHECK:    [[VALUE:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY]], ptr [[A]], i32 0, i32 0
-// CHECK:    [[TMP5:%.*]] = load i32, ptr [[VALUE]], align 4
-// CHECK:    [[VALUE1:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY]], ptr [[B]], i32 0, i32 0
-// CHECK:    [[TMP6:%.*]] = load i32, ptr [[VALUE1]], align 4
-// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]]
-// CHECK:    [[VALUE2:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY]], ptr [[C]], i32 0, i32 0
-// CHECK:    [[TMP7:%.*]] = load i32, ptr [[VALUE2]], align 4
-// CHECK:    [[ADD3:%.*]] = add nsw i32 [[ADD]], [[TMP7]]
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[VALUE]], align 4
+// CHECK:    [[VALUE3:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY]], ptr [[B]], i32 0, i32 0
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[VALUE3]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
+// CHECK:    [[VALUE4:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY]], ptr [[C]], i32 0, i32 0
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[VALUE4]], align 4
+// CHECK:    [[ADD5:%.*]] = add nsw i32 [[ADD]], [[TMP4]]
 // CHECK:    call void @_ZN14NonTrivialCopyD1Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[C]]) #[[ATTR3]]
 // CHECK:    call void @_ZN14NonTrivialCopyD1Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[B]]) #[[ATTR3]]
 // CHECK:    call void @_ZN14NonTrivialCopyD1Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[A]]) #[[ATTR3]]
 // CHECK:    ret void
 //
 //
-// CHECK-LABEL: define dso_local void @_Z38test_firstprivate_mixed_with_shared_sbv(
-// CHECK-SAME: ) #[[ATTR0]] {
-// CHECK:  [[ENTRY:.*:]]
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[P:%.*]], i32 0, i32 0
-// CHECK:    call void @_ZN14NonTrivialCopyC1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[X]], i32 noundef 10)
-// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[P]], i32 0, i32 1
-// CHECK:    call void @_ZN14NonTrivialCopyC1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[Y]], i32 noundef 20)
-// CHECK:    call void @_ZN4PairC1ERKS_(ptr noundef nonnull align 4 dereferenceable(16) [[TMP0:%.*]], ptr noundef nonnull align 4 dereferenceable(16) [[P]])
-// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z38test_firstprivate_mixed_with_shared_sbv.omp_outlined, ptr [[TMP0]])
-// CHECK:    call void @_ZN4PairD1Ev(ptr noundef nonnull align 4 dead_on_return(16) dereferenceable(16) [[TMP0]]) #[[ATTR3]]
-// CHECK:    call void @_ZN4PairD1Ev(ptr noundef nonnull align 4 dead_on_return(16) dereferenceable(16) [[P]]) #[[ATTR3]]
-// CHECK:    ret void
-//
-//
-// CHECK-LABEL: define internal void @_Z38test_firstprivate_mixed_with_shared_sbv.omp_outlined(
-// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]]) #[[ATTR2]] {
-// CHECK:  [[ENTRY:.*:]]
-// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
-// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
-// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP1]], i32 0, i32 0
-// CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[X]])
-// CHECK:    [[VALUE:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY:%.*]], ptr [[A]], i32 0, i32 0
-// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[TMP1]], i32 0, i32 1
-// CHECK:    [[VALUE1:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY]], ptr [[Y]], i32 0, i32 0
-// CHECK:    call void @_ZN14NonTrivialCopyD1Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[A]]) #[[ATTR3]]
-// CHECK:    ret void
-//
-//
 // CHECK-LABEL: define dso_local void @_Z34test_firstprivate_nested_struct_sbv(
 // CHECK-SAME: ) #[[ATTR0]] {
 // CHECK:  [[ENTRY:.*:]]
@@ -4078,7 +3692,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    [[I1:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER:%.*]], ptr [[TMP1]], i32 0, i32 0
 // CHECK:    call void @_ZN5InnerC1ERKS_(ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I1]])
 // CHECK:    [[VAL:%.*]] = getelementptr inbounds nuw [[STRUCT_INNER:%.*]], ptr [[X]], i32 0, i32 0
@@ -4134,11 +3748,11 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
 // CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK:    [[I1:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER:%.*]], ptr [[THIS1]], i32 0, i32 0
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    [[I12:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[TMP1]], i32 0, i32 0
 // CHECK:    call void @_ZN5InnerC1ERKS_(ptr noundef nonnull align 4 dereferenceable(4) [[I1]], ptr noundef nonnull align 4 dereferenceable(4) [[I12]])
 // CHECK:    [[I2:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[THIS1]], i32 0, i32 1
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    [[I23:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[TMP2]], i32 0, i32 1
 // CHECK:    call void @_ZN5InnerC1ERKS_(ptr noundef nonnull align 4 dereferenceable(4) [[I2]], ptr noundef nonnull align 4 dereferenceable(4) [[I23]])
 // CHECK:    ret void
@@ -4151,7 +3765,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[O]], ptr [[O_ADDR:%.*]], align 8
 // CHECK:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK:    [[VAL:%.*]] = getelementptr inbounds nuw [[STRUCT_INNER:%.*]], ptr [[THIS1]], i32 0, i32 0
-// CHECK:    [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    [[VAL2:%.*]] = getelementptr inbounds nuw [[STRUCT_INNER]], ptr [[TMP0]], i32 0, i32 0
 // CHECK:    [[TMP1:%.*]] = load i32, ptr [[VAL2]], align 4
 // CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP1]], 2
@@ -4187,7 +3801,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[P]], i32 0, i32 1
 // CHECK:    call void @_ZN14NonTrivialCopyC1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[Y]], i32 noundef 20)
 // CHECK:    store ptr [[P]], ptr [[TMP0:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z46test_firstprivate_ref_binding_both_bindings_sbv.omp_outlined, ptr [[TMP1]])
 // CHECK:    call void @_ZN4PairD1Ev(ptr noundef nonnull align 4 dead_on_return(16) dereferenceable(16) [[P]]) #[[ATTR3]]
 // CHECK:    ret void
@@ -4199,19 +3813,21 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    store ptr [[TMP1]], ptr [[TMP:%.*]], align 8
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META4]], !align [[META5]]
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP2]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP3]], i32 0, i32 0
 // CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[X]])
-// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META4]], !align [[META5]]
-// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[TMP3]], i32 0, i32 1
+// CHECK:    [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[TMP5]], i32 0, i32 1
 // CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[Y]])
 // CHECK:    [[VALUE:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY:%.*]], ptr [[A]], i32 0, i32 0
-// CHECK:    [[TMP4:%.*]] = load i32, ptr [[VALUE]], align 4
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[VALUE]], align 4
 // CHECK:    [[VALUE1:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY]], ptr [[B]], i32 0, i32 0
-// CHECK:    [[TMP5:%.*]] = load i32, ptr [[VALUE1]], align 4
-// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]]
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[VALUE1]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
 // CHECK:    call void @_ZN14NonTrivialCopyD1Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[B]]) #[[ATTR3]]
 // CHECK:    call void @_ZN14NonTrivialCopyD1Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[A]]) #[[ATTR3]]
 // CHECK:    ret void
@@ -4225,7 +3841,7 @@ void test_lambda_implicit_capture() {
 // CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[P]], i32 0, i32 1
 // CHECK:    call void @_ZN14NonTrivialCopyC1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[Y]], i32 noundef 20)
 // CHECK:    store ptr [[P]], ptr [[TMP0:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z52test_firstprivate_const_ref_binding_both_bindings_sbv.omp_outlined, ptr [[TMP1]])
 // CHECK:    call void @_ZN4PairD1Ev(ptr noundef nonnull align 4 dead_on_return(16) dereferenceable(16) [[P]]) #[[ATTR3]]
 // CHECK:    ret void
@@ -4237,19 +3853,21 @@ void test_lambda_implicit_capture() {
 // CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
 // CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
-// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META4]], !align [[META5]]
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
 // CHECK:    store ptr [[TMP1]], ptr [[TMP:%.*]], align 8
-// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META4]], !align [[META5]]
-// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP2]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR:%.*]], ptr [[TMP3]], i32 0, i32 0
 // CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[X]])
-// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META4]], !align [[META5]]
-// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[TMP3]], i32 0, i32 1
+// CHECK:    [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_PAIR]], ptr [[TMP5]], i32 0, i32 1
 // CHECK:    call void @_ZN14NonTrivialCopyC1ERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[Y]])
 // CHECK:    [[VALUE:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY:%.*]], ptr [[A]], i32 0, i32 0
-// CHECK:    [[TMP4:%.*]] = load i32, ptr [[VALUE]], align 4
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[VALUE]], align 4
 // CHECK:    [[VALUE1:%.*]] = getelementptr inbounds nuw [[STRUCT_NONTRIVIALCOPY]], ptr [[B]], i32 0, i32 0
-// CHECK:    [[TMP5:%.*]] = load i32, ptr [[VALUE1]], align 4
-// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]]
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[VALUE1]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
 // CHECK:    call void @_ZN14NonTrivialCopyD1Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[B]]) #[[ATTR3]]
 // CHECK:    call void @_ZN14NonTrivialCopyD1Ev(ptr noundef nonnull align 4 dead_on_return(8) dereferenceable(8) [[A]]) #[[ATTR3]]
 // CHECK:    ret void
diff --git a/clang/test/OpenMP/structured-bindings-messages.cpp b/clang/test/OpenMP/structured-bindings-messages.cpp
index ad3103654c94f..69480c1d6711c 100644
--- a/clang/test/OpenMP/structured-bindings-messages.cpp
+++ b/clang/test/OpenMP/structured-bindings-messages.cpp
@@ -3,8 +3,14 @@
 
 namespace std {
   typedef unsigned long size_t;
+  
+  // move.
+  template<typename T>
+  constexpr T&& move(T& t) noexcept {
+    return static_cast<T&&>(t);
+  }
 
-  // pair
+  // pair.
   template <typename T1, typename T2>
   struct pair {
     T1 first;
@@ -16,7 +22,7 @@ namespace std {
     return {a, b};
   }
 
-  // tuple
+  // tuple.
   template <typename... Ts>
   struct tuple;
 
@@ -93,7 +99,7 @@ namespace std {
     else return get<I-1>(static_cast<tuple<Ts...>&&>(t.tail));
   }
 
-  // array
+  // array.
   template <typename T, size_t N>
   struct array {
     T data[N];
@@ -157,3 +163,74 @@ void test_array() {
     // expected-error@-2{{capturing tuple-like structured binding 'q' is not yet supported in OpenMP}}
   }
 }
+
+struct Point {
+  int x, y;
+};
+
+Point make_point() { return {1, 2}; }
+
+void test_function_call() {
+  auto [a, b] = make_point();
+  // expected-error@+1{{mapping of structured binding initialized from function call is not supported}}
+#pragma omp target map(a)
+  {
+    a++;
+  }
+}
+
+void test_brace_init() {
+  auto [a, b] = Point{1, 2};
+  // expected-error@+1{{mapping of structured binding initialized from initializer list is not supported}}
+#pragma omp target map(a)
+  {
+    a++;
+  }
+}
+
+void test_move() {
+  Point p{1, 2};
+  auto [a, b] = std::move(p);
+  // expected-error@+1{{mapping of structured binding initialized from move expression is not supported}}
+#pragma omp target map(a)
+  { 
+    a++;
+  }
+}
+
+void test_conflicting_clauses() {
+  Point pt{1, 2};
+  auto [a, b] = pt;
+  // expected-error@+2{{bindings from the same structured binding declaration cannot have different data-sharing attributes}}
+  // expected-note@+1{{previous binding from the same declaration has 'firstprivate' attribute here}}
+#pragma omp parallel firstprivate(a) shared(b)
+  {
+    use(a);
+    use(b);
+  }
+}
+
+void test_conflicting_linear_private() {
+  Point p{1, 2};
+  auto [a, b] = p;
+  // expected-note@+1{{previous binding from the same declaration has 'linear' attribute here}}
+#pragma omp simd linear(a:2) private(b)
+  // expected-error@-1{{bindings from the same structured binding declaration cannot have different data-sharing attributes}}
+  for (int i = 0; i < 10; ++i) {
+    a += 2;
+    b = i;
+    use(a + b);
+  }
+}
+
+void test_conflicting_reduction_shared() {
+  Point p{0, 0};
+  auto [a, b] = p;
+  // expected-note@+1{{previous binding from the same declaration has 'reduction' attribute here}}
+#pragma omp parallel for reduction(+:a) shared(b)
+  // expected-error@-1{{bindings from the same structured binding declaration cannot have different data-sharing attributes}}
+  for (int i = 0; i < 10; ++i) {
+    a += i;
+    b = i;
+  }
+}

>From cc96fbe8c84834241d1479a2471a6bb210ad29bf Mon Sep 17 00:00:00 2001
From: Ammarguellat <zahira.ammarguellat at intel.com>
Date: Thu, 25 Jun 2026 09:58:41 -0700
Subject: [PATCH 45/45] Fix format

---
 clang/lib/Sema/SemaOpenMP.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index dd2e58dc0f937..5acab57be9de9 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -22773,9 +22773,9 @@ class MapBaseChecker final : public StmtVisitor<MapBaseChecker, bool> {
                                 NameInfo, D->getType(), DRE->getValueKind(),
                                 DRE->getFoundDecl(),
                                 /*TemplateArgs=*/nullptr, DRE->isNonOdrUse());
-        } else {
-          return false;
-        }
+      } else {
+        return false;
+      }
     } else if (!isa<VarDecl>(D)) {
       emitErrorMsg();
       return false;



More information about the cfe-commits mailing list