[clang] [OpenMP] Support capturing structured bindings in OpenMP regions. (PR #190832)

Zahira Ammarguellat via cfe-commits cfe-commits at lists.llvm.org
Mon Jun 8 07:36:15 PDT 2026


================
@@ -0,0 +1,4021 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --filter-out-after "getelem.*kernel" --filter-out "= alloca.*" --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --global-value-regex "\.offload_.*" --global-hex-value-regex ".offload_maptypes.*" --version 6
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -std=c++20 -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -std=c++20 -triple x86_64-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -std=c++20 -triple x86_64-unknown-unknown -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+struct Point { int x, y; }; // Two-int aggregate; the usual decomposition source below.
+struct Point3D { int x, y, z; }; // Three-int aggregate used by test_template_3_bindings.
+
+void test_target_explicit_map() { // map names p; the region writes a and b.
+  Point p{1, 2};
+  auto [a, b] = p; // By-value decomposition: a and b name members of a copy of p.
+
+#pragma omp target map(tofrom: p)
+  {
+    a = a + 1;
+    b = b + 1;
+  }
+}
+
+void test_target_implicit_map() { // No map clause; a and b are only read in the region.
+  Point p{1, 2};
+  auto [a, b] = p;
+
+#pragma omp target
+  {
+    int sum = a + b;
+  }
+}
+
+void test_target_parallel() { // Bindings read inside a combined target parallel region.
+  Point p{3, 4};
+  auto [a, b] = p;
+
+#pragma omp target parallel
+  {
+    int sum = a + b;
+  }
+}
+
+void test_target_parallel_for() { // Bindings read in a target parallel for loop body.
+  Point p{5, 6};
+  auto [a, b] = p;
+
+#pragma omp target parallel for
+  for (int i = 0; i < 10; i++) {
+    int result = a + b + i;
+  }
+}
+
+void test_firstprivate_dsa() { // firstprivate names p; the region reads a and b.
+  Point p{7, 8};
+  auto [a, b] = p; // By-value decomposition: a and b name members of a copy of p.
+
+#pragma omp parallel firstprivate(p)
+  {
+    int sum = a + b;
+  }
+}
+
+void test_shared_dsa() { // shared names p; the region reads a and b.
+  Point p{9, 10};
+  auto [a, b] = p; // By-value decomposition: a and b name members of a copy of p.
+
+#pragma omp parallel shared(p)
+  {
+    int sum = a + b;
+  }
+}
+
+void test_reduction_dsa() { // Reduction is on the plain int sum; bindings are only read.
+  Point p{1, 2};
+  auto [a, b] = p;
+  int sum = 0;
+
+#pragma omp parallel for reduction(+:sum)
+  for (int i = 0; i < 10; i++) {
+    sum += a + b;
+  }
+}
+
+void test_parallel_for_() { // Bindings read in a parallel for loop, no clauses.
+  Point p{11, 12};
+  auto [a, b] = p;
+
+#pragma omp parallel for
+  for (int i = 0; i < 10; i++) {
+    int result = a + b + i;
+  }
+}
+
+void test_parallel_for_simd_() { // Bindings read in a parallel for simd loop, no clauses.
+  Point p{13, 14};
+  auto [a, b] = p;
+
+#pragma omp parallel for simd
+  for (int i = 0; i < 10; i++) {
+    int result = a + b + i;
+  }
+}
+
+void test_target_teams_distribute() { // Bindings read in a target teams distribute loop.
+  Point p{15, 16};
+  auto [a, b] = p;
+
+#pragma omp target teams distribute
+  for (int i = 0; i < 10; i++) {
+    int result = a + b + i;
+  }
+}
+
+void test_task() { // Bindings read inside an explicit task, no clauses.
+  Point p{17, 18};
+  auto [a, b] = p;
+
+#pragma omp task
+  {
+    int sum = a + b;
+  }
+}
+
+void test_task_depend() { // Bindings appear in the depend(in) list and in the task body.
+  Point p{19, 20};
+  auto [a, b] = p;
+
+#pragma omp task depend(in: a, b)
+  {
+    int sum = a + b;
+  }
+}
+
+void test_taskloop_() { // Bindings read in a taskloop body, no clauses.
+  Point p{21, 22};
+  auto [a, b] = p;
+
+#pragma omp taskloop
+  for (int i = 0; i < 10; i++) {
+    int result = a + b + i;
+  }
+}
+
+template<typename T>
+int test_template_bas(T p) { // Called below with a Point lvalue and a Point temporary.
+  auto [a, b] = p;
+  int result = 0;
+
+#pragma omp parallel reduction(+:result)
+  {
+    result = a + b;
+  }
+  return result;
+}
+
+template<typename T>
+int test_template_target(T p) { // Target region writes mapped result from the bindings.
+  auto [a, b] = p;
+  int result = 0;
+
+#pragma omp target map(tofrom: result)
+  {
+    result = a + b;
+  }
+  return result;
+}
+
+template<typename T>
+int test_template_task(T p) { // Task writes shared result; taskwait precedes the return.
+  auto [a, b] = p;
+  int result = 0;
+
+#pragma omp task shared(result)
+  {
+    result = a + b;
+  }
+#pragma omp taskwait
+  return result;
+}
+
+template<typename T>
+int test_template_3_bindings(T p) { // Three-name decomposition; called below with Point3D.
+  auto [x, y, z] = p;
+  int result = 0;
+
+#pragma omp parallel reduction(+:result)
+  {
+    result = x + y + z;
+  }
+  return result;
+}
+
+void instantiate_templates() { // Calls each template above so it gets instantiated.
+  Point p2{1, 2};
+  Point3D p3{1, 2, 3};
+
+  test_template_bas(p2);
+  test_template_bas(Point{3, 4});
+  test_template_target(p2);
+  test_template_task(p2);
+  test_template_3_bindings(p3);
+}
+
+void test_static_binding() { // Static decomposition of a static Point, read in parallel.
+  static Point p{23, 24};
+  static auto [a, b] = p;
+
+#pragma omp parallel
+  {
+    int sum = a + b;
+  }
+}
+
+void test_static_binding_shared() { // shared names static p; region writes static a, b.
+  static Point p{25, 26};
+  static auto [a, b] = p;
+
+#pragma omp parallel shared(p)
+  {
+    a = a + 1;
+    b = b + 1;
+  }
+}
+
+void test_array_target() { // Array decomposition read inside a target region.
+  int arr[2] = {27, 28};
+  auto [a, b] = arr;
+
+#pragma omp target
+  {
+    int sum = a + b;
+  }
+}
+
+void test_array_task() { // Array decomposition read inside an explicit task.
+  int arr[2] = {29, 30};
+  auto [a, b] = arr;
+
+#pragma omp task
+  {
+    int sum = a + b;
+  }
+}
+
+void test_nested() { // Bindings read in a critical section and a task inside one parallel.
+  Point p{31, 32};
+  auto [a, b] = p;
+
+#pragma omp parallel
+  {
+#pragma omp critical
+    {
+      int sum = a + b;
+    }
+
+#pragma omp task
+    {
+      int product = a * b;
+    }
+  }
+}
+
+void test_reference_binding() { // auto& decomposition: a and b name p's own members.
+  Point p{31, 32};
+  auto& [a, b] = p;
+
+#pragma omp parallel
+  {
+    int sum = a + b;
+  }
+}
+
+void test_const_binding() { // const by-value decomposition of a const Point.
+  const Point p{33, 34};
+  const auto [a, b] = p;
+
+#pragma omp parallel
+  {
+    int sum = a + b;
+  }
+}
+
+void test_multiple_bindings() { // Two independent decompositions read in one region.
+  Point p1{33, 34};
+  Point p2{35, 36};
+  auto [a, b] = p1;
+  auto [c, d] = p2;
+
+#pragma omp parallel
+  {
+    int sum = a + b + c + d;
+  }
+}
+
+void test_multiple_bindings_mixed_dsa() { // Clauses name p1/p2; region reads a, b, c, d.
+  Point p1{37, 38};
+  Point p2{39, 40};
+  auto [a, b] = p1;
+  auto [c, d] = p2;
+
+#pragma omp parallel firstprivate(p1) shared(p2)
+  {
+    int result = a + b + c + d;
+  }
+}
+
+void test_array_3_elements() { // Three-name array decomposition read in parallel.
+  int arr[3] = {35, 36, 37};
+  auto [a, b, c] = arr;
+
+#pragma omp parallel
+  {
+    int sum = a + b + c;
+  }
+}
+
+void test_single() { // Bindings read in a single construct nested directly in parallel.
+  Point p{38, 39};
+  auto [a, b] = p;
+
+#pragma omp parallel
+#pragma omp single
+  {
+    int sum = a + b;
+  }
+}
+
+void test_sections() { // Both sections of a parallel sections construct read a and b.
+  Point p{40, 41};
+  auto [a, b] = p;
+
+#pragma omp parallel sections
+  {
+#pragma omp section
+    { int sum = a + b; }
+#pragma omp section
+    { int diff = a - b; }
+  }
+}
+
+void test_nested_parallel() { // Bindings are read only in the inner of two parallel regions.
+  Point p{42, 43};
+  auto [a, b] = p;
+
+#pragma omp parallel
+  {
+#pragma omp parallel
+    {
+      int sum = a + b;
+    }
+  }
+}
+
+void test_simd_() { // Bindings read in a simd loop body, no clauses.
+  Point p{44, 45};
+  auto [a, b] = p;
+
+#pragma omp simd
+  for (int i = 0; i < 10; i++) {
+    int result = a + b + i;
+  }
+}
+
+void test_private_individual_bindings() { // One binding (a) named in a private clause.
+  Point p{1, 2};
+  auto [a, b] = p;
+#pragma omp parallel private(a)
+  {
+    a = 2;
+  }
+}
+
+void test_firstprivate_individual_bindings() { // One binding (b) named in firstprivate.
+  Point p{1, 2};
+  auto [a, b] = p;
+#pragma omp parallel firstprivate(b)
+  {
+    b = b + 10;
+  }
+}
+
+void test_mixed_dsa() { // Different clauses on two bindings of the same decomposition.
+  Point p{1, 2};
+  auto [a, b] = p;
+#pragma omp parallel firstprivate(a) shared(b)
+  {
+    int result = a + b;
+  }
+}
+
+void test_static_bindings() { // Static decomposition initialized from a temporary Point.
+  static auto [a, b] = Point{1, 2};
+#pragma omp parallel
+  {
+    (void)(a + b);
+  }
+}
+
+void use(int a); // Declaration only; later tests pass binding values to it.
+
+void test_shadowing() { // Inner decomposition redeclares a, b where the outer a is private.
+  auto [a, b] = Point{1, 2};
+#pragma omp parallel private(a)
+  {
+    use(a);
+    {
+      auto [a, b] = Point{10, 20};
+      use(a);
+    }
+    use(a);
+  }
+  use(a);
+}
+
+void test_simd_private_then_parallel() { // a is private on simd, then read in a later parallel.
+  Point p{1,2};
+  auto [a,b] = p;
+#pragma omp simd private(a)
+  for (int i=0;i<10;++i) a += i;
+#pragma omp parallel
+  {
+    use(a);
+  }
+}
+
+void test_linear_binding() { // Binding a in a linear clause with step 1 on a simd loop.
+  Point p{1, 2};
+  auto [a, b] = p;
+
+#pragma omp simd linear(a:1)
+  for (int i = 0; i < 10; ++i) {
+    a += 1;
+    use(a);
+  }
+}
+
+void test_reduction_binding_sum() { // Binding a is the + reduction variable; read afterwards.
+  Point p{0, 0};
+  auto [a, b] = p;
+
+#pragma omp parallel for reduction(+:a)
+  for (int i = 0; i < 100; ++i) {
+    a += i;
+  }
+  use(a);
+}
+
+void test_reduction_binding_operators() { // Separate * and min reductions on the two bindings.
+  Point p{1, 100};
+  auto [a, b] = p;
+
+#pragma omp parallel for reduction(*:a) reduction(min:b)
+  for (int i = 1; i <= 10; ++i) {
+    a *= 2;
+    if (i < b) b = i;
+  }
+  use(a);
+  use(b);
+}
+
+void test_lastprivate_binding() { // Binding a is lastprivate; its value is read after the loop.
+  Point p{1, 2};
+  auto [a, b] = p;
+
+#pragma omp parallel for lastprivate(a)
+  for (int i = 0; i < 10; ++i) {
+    a = i * 10;
+  }
+  use(a);
+}
+
+void test_mixed_linear_private() { // linear (step 2) on a and private on b, same simd loop.
+  Point p{1, 2};
+  auto [a, b] = p;
+
+#pragma omp simd linear(a:2) private(b)
+  for (int i = 0; i < 10; ++i) {
+    a += 2;
+    b = i;
+    use(a + b);
+  }
+}
+
+void test_lastprivate_conditional() { // lastprivate(conditional) on a; assigned only when i % 7 == 0.
+  Point p{0, 0};
+  auto [a, b] = p;
+
+#pragma omp parallel for lastprivate(conditional: a)
+  for (int i = 0; i < 100; ++i) {
+    if (i % 7 == 0)
+      a = i;
+  }
+  use(a);
+}
+
+void test_reduction_binding_max() { // One max reduction clause listing both bindings.
+  Point p{-100, -100};
+  auto [a, b] = p;
+
+#pragma omp parallel for reduction(max:a,b)
+  for (int i = 0; i < 100; ++i) {
+    if (i > a) a = i;
+    if (i > b) b = i;
+  }
+  use(a);
+  use(b);
+}
+
+struct NonTrivialCopy { // Class type with user-provided copy constructor and destructor.
+  int value;
+  int copy_count;
+
+  NonTrivialCopy(int v) : value(v), copy_count(0) {}
+  NonTrivialCopy() : value(0), copy_count(0) {}
+  NonTrivialCopy(const NonTrivialCopy &other) // Copies value, increments copy_count.
+    : value(other.value), copy_count(other.copy_count + 1) {}
+
+  ~NonTrivialCopy() {}
+};
+
+struct Pair { // Aggregate of two NonTrivialCopy members; decomposed by the *_sb tests.
+  NonTrivialCopy x;
+  NonTrivialCopy y;
+};
+
+void test_firstprivate_nontrivial_sb() { // firstprivate on one binding of class type.
+  Pair p{NonTrivialCopy(10), NonTrivialCopy(20)};
+  auto [a, b] = p;
+
+#pragma omp parallel firstprivate(a)
+  {
+    (void)a.value;
+  }
+}
+
+void test_firstprivate_ref_binding_sb() { // firstprivate on a binding of an auto& decomposition.
+  Pair p{NonTrivialCopy(10), NonTrivialCopy(20)};
+  auto& [a, b] = p;
+
+#pragma omp parallel firstprivate(a)
+  {
+    (void)a.value;
+  }
+}
+
+void test_firstprivate_const_ref_binding_sb() { // Same as above, with const auto&.
+  Pair p{NonTrivialCopy(10), NonTrivialCopy(20)};
+  const auto& [a, b] = p;
+
+#pragma omp parallel firstprivate(a)
+  {
+    (void)a.value;
+  }
+}
+
+void test_firstprivate_multiple_bindings_sb() { // Both class-type bindings are firstprivate.
+  Pair p{NonTrivialCopy(10), NonTrivialCopy(20)};
+  auto [a, b] = p;
+
+#pragma omp parallel firstprivate(a, b)
+  {
+    (void)(a.value + b.value);
+  }
+}
+
+struct WithDtor { // Holds a heap-allocated int: copy allocates a new one, destructor deletes.
+  int *ptr;
+  WithDtor(int v) : ptr(new int(v)) {}
+  WithDtor(const WithDtor &other) : ptr(new int(*other.ptr)) {}
+  ~WithDtor() { delete ptr; }
+};
+
+struct PairWithDtor { // Aggregate of two WithDtor members.
+  WithDtor x;
+  WithDtor y;
+};
+
+void test_firstprivate_with_destructor_sb() { // firstprivate binding whose type has a real destructor.
+  PairWithDtor p{WithDtor(100), WithDtor(200)};
+  auto [a, b] = p;
+
+#pragma omp parallel firstprivate(a)
+  {
+    (void)(*a.ptr);
+  }
+}
+
+void test_firstprivate_array_bindings_sb() { // firstprivate on all three array-element bindings.
+  NonTrivialCopy arr[3] = {NonTrivialCopy(1), NonTrivialCopy(2), NonTrivialCopy(3)};
+  auto [a, b, c] = arr;
+
+
+#pragma omp parallel firstprivate(a, b, c)
+  {
+    (void)(a.value + b.value + c.value);
+  }
+}
+
+void test_firstprivate_mixed_with_shared_sb() { // a is firstprivate, b is shared; both class type.
+  Pair p{NonTrivialCopy(10), NonTrivialCopy(20)};
+  auto [a, b] = p;
+
+#pragma omp parallel firstprivate(a) shared(b)
+  {
+    (void)a.value;
+    (void)b.value;
+  }
+}
+
+struct Inner { // Class type whose copy constructor stores twice the source's val.
+  int val;
+  Inner(int v) : val(v) {}
+  Inner(const Inner &o) : val(o.val * 2) {}
+  ~Inner() {}
+};
+
+struct Outer { // Aggregate of two Inner members.
+  Inner i1;
+  Inner i2;
+};
+
+void test_firstprivate_nested_struct_sb() { // firstprivate on a binding of type Inner.
+  Outer o{Inner(5), Inner(10)};
+  auto [x, y] = o;
+
+#pragma omp parallel firstprivate(x)
+  {
+    (void)x.val;
+  }
+}
+
+void test_firstprivate_ref_binding_both_bindings_sb() { // auto& decomposition, both bindings firstprivate.
+  Pair p{NonTrivialCopy(10), NonTrivialCopy(20)};
+  auto& [a, b] = p;
+
+#pragma omp parallel firstprivate(a, b)
+  {
+    (void)(a.value + b.value);
+  }
+}
+
+
+void test_firstprivate_const_ref_binding_both_bindings_sb() { // const auto&, both bindings firstprivate.
+  Pair p{NonTrivialCopy(10), NonTrivialCopy(20)};
+  const auto& [a, b] = p;
+
+#pragma omp parallel firstprivate(a, b)
+  {
+    (void)(a.value + b.value);
+  }
+}
+
+void test_lambda_capture_binding_by_value() { // No OpenMP directive: lambda captures a by copy.
+  Point p{10, 20};
+  auto [a, b] = p;
+  auto lambda = [a]() {
+    return a + a;
+  };
+  int result = lambda();
+}
+
+
+void test_lambda_capture_binding_by_ref() { // No OpenMP directive: lambda writes a through &a.
+  Point p{10, 20};
+  auto [a, b] = p;
+  auto lambda = [&a]() {
+    a = 100;
+  };
+  lambda();
+}
+
+
+void test_lambda_capture_multiple_bindings() { // No OpenMP directive: a by copy, b by reference.
+  Point p{10, 20};
+  auto [a, b] = p;
+  auto lambda = [a, &b]() {
+    b = a + 50;
+  };
+  lambda();
+}
+
+
+void test_lambda_implicit_capture() { // No OpenMP directive: [=] implicitly captures a and b.
+  Point p{10, 20};
+  auto [a, b] = p;
+  auto lambda = [=]() {
+    return a + b;
+  };
+  lambda();
+}
+#endif
+// CHECK-LABEL: define dso_local void @_Z24test_target_explicit_mapv(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z24test_target_explicit_mapv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    [[TMP1:%.*]] = load [[STRUCT_POINT:%.*]], ptr [[TMP0]], align 4
+// CHECK:    store [[STRUCT_POINT]] [[TMP1]], ptr [[DOTCASTED:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = load i64, ptr [[DOTCASTED]], align 8
+// CHECK:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z24test_target_explicit_mapv_l19(i64 [[TMP2]], ptr null) #[[ATTR3:[0-9]+]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z24test_target_explicit_mapv_l19(
+// CHECK-SAME: i64 noundef [[TMP0:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store i64 [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR:%.*]], align 8
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[DOTADDR]], i32 0, i32 0
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
+// CHECK:    [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[DOTADDR]], i32 0, i32 0
+// CHECK:    store i32 [[ADD]], ptr [[X1]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[DOTADDR]], i32 0, i32 1
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD2:%.*]] = add nsw i32 [[TMP2]], 1
+// CHECK:    [[Y3:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[DOTADDR]], i32 0, i32 1
+// CHECK:    store i32 [[ADD2]], ptr [[Y3]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z24test_target_implicit_mapv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z24test_target_implicit_mapv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    [[TMP1:%.*]] = load [[STRUCT_POINT:%.*]], ptr [[TMP0]], align 4
+// CHECK:    store [[STRUCT_POINT]] [[TMP1]], ptr [[DOTCASTED:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = load i64, ptr [[DOTCASTED]], align 8
+// CHECK:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z24test_target_implicit_mapv_l30(i64 [[TMP2]], ptr null) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z24test_target_implicit_mapv_l30(
+// CHECK-SAME: i64 noundef [[TMP0:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store i64 [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR:%.*]], align 8
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[DOTADDR]], i32 0, i32 0
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[DOTADDR]], i32 0, i32 1
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
+// CHECK:    store i32 [[ADD]], ptr [[SUM:%.*]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z20test_target_parallelv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z20test_target_parallelv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20test_target_parallelv_l40(ptr [[TMP0]], ptr null) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20test_target_parallelv_l40(
+// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2:![0-9]+]], !align [[META3:![0-9]+]]
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20test_target_parallelv_l40.omp_outlined, ptr [[TMP1]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20test_target_parallelv_l40.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
+// CHECK:    store i32 [[ADD]], ptr [[SUM:%.*]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z24test_target_parallel_forv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z24test_target_parallel_forv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z24test_target_parallel_forv_l50(ptr [[TMP0]], ptr null) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z24test_target_parallel_forv_l50(
+// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z24test_target_parallel_forv_l50.omp_outlined, ptr [[TMP1]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z24test_target_parallel_forv_l50.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
+// CHECK:    store i32 9, ptr [[DOTOMP_UB:%.*]], align 4
+// CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
+// CHECK:    store i32 0, ptr [[DOTOMP_IS_LAST:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
+// CHECK:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK:       [[COND_TRUE]]:
+// CHECK:    br label %[[COND_END:.*]]
+// CHECK:       [[COND_FALSE]]:
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    br label %[[COND_END]]
+// CHECK:       [[COND_END]]:
+// CHECK:    [[COND:%.*]] = phi i32 [ 9, %[[COND_TRUE]] ], [ [[TMP5]], %[[COND_FALSE]] ]
+// CHECK:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK:    store i32 [[TMP6]], ptr [[DOTOMP_IV:%.*]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
+// CHECK:       [[OMP_INNER_FOR_COND]]:
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
+// CHECK:    br i1 [[CMP1]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
+// CHECK:       [[OMP_INNER_FOR_BODY]]:
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD2:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[I]], align 4
+// CHECK:    [[ADD3:%.*]] = add nsw i32 [[ADD2]], [[TMP12]]
+// CHECK:    store i32 [[ADD3]], ptr [[RESULT:%.*]], align 4
+// CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
+// CHECK:       [[OMP_BODY_CONTINUE]]:
+// CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
+// CHECK:       [[OMP_INNER_FOR_INC]]:
+// CHECK:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
+// CHECK:    store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND]]
+// CHECK:       [[OMP_INNER_FOR_END]]:
+// CHECK:    br label %[[OMP_LOOP_EXIT:.*]]
+// CHECK:       [[OMP_LOOP_EXIT]]:
+// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z21test_firstprivate_dsav(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z21test_firstprivate_dsav.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @_Z21test_firstprivate_dsav.omp_outlined, ptr [[TMP0]], ptr [[P]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z21test_firstprivate_dsav.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[P:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[P]], ptr [[P_ADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[P_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P1:%.*]], ptr align 4 [[TMP2]], i64 8, i1 false)
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP4]]
+// CHECK:    store i32 [[ADD]], ptr [[SUM:%.*]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z15test_shared_dsav(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z15test_shared_dsav.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z15test_shared_dsav.omp_outlined, ptr [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z15test_shared_dsav.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
+// CHECK:    store i32 [[ADD]], ptr [[SUM:%.*]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z18test_reduction_dsav(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z18test_reduction_dsav.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    store i32 0, ptr [[SUM:%.*]], align 4
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @_Z18test_reduction_dsav.omp_outlined, ptr [[SUM]], ptr [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z18test_reduction_dsav.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SUM:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[SUM]], ptr [[SUM_ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
+// CHECK:    store i32 9, ptr [[DOTOMP_UB:%.*]], align 4
+// CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
+// CHECK:    store i32 0, ptr [[DOTOMP_IS_LAST:%.*]], align 4
+// CHECK:    store i32 0, ptr [[SUM1:%.*]], align 4
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
+// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9
+// CHECK:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK:       [[COND_TRUE]]:
+// CHECK:    br label %[[COND_END:.*]]
+// CHECK:       [[COND_FALSE]]:
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    br label %[[COND_END]]
+// CHECK:       [[COND_END]]:
+// CHECK:    [[COND:%.*]] = phi i32 [ 9, %[[COND_TRUE]] ], [ [[TMP6]], %[[COND_FALSE]] ]
+// CHECK:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK:    store i32 [[TMP7]], ptr [[DOTOMP_IV:%.*]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
+// CHECK:       [[OMP_INNER_FOR_COND]]:
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
+// CHECK:    br i1 [[CMP2]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
+// CHECK:       [[OMP_INNER_FOR_BODY]]:
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
+// CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP2]], i32 0, i32 1
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD3:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
+// CHECK:    [[TMP13:%.*]] = load i32, ptr [[SUM1]], align 4
+// CHECK:    [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[ADD3]]
+// CHECK:    store i32 [[ADD4]], ptr [[SUM1]], align 4
+// CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
+// CHECK:       [[OMP_BODY_CONTINUE]]:
+// CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
+// CHECK:       [[OMP_INNER_FOR_INC]]:
+// CHECK:    [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1
+// CHECK:    store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND]]
+// CHECK:       [[OMP_INNER_FOR_END]]:
+// CHECK:    br label %[[OMP_LOOP_EXIT:.*]]
+// CHECK:       [[OMP_LOOP_EXIT]]:
+// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]])
+// CHECK:    [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST:%.*]], i64 0, i64 0
+// CHECK:    store ptr [[SUM1]], ptr [[TMP15]], align 8
+// CHECK:    [[TMP16:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z18test_reduction_dsav.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK:    switch i32 [[TMP16]], [[DOTOMP_REDUCTION_DEFAULT:label %.*]] [
+// CHECK:      i32 1, [[DOTOMP_REDUCTION_CASE1:label %.*]]
+// CHECK:      i32 2, [[DOTOMP_REDUCTION_CASE2:label %.*]]
+// CHECK:    ]
+// CHECK:       [[_OMP_REDUCTION_CASE1:.*:]]
+// CHECK:    [[TMP17:%.*]] = load i32, ptr [[TMP1]], align 4
+// CHECK:    [[TMP18:%.*]] = load i32, ptr [[SUM1]], align 4
+// CHECK:    [[ADD6:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
+// CHECK:    store i32 [[ADD6]], ptr [[TMP1]], align 4
+// CHECK:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK:       [[_OMP_REDUCTION_CASE2:.*:]]
+// CHECK:    [[TMP19:%.*]] = load i32, ptr [[SUM1]], align 4
+// CHECK:    [[TMP20:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP19]] monotonic, align 4
+// CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK:       [[_OMP_REDUCTION_DEFAULT:.*:]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z18test_reduction_dsav.omp_outlined.omp.reduction.reduction_func(
+// CHECK-SAME: ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
+// CHECK:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK:    store i32 [[ADD]], ptr [[TMP7]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z18test_parallel_for_v(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z18test_parallel_for_v.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z18test_parallel_for_v.omp_outlined, ptr [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z18test_parallel_for_v.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
+// CHECK:    store i32 9, ptr [[DOTOMP_UB:%.*]], align 4
+// CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
+// CHECK:    store i32 0, ptr [[DOTOMP_IS_LAST:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
+// CHECK:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK:       [[COND_TRUE]]:
+// CHECK:    br label %[[COND_END:.*]]
+// CHECK:       [[COND_FALSE]]:
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    br label %[[COND_END]]
+// CHECK:       [[COND_END]]:
+// CHECK:    [[COND:%.*]] = phi i32 [ 9, %[[COND_TRUE]] ], [ [[TMP5]], %[[COND_FALSE]] ]
+// CHECK:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK:    store i32 [[TMP6]], ptr [[DOTOMP_IV:%.*]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
+// CHECK:       [[OMP_INNER_FOR_COND]]:
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
+// CHECK:    br i1 [[CMP1]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
+// CHECK:       [[OMP_INNER_FOR_BODY]]:
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD2:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[I]], align 4
+// CHECK:    [[ADD3:%.*]] = add nsw i32 [[ADD2]], [[TMP12]]
+// CHECK:    store i32 [[ADD3]], ptr [[RESULT:%.*]], align 4
+// CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
+// CHECK:       [[OMP_BODY_CONTINUE]]:
+// CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
+// CHECK:       [[OMP_INNER_FOR_INC]]:
+// CHECK:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
+// CHECK:    store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND]]
+// CHECK:       [[OMP_INNER_FOR_END]]:
+// CHECK:    br label %[[OMP_LOOP_EXIT:.*]]
+// CHECK:       [[OMP_LOOP_EXIT]]:
+// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z23test_parallel_for_simd_v(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z23test_parallel_for_simd_v.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z23test_parallel_for_simd_v.omp_outlined, ptr [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z23test_parallel_for_simd_v.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
+// CHECK:    store i32 9, ptr [[DOTOMP_UB:%.*]], align 4
+// CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
+// CHECK:    store i32 0, ptr [[DOTOMP_IS_LAST:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
+// CHECK:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK:       [[COND_TRUE]]:
+// CHECK:    br label %[[COND_END:.*]]
+// CHECK:       [[COND_FALSE]]:
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    br label %[[COND_END]]
+// CHECK:       [[COND_END]]:
+// CHECK:    [[COND:%.*]] = phi i32 [ 9, %[[COND_TRUE]] ], [ [[TMP5]], %[[COND_FALSE]] ]
+// CHECK:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK:    store i32 [[TMP6]], ptr [[DOTOMP_IV:%.*]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
+// CHECK:       [[OMP_INNER_FOR_COND]]:
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]]
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK:    [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
+// CHECK:    br i1 [[CMP1]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
+// CHECK:       [[OMP_INNER_FOR_BODY]]:
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK:    [[ADD2:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK:    [[ADD3:%.*]] = add nsw i32 [[ADD2]], [[TMP12]]
+// CHECK:    store i32 [[ADD3]], ptr [[RESULT:%.*]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
+// CHECK:       [[OMP_BODY_CONTINUE]]:
+// CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
+// CHECK:       [[OMP_INNER_FOR_INC]]:
+// CHECK:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK:    [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
+// CHECK:    store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK:    br label %[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
+// CHECK:       [[OMP_INNER_FOR_END]]:
+// CHECK:    br label %[[OMP_LOOP_EXIT:.*]]
+// CHECK:       [[OMP_LOOP_EXIT]]:
+// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
+// CHECK:    [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK:    [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK:    br i1 [[TMP15]], [[DOTOMP_FINAL_THEN:label %.*]], [[DOTOMP_FINAL_DONE:label %.*]]
+// CHECK:       [[_OMP_FINAL_THEN:.*:]]
+// CHECK:    store i32 10, ptr [[I]], align 4
+// CHECK:    br [[DOTOMP_FINAL_DONE]]
+// CHECK:       [[_OMP_FINAL_DONE:.*:]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z28test_target_teams_distributev(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z28test_target_teams_distributev.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28test_target_teams_distributev_l111(ptr [[TMP0]], ptr null) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28test_target_teams_distributev_l111(
+// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28test_target_teams_distributev_l111.omp_outlined, ptr [[TMP1]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28test_target_teams_distributev_l111.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store i32 0, ptr [[DOTOMP_LB:%.*]], align 4
+// CHECK:    store i32 9, ptr [[DOTOMP_UB:%.*]], align 4
+// CHECK:    store i32 1, ptr [[DOTOMP_STRIDE:%.*]], align 4
+// CHECK:    store i32 0, ptr [[DOTOMP_IS_LAST:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB4:[0-9]+]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
+// CHECK:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// CHECK:       [[COND_TRUE]]:
+// CHECK:    br label %[[COND_END:.*]]
+// CHECK:       [[COND_FALSE]]:
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    br label %[[COND_END]]
+// CHECK:       [[COND_END]]:
+// CHECK:    [[COND:%.*]] = phi i32 [ 9, %[[COND_TRUE]] ], [ [[TMP5]], %[[COND_FALSE]] ]
+// CHECK:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK:    store i32 [[TMP6]], ptr [[DOTOMP_IV:%.*]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
+// CHECK:       [[OMP_INNER_FOR_COND]]:
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK:    [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
+// CHECK:    br i1 [[CMP1]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
+// CHECK:       [[OMP_INNER_FOR_BODY]]:
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD2:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[I]], align 4
+// CHECK:    [[ADD3:%.*]] = add nsw i32 [[ADD2]], [[TMP12]]
+// CHECK:    store i32 [[ADD3]], ptr [[RESULT:%.*]], align 4
+// CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
+// CHECK:       [[OMP_BODY_CONTINUE]]:
+// CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
+// CHECK:       [[OMP_INNER_FOR_INC]]:
+// CHECK:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK:    [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
+// CHECK:    store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND]]
+// CHECK:       [[OMP_INNER_FOR_END]]:
+// CHECK:    br label %[[OMP_LOOP_EXIT:.*]]
+// CHECK:       [[OMP_LOOP_EXIT]]:
+// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB4]], i32 [[TMP3]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z9test_taskv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z9test_taskv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON:%.*]], ptr [[AGG_CAPTURED:%.*]], i32 0, i32 0
+// CHECK:    store ptr [[TMP0]], ptr [[TMP2]], align 8
+// CHECK:    [[TMP3:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 8, ptr @.omp_task_entry.)
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP3]], i32 0, i32 0
+// CHECK:    [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 0
+// CHECK:    [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP6]], ptr align 8 [[AGG_CAPTURED]], i64 8, i1 false)
+// CHECK:    [[TMP7:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP3]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal noundef i32 @.omp_task_entry.(
+// CHECK-SAME: i32 noundef [[TMP0:%.*]], ptr noalias noundef [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store i32 [[TMP0]], ptr [[DOTADDR:%.*]], align 4
+// CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP3]], i32 0, i32 0
+// CHECK:    [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0
+// CHECK:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META13:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]])
+// CHECK:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I:%.*]], align 4, !noalias [[META19:![0-9]+]]
+// CHECK:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I:%.*]], align 8, !noalias [[META19]]
+// CHECK:    store ptr null, ptr [[DOTPRIVATES__ADDR_I:%.*]], align 8, !noalias [[META19]]
+// CHECK:    store ptr null, ptr [[DOTCOPY_FN__ADDR_I:%.*]], align 8, !noalias [[META19]]
+// CHECK:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I:%.*]], align 8, !noalias [[META19]]
+// CHECK:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I:%.*]], align 8, !noalias [[META19]]
+// CHECK:    [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META19]]
+// CHECK:    [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
+// CHECK:    [[TMP11:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[Y_I:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP11]], i32 0, i32 1
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[Y_I]], align 4
+// CHECK:    [[ADD_I:%.*]] = add nsw i32 [[TMP10]], [[TMP12]]
+// CHECK:    store i32 [[ADD_I]], ptr [[SUM_I:%.*]], align 4, !noalias [[META19]]
+// CHECK:    ret i32 0
+//
+//
+// CHECK-LABEL: define dso_local void @_Z16test_task_dependv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z16test_task_dependv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_0:%.*]], ptr [[AGG_CAPTURED:%.*]], i32 0, i32 0
+// CHECK:    store ptr [[TMP0]], ptr [[TMP2]], align 8
+// CHECK:    [[TMP3:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 8, ptr @.omp_task_entry..2)
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], ptr [[TMP3]], i32 0, i32 0
+// CHECK:    [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 0
+// CHECK:    [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP6]], ptr align 8 [[AGG_CAPTURED]], i64 8, i1 false)
+// CHECK:    [[TMP7:%.*]] = getelementptr inbounds [2 x [[STRUCT_KMP_DEPEND_INFO:%.*]]], ptr [[DOTDEP_ARR_ADDR:%.*]], i64 0, i64 0
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    [[TMP8:%.*]] = ptrtoint ptr [[X]] to i64
+// CHECK:    [[TMP9:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP7]], i64 0
+// CHECK:    [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP9]], i32 0, i32 0
+// CHECK:    store i64 [[TMP8]], ptr [[TMP10]], align 8
+// CHECK:    [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP9]], i32 0, i32 1
+// CHECK:    store i64 4, ptr [[TMP11]], align 8
+// CHECK:    [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP9]], i32 0, i32 2
+// CHECK:    store i8 1, ptr [[TMP12]], align 8
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 1
+// CHECK:    [[TMP13:%.*]] = ptrtoint ptr [[Y]] to i64
+// CHECK:    [[TMP14:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP7]], i64 1
+// CHECK:    [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP14]], i32 0, i32 0
+// CHECK:    store i64 [[TMP13]], ptr [[TMP15]], align 8
+// CHECK:    [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP14]], i32 0, i32 1
+// CHECK:    store i64 4, ptr [[TMP16]], align 8
+// CHECK:    [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP14]], i32 0, i32 2
+// CHECK:    store i8 1, ptr [[TMP17]], align 8
+// CHECK:    store i64 2, ptr [[DEP_COUNTER_ADDR:%.*]], align 8
+// CHECK:    [[TMP18:%.*]] = call i32 @__kmpc_omp_task_with_deps(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP3]], i32 2, ptr [[TMP7]], i32 0, ptr null)
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal noundef i32 @.omp_task_entry..2(
+// CHECK-SAME: i32 noundef [[TMP0:%.*]], ptr noalias noundef [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store i32 [[TMP0]], ptr [[DOTADDR:%.*]], align 4
+// CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], ptr [[TMP3]], i32 0, i32 0
+// CHECK:    [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0
+// CHECK:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META27:![0-9]+]])
+// CHECK:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I:%.*]], align 4, !noalias [[META29:![0-9]+]]
+// CHECK:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I:%.*]], align 8, !noalias [[META29]]
+// CHECK:    store ptr null, ptr [[DOTPRIVATES__ADDR_I:%.*]], align 8, !noalias [[META29]]
+// CHECK:    store ptr null, ptr [[DOTCOPY_FN__ADDR_I:%.*]], align 8, !noalias [[META29]]
+// CHECK:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I:%.*]], align 8, !noalias [[META29]]
+// CHECK:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I:%.*]], align 8, !noalias [[META29]]
+// CHECK:    [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META29]]
+// CHECK:    [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
+// CHECK:    [[TMP11:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[Y_I:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP11]], i32 0, i32 1
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[Y_I]], align 4
+// CHECK:    [[ADD_I:%.*]] = add nsw i32 [[TMP10]], [[TMP12]]
+// CHECK:    store i32 [[ADD_I]], ptr [[SUM_I:%.*]], align 4, !noalias [[META29]]
+// CHECK:    ret i32 0
+//
+//
+// CHECK-LABEL: define dso_local void @_Z14test_taskloop_v(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z14test_taskloop_v.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_2:%.*]], ptr [[AGG_CAPTURED:%.*]], i32 0, i32 0
+// CHECK:    store ptr [[TMP0]], ptr [[TMP2]], align 8
+// CHECK:    call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]])
+// CHECK:    [[TMP3:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 80, i64 8, ptr @.omp_task_entry..4)
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP3]], i32 0, i32 0
+// CHECK:    [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_4:%.*]], ptr [[TMP4]], i32 0, i32 0
+// CHECK:    [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP6]], ptr align 8 [[AGG_CAPTURED]], i64 8, i1 false)
+// CHECK:    [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_4]], ptr [[TMP4]], i32 0, i32 5
+// CHECK:    store i64 0, ptr [[TMP7]], align 8
+// CHECK:    [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_4]], ptr [[TMP4]], i32 0, i32 6
+// CHECK:    store i64 9, ptr [[TMP8]], align 8
+// CHECK:    [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_4]], ptr [[TMP4]], i32 0, i32 7
+// CHECK:    store i64 1, ptr [[TMP9]], align 8
+// CHECK:    [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_4]], ptr [[TMP4]], i32 0, i32 9
+// CHECK:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP10]], i8 0, i64 8, i1 false)
+// CHECK:    [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8
+// CHECK:    call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP3]], i32 1, ptr [[TMP7]], ptr [[TMP8]], i64 [[TMP11]], i32 1, i32 0, i64 0, ptr null)
+// CHECK:    call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal noundef i32 @.omp_task_entry..4(
+// CHECK-SAME: i32 noundef [[TMP0:%.*]], ptr noalias noundef [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store i32 [[TMP0]], ptr [[DOTADDR:%.*]], align 4
+// CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP3]], i32 0, i32 0
+// CHECK:    [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_4:%.*]], ptr [[TMP4]], i32 0, i32 2
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_4]], ptr [[TMP4]], i32 0, i32 0
+// CHECK:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK:    [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_4]], ptr [[TMP4]], i32 0, i32 5
+// CHECK:    [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8
+// CHECK:    [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_4]], ptr [[TMP4]], i32 0, i32 6
+// CHECK:    [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 8
+// CHECK:    [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_4]], ptr [[TMP4]], i32 0, i32 7
+// CHECK:    [[TMP13:%.*]] = load i64, ptr [[TMP12]], align 8
+// CHECK:    [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_4]], ptr [[TMP4]], i32 0, i32 8
+// CHECK:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 8
+// CHECK:    [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_4]], ptr [[TMP4]], i32 0, i32 9
+// CHECK:    [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META30:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META33:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META35:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META37:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META39:![0-9]+]])
+// CHECK:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I:%.*]], align 4, !noalias [[META41:![0-9]+]]
+// CHECK:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I:%.*]], align 8, !noalias [[META41]]
+// CHECK:    store ptr null, ptr [[DOTPRIVATES__ADDR_I:%.*]], align 8, !noalias [[META41]]
+// CHECK:    store ptr null, ptr [[DOTCOPY_FN__ADDR_I:%.*]], align 8, !noalias [[META41]]
+// CHECK:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I:%.*]], align 8, !noalias [[META41]]
+// CHECK:    store i64 [[TMP9]], ptr [[DOTLB__ADDR_I:%.*]], align 8, !noalias [[META41]]
+// CHECK:    store i64 [[TMP11]], ptr [[DOTUB__ADDR_I:%.*]], align 8, !noalias [[META41]]
+// CHECK:    store i64 [[TMP13]], ptr [[DOTST__ADDR_I:%.*]], align 8, !noalias [[META41]]
+// CHECK:    store i32 [[TMP15]], ptr [[DOTLITER__ADDR_I:%.*]], align 4, !noalias [[META41]]
+// CHECK:    store ptr [[TMP17]], ptr [[DOTREDUCTIONS__ADDR_I:%.*]], align 8, !noalias [[META41]]
+// CHECK:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I:%.*]], align 8, !noalias [[META41]]
+// CHECK:    [[TMP18:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META41]]
+// CHECK:    [[TMP19:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias [[META41]]
+// CHECK:    [[CONV_I:%.*]] = trunc i64 [[TMP19]] to i32
+// CHECK:    store i32 [[CONV_I]], ptr [[DOTOMP_IV_I:%.*]], align 4, !noalias [[META41]]
+// CHECK:    [[TMP20:%.*]] = load ptr, ptr [[TMP18]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    br label %[[OMP_INNER_FOR_COND_I:.*]]
+// CHECK:       [[OMP_INNER_FOR_COND_I]]:
+// CHECK:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META41]]
+// CHECK:    [[CONV1_I:%.*]] = sext i32 [[TMP21]] to i64
+// CHECK:    [[TMP22:%.*]] = load i64, ptr [[DOTUB__ADDR_I]], align 8, !noalias [[META41]]
+// CHECK:    [[CMP_I:%.*]] = icmp ule i64 [[CONV1_I]], [[TMP22]]
+// CHECK:    br i1 [[CMP_I]], label %[[OMP_INNER_FOR_BODY_I:.*]], [[DOTOMP_OUTLINED__3_EXIT:label %.*]]
+// CHECK:       [[OMP_INNER_FOR_BODY_I]]:
+// CHECK:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META41]]
+// CHECK:    store i32 [[TMP23]], ptr [[I_I:%.*]], align 4, !noalias [[META41]]
+// CHECK:    [[TMP24:%.*]] = load i32, ptr [[TMP20]], align 4
+// CHECK:    [[Y_I:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP20]], i32 0, i32 1
+// CHECK:    [[TMP25:%.*]] = load i32, ptr [[Y_I]], align 4
+// CHECK:    [[ADD2_I:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
+// CHECK:    [[TMP26:%.*]] = load i32, ptr [[I_I]], align 4, !noalias [[META41]]
+// CHECK:    [[ADD3_I:%.*]] = add nsw i32 [[ADD2_I]], [[TMP26]]
+// CHECK:    store i32 [[ADD3_I]], ptr [[RESULT_I:%.*]], align 4, !noalias [[META41]]
+// CHECK:    [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META41]]
+// CHECK:    [[ADD4_I:%.*]] = add nsw i32 [[TMP27]], 1
+// CHECK:    store i32 [[ADD4_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias [[META41]]
+// CHECK:    br label %[[OMP_INNER_FOR_COND_I]]
+// CHECK:       [[_OMP_OUTLINED__3_EXIT:.*:]]
+// CHECK:    ret i32 0
+//
+//
+// CHECK-LABEL: define dso_local void @_Z21instantiate_templatesv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P2:%.*]], ptr align 4 @__const._Z21instantiate_templatesv.p2, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P3:%.*]], ptr align 4 @__const._Z21instantiate_templatesv.p3, i64 12, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[AGG_TMP:%.*]], ptr align 4 [[P2]], i64 8, i1 false)
+// CHECK:    [[TMP0:%.*]] = load i64, ptr [[AGG_TMP]], align 4
+// CHECK:    [[CALL:%.*]] = call noundef i32 @_Z17test_template_basI5PointEiT_(i64 [[TMP0]])
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[AGG_TMP1:%.*]], i32 0, i32 0
+// CHECK:    store i32 3, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[AGG_TMP1]], i32 0, i32 1
+// CHECK:    store i32 4, ptr [[Y]], align 4
+// CHECK:    [[TMP1:%.*]] = load i64, ptr [[AGG_TMP1]], align 4
+// CHECK:    [[CALL2:%.*]] = call noundef i32 @_Z17test_template_basI5PointEiT_(i64 [[TMP1]])
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[AGG_TMP3:%.*]], ptr align 4 [[P2]], i64 8, i1 false)
+// CHECK:    [[TMP2:%.*]] = load i64, ptr [[AGG_TMP3]], align 4
+// CHECK:    [[CALL4:%.*]] = call noundef i32 @_Z20test_template_targetI5PointEiT_(i64 [[TMP2]])
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[AGG_TMP5:%.*]], ptr align 4 [[P2]], i64 8, i1 false)
+// CHECK:    [[TMP3:%.*]] = load i64, ptr [[AGG_TMP5]], align 4
+// CHECK:    [[CALL6:%.*]] = call noundef i32 @_Z18test_template_taskI5PointEiT_(i64 [[TMP3]])
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[AGG_TMP7:%.*]], ptr align 4 [[P3]], i64 12, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[AGG_TMP7_COERCE:%.*]], ptr align 4 [[AGG_TMP7]], i64 12, i1 false)
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds nuw { i64, i32 }, ptr [[AGG_TMP7_COERCE]], i32 0, i32 0
+// CHECK:    [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 4
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds nuw { i64, i32 }, ptr [[AGG_TMP7_COERCE]], i32 0, i32 1
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
+// CHECK:    [[CALL8:%.*]] = call noundef i32 @_Z24test_template_3_bindingsI7Point3DEiT_(i64 [[TMP5]], i32 [[TMP7]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define linkonce_odr noundef i32 @_Z17test_template_basI5PointEiT_(
+// CHECK-SAME: i64 [[P_COERCE:%.*]]) #[[ATTR0]] comdat {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store i64 [[P_COERCE]], ptr [[P:%.*]], align 4
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    store i32 0, ptr [[RESULT:%.*]], align 4
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @_Z17test_template_basI5PointEiT_.omp_outlined, ptr [[RESULT]], ptr [[TMP0]])
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[RESULT]], align 4
+// CHECK:    ret i32 [[TMP1]]
+//
+//
+// CHECK-LABEL: define linkonce_odr noundef i32 @_Z20test_template_targetI5PointEiT_(
+// CHECK-SAME: i64 [[P_COERCE:%.*]]) #[[ATTR0]] comdat {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store i64 [[P_COERCE]], ptr [[P:%.*]], align 4
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    store i32 0, ptr [[RESULT:%.*]], align 4
+// CHECK:    [[TMP1:%.*]] = load [[STRUCT_POINT:%.*]], ptr [[TMP0]], align 4
+// CHECK:    store [[STRUCT_POINT]] [[TMP1]], ptr [[DOTCASTED:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = load i64, ptr [[DOTCASTED]], align 8
+// CHECK:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20test_template_targetI5PointEiT__l164(ptr [[RESULT]], i64 [[TMP2]], ptr null) #[[ATTR3]]
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[RESULT]], align 4
+// CHECK:    ret i32 [[TMP3]]
+//
+//
+// CHECK-LABEL: define linkonce_odr noundef i32 @_Z18test_template_taskI5PointEiT_(
+// CHECK-SAME: i64 [[P_COERCE:%.*]]) #[[ATTR0]] comdat {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK:    store i64 [[P_COERCE]], ptr [[P:%.*]], align 4
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    store i32 0, ptr [[RESULT:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_6:%.*]], ptr [[AGG_CAPTURED:%.*]], i32 0, i32 0
+// CHECK:    store ptr [[RESULT]], ptr [[TMP2]], align 8
+// CHECK:    [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_6]], ptr [[AGG_CAPTURED]], i32 0, i32 1
+// CHECK:    store ptr [[TMP0]], ptr [[TMP3]], align 8
+// CHECK:    [[TMP4:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 16, ptr @.omp_task_entry..6)
+// CHECK:    [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES_7:%.*]], ptr [[TMP4]], i32 0, i32 0
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP5]], i32 0, i32 0
+// CHECK:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP7]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false)
+// CHECK:    [[TMP8:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP4]])
+// CHECK:    [[TMP9:%.*]] = call i32 @__kmpc_omp_taskwait(ptr @[[GLOB1]], i32 [[TMP1]])
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[RESULT]], align 4
+// CHECK:    ret i32 [[TMP10]]
+//
+//
+// CHECK-LABEL: define linkonce_odr noundef i32 @_Z24test_template_3_bindingsI7Point3DEiT_(
+// CHECK-SAME: i64 [[P_COERCE0:%.*]], i32 [[P_COERCE1:%.*]]) #[[ATTR0]] comdat {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    [[TMP1:%.*]] = getelementptr inbounds nuw { i64, i32 }, ptr [[COERCE:%.*]], i32 0, i32 0
+// CHECK:    store i64 [[P_COERCE0]], ptr [[TMP1]], align 4
+// CHECK:    [[TMP2:%.*]] = getelementptr inbounds nuw { i64, i32 }, ptr [[COERCE]], i32 0, i32 1
+// CHECK:    store i32 [[P_COERCE1]], ptr [[TMP2]], align 4
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 [[COERCE]], i64 12, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 12, i1 false)
+// CHECK:    store i32 0, ptr [[RESULT:%.*]], align 4
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @_Z24test_template_3_bindingsI7Point3DEiT_.omp_outlined, ptr [[RESULT]], ptr [[TMP0]])
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[RESULT]], align 4
+// CHECK:    ret i32 [[TMP3]]
+//
+//
+// CHECK-LABEL: define internal void @_Z17test_template_basI5PointEiT_.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[RESULT:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[RESULT]], ptr [[RESULT_ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store i32 0, ptr [[RESULT1:%.*]], align 4
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP2]], i32 0, i32 1
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP4]]
+// CHECK:    store i32 [[ADD]], ptr [[RESULT1]], align 4
+// CHECK:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST:%.*]], i64 0, i64 0
+// CHECK:    store ptr [[RESULT1]], ptr [[TMP5]], align 8
+// CHECK:    [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
+// CHECK:    [[TMP8:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP7]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z17test_template_basI5PointEiT_.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK:    switch i32 [[TMP8]], [[DOTOMP_REDUCTION_DEFAULT:label %.*]] [
+// CHECK:      i32 1, [[DOTOMP_REDUCTION_CASE1:label %.*]]
+// CHECK:      i32 2, [[DOTOMP_REDUCTION_CASE2:label %.*]]
+// CHECK:    ]
+// CHECK:       [[_OMP_REDUCTION_CASE1:.*:]]
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[TMP1]], align 4
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[RESULT1]], align 4
+// CHECK:    [[ADD2:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
+// CHECK:    store i32 [[ADD2]], ptr [[TMP1]], align 4
+// CHECK:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK:       [[_OMP_REDUCTION_CASE2:.*:]]
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[RESULT1]], align 4
+// CHECK:    [[TMP12:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP11]] monotonic, align 4
+// CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK:       [[_OMP_REDUCTION_DEFAULT:.*:]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z17test_template_basI5PointEiT_.omp_outlined.omp.reduction.reduction_func(
+// CHECK-SAME: ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
+// CHECK:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK:    store i32 [[ADD]], ptr [[TMP7]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20test_template_targetI5PointEiT__l164(
+// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[RESULT:%.*]], i64 noundef [[TMP0:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[RESULT]], ptr [[RESULT_ADDR:%.*]], align 8
+// CHECK:    store i64 [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[DOTADDR]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[DOTADDR]], i32 0, i32 1
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
+// CHECK:    store i32 [[ADD]], ptr [[TMP1]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal noundef i32 @.omp_task_entry..6(
+// CHECK-SAME: i32 noundef [[TMP0:%.*]], ptr noalias noundef [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store i32 [[TMP0]], ptr [[DOTADDR:%.*]], align 4
+// CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES_7:%.*]], ptr [[TMP3]], i32 0, i32 0
+// CHECK:    [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0
+// CHECK:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META42:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META45:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META47:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]])
+// CHECK:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I:%.*]], align 4, !noalias [[META51:![0-9]+]]
+// CHECK:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I:%.*]], align 8, !noalias [[META51]]
+// CHECK:    store ptr null, ptr [[DOTPRIVATES__ADDR_I:%.*]], align 8, !noalias [[META51]]
+// CHECK:    store ptr null, ptr [[DOTCOPY_FN__ADDR_I:%.*]], align 8, !noalias [[META51]]
+// CHECK:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I:%.*]], align 8, !noalias [[META51]]
+// CHECK:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I:%.*]], align 8, !noalias [[META51]]
+// CHECK:    [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META51]]
+// CHECK:    [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_6:%.*]], ptr [[TMP8]], i32 0, i32 1
+// CHECK:    [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
+// CHECK:    [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_6]], ptr [[TMP8]], i32 0, i32 1
+// CHECK:    [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[Y_I:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP13]], i32 0, i32 1
+// CHECK:    [[TMP14:%.*]] = load i32, ptr [[Y_I]], align 4
+// CHECK:    [[ADD_I:%.*]] = add nsw i32 [[TMP11]], [[TMP14]]
+// CHECK:    [[TMP15:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store i32 [[ADD_I]], ptr [[TMP15]], align 4
+// CHECK:    ret i32 0
+//
+//
+// CHECK-LABEL: define internal void @_Z24test_template_3_bindingsI7Point3DEiT_.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[RESULT:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[RESULT]], ptr [[RESULT_ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store i32 0, ptr [[RESULT1:%.*]], align 4
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT3D:%.*]], ptr [[TMP2]], i32 0, i32 0
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT3D]], ptr [[TMP2]], i32 0, i32 1
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP4]]
+// CHECK:    [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT3D]], ptr [[TMP2]], i32 0, i32 2
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[Z]], align 4
+// CHECK:    [[ADD2:%.*]] = add nsw i32 [[ADD]], [[TMP5]]
+// CHECK:    store i32 [[ADD2]], ptr [[RESULT1]], align 4
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST:%.*]], i64 0, i64 0
+// CHECK:    store ptr [[RESULT1]], ptr [[TMP6]], align 8
+// CHECK:    [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK:    [[TMP9:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP8]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z24test_template_3_bindingsI7Point3DEiT_.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK:    switch i32 [[TMP9]], [[DOTOMP_REDUCTION_DEFAULT:label %.*]] [
+// CHECK:      i32 1, [[DOTOMP_REDUCTION_CASE1:label %.*]]
+// CHECK:      i32 2, [[DOTOMP_REDUCTION_CASE2:label %.*]]
+// CHECK:    ]
+// CHECK:       [[_OMP_REDUCTION_CASE1:.*:]]
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[TMP1]], align 4
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[RESULT1]], align 4
+// CHECK:    [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
+// CHECK:    store i32 [[ADD3]], ptr [[TMP1]], align 4
+// CHECK:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK:       [[_OMP_REDUCTION_CASE2:.*:]]
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[RESULT1]], align 4
+// CHECK:    [[TMP13:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP12]] monotonic, align 4
+// CHECK:    br [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK:       [[_OMP_REDUCTION_DEFAULT:.*:]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z24test_template_3_bindingsI7Point3DEiT_.omp_outlined.omp.reduction.reduction_func(
+// CHECK-SAME: ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
+// CHECK:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+// CHECK:    store i32 [[ADD]], ptr [[TMP7]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z19test_static_bindingv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    [[TMP0:%.*]] = load atomic i8, ptr @_ZGVZ19test_static_bindingvEDC1a1bE acquire, align 8
+// CHECK:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0
+// CHECK:    br i1 [[GUARD_UNINITIALIZED]], label %[[INIT_CHECK:.*]], label %[[INIT_END:.*]], !prof [[PROF52:![0-9]+]]
+// CHECK:       [[INIT_CHECK]]:
+// CHECK:    [[TMP1:%.*]] = call i32 @__cxa_guard_acquire(ptr @_ZGVZ19test_static_bindingvEDC1a1bE) #[[ATTR3]]
+// CHECK:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
+// CHECK:    br i1 [[TOBOOL]], label %[[INIT:.*]], label %[[INIT_END]]
+// CHECK:       [[INIT]]:
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 @_ZZ19test_static_bindingvEDC1a1bE, ptr align 4 @_ZZ19test_static_bindingvE1p, i64 8, i1 false)
+// CHECK:    call void @__cxa_guard_release(ptr @_ZGVZ19test_static_bindingvEDC1a1bE) #[[ATTR3]]
+// CHECK:    br label %[[INIT_END]]
+// CHECK:       [[INIT_END]]:
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @_Z19test_static_bindingv.omp_outlined)
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z19test_static_bindingv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    [[TMP0:%.*]] = load i32, ptr @_ZZ19test_static_bindingvEDC1a1bE, align 4
+// CHECK:    [[TMP1:%.*]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @_ZZ19test_static_bindingvEDC1a1bE, i64 4), align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]]
+// CHECK:    store i32 [[ADD]], ptr [[SUM:%.*]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z26test_static_binding_sharedv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    [[TMP0:%.*]] = load atomic i8, ptr @_ZGVZ26test_static_binding_sharedvEDC1a1bE acquire, align 8
+// CHECK:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0
+// CHECK:    br i1 [[GUARD_UNINITIALIZED]], label %[[INIT_CHECK:.*]], label %[[INIT_END:.*]], !prof [[PROF52]]
+// CHECK:       [[INIT_CHECK]]:
+// CHECK:    [[TMP1:%.*]] = call i32 @__cxa_guard_acquire(ptr @_ZGVZ26test_static_binding_sharedvEDC1a1bE) #[[ATTR3]]
+// CHECK:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
+// CHECK:    br i1 [[TOBOOL]], label %[[INIT:.*]], label %[[INIT_END]]
+// CHECK:       [[INIT]]:
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 @_ZZ26test_static_binding_sharedvEDC1a1bE, ptr align 4 @_ZZ26test_static_binding_sharedvE1p, i64 8, i1 false)
+// CHECK:    call void @__cxa_guard_release(ptr @_ZGVZ26test_static_binding_sharedvEDC1a1bE) #[[ATTR3]]
+// CHECK:    br label %[[INIT_END]]
+// CHECK:       [[INIT_END]]:
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @_Z26test_static_binding_sharedv.omp_outlined)
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z26test_static_binding_sharedv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    [[TMP0:%.*]] = load i32, ptr @_ZZ26test_static_binding_sharedvEDC1a1bE, align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
+// CHECK:    store i32 [[ADD]], ptr @_ZZ26test_static_binding_sharedvEDC1a1bE, align 4
+// CHECK:    [[TMP1:%.*]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @_ZZ26test_static_binding_sharedvEDC1a1bE, i64 4), align 4
+// CHECK:    [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1
+// CHECK:    store i32 [[ADD1]], ptr getelementptr inbounds nuw (i8, ptr @_ZZ26test_static_binding_sharedvEDC1a1bE, i64 4), align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z17test_array_targetv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*]]:
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARR:%.*]], ptr align 4 @__const._Z17test_array_targetv.arr, i64 8, i1 false)
+// CHECK:    [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP0:%.*]], i64 0, i64 0
+// CHECK:    br label %[[ARRAYINIT_BODY:.*]]
+// CHECK:       [[ARRAYINIT_BODY]]:
+// CHECK:    [[ARRAYINIT_INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[ARRAYINIT_NEXT:%.*]], %[[ARRAYINIT_BODY]] ]
+// CHECK:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[ARRAYINIT_BEGIN]], i64 [[ARRAYINIT_INDEX]]
+// CHECK:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [2 x i32], ptr [[ARR]], i64 0, i64 [[ARRAYINIT_INDEX]]
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// CHECK:    store i32 [[TMP2]], ptr [[TMP1]], align 4
+// CHECK:    [[ARRAYINIT_NEXT]] = add nuw i64 [[ARRAYINIT_INDEX]], 1
+// CHECK:    [[ARRAYINIT_DONE:%.*]] = icmp eq i64 [[ARRAYINIT_NEXT]], 2
+// CHECK:    br i1 [[ARRAYINIT_DONE]], label %[[ARRAYINIT_END:.*]], label %[[ARRAYINIT_BODY]]
+// CHECK:       [[ARRAYINIT_END]]:
+// CHECK:    [[TMP3:%.*]] = load [2 x i32], ptr [[TMP0]], align 4
+// CHECK:    store [2 x i32] [[TMP3]], ptr [[DOTCASTED:%.*]], align 4
+// CHECK:    [[TMP4:%.*]] = load i64, ptr [[DOTCASTED]], align 8
+// CHECK:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17test_array_targetv_l232(i64 [[TMP4]], ptr null) #[[ATTR3]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17test_array_targetv_l232(
+// CHECK-SAME: i64 noundef [[TMP0:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store i64 [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = getelementptr inbounds [2 x i32], ptr [[DOTADDR]], i64 0, i64 0
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
+// CHECK:    [[TMP3:%.*]] = getelementptr inbounds [2 x i32], ptr [[DOTADDR]], i64 0, i64 1
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP4]]
+// CHECK:    store i32 [[ADD]], ptr [[SUM:%.*]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z15test_array_taskv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*]]:
+// CHECK:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARR:%.*]], ptr align 4 @__const._Z15test_array_taskv.arr, i64 8, i1 false)
+// CHECK:    [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP0:%.*]], i64 0, i64 0
+// CHECK:    br label %[[ARRAYINIT_BODY:.*]]
+// CHECK:       [[ARRAYINIT_BODY]]:
+// CHECK:    [[ARRAYINIT_INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[ARRAYINIT_NEXT:%.*]], %[[ARRAYINIT_BODY]] ]
+// CHECK:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[ARRAYINIT_BEGIN]], i64 [[ARRAYINIT_INDEX]]
+// CHECK:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [2 x i32], ptr [[ARR]], i64 0, i64 [[ARRAYINIT_INDEX]]
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// CHECK:    store i32 [[TMP3]], ptr [[TMP2]], align 4
+// CHECK:    [[ARRAYINIT_NEXT]] = add nuw i64 [[ARRAYINIT_INDEX]], 1
+// CHECK:    [[ARRAYINIT_DONE:%.*]] = icmp eq i64 [[ARRAYINIT_NEXT]], 2
+// CHECK:    br i1 [[ARRAYINIT_DONE]], label %[[ARRAYINIT_END:.*]], label %[[ARRAYINIT_BODY]]
+// CHECK:       [[ARRAYINIT_END]]:
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_8:%.*]], ptr [[AGG_CAPTURED:%.*]], i32 0, i32 0
+// CHECK:    store ptr [[TMP0]], ptr [[TMP4]], align 8
+// CHECK:    [[TMP5:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 8, ptr @.omp_task_entry..8)
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES_9:%.*]], ptr [[TMP5]], i32 0, i32 0
+// CHECK:    [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP6]], i32 0, i32 0
+// CHECK:    [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP8]], ptr align 8 [[AGG_CAPTURED]], i64 8, i1 false)
+// CHECK:    [[TMP9:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP5]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal noundef i32 @.omp_task_entry..8(
+// CHECK-SAME: i32 noundef [[TMP0:%.*]], ptr noalias noundef [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store i32 [[TMP0]], ptr [[DOTADDR:%.*]], align 4
+// CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES_9:%.*]], ptr [[TMP3]], i32 0, i32 0
+// CHECK:    [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0
+// CHECK:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META53:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META56:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META58:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META60:![0-9]+]])
+// CHECK:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I:%.*]], align 4, !noalias [[META62:![0-9]+]]
+// CHECK:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I:%.*]], align 8, !noalias [[META62]]
+// CHECK:    store ptr null, ptr [[DOTPRIVATES__ADDR_I:%.*]], align 8, !noalias [[META62]]
+// CHECK:    store ptr null, ptr [[DOTCOPY_FN__ADDR_I:%.*]], align 8, !noalias [[META62]]
+// CHECK:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I:%.*]], align 8, !noalias [[META62]]
+// CHECK:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I:%.*]], align 8, !noalias [[META62]]
+// CHECK:    [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META62]]
+// CHECK:    [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
+// CHECK:    [[TMP11:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP12:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP11]], i64 0, i64 1
+// CHECK:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
+// CHECK:    [[ADD_I:%.*]] = add nsw i32 [[TMP10]], [[TMP13]]
+// CHECK:    store i32 [[ADD_I]], ptr [[SUM_I:%.*]], align 4, !noalias [[META62]]
+// CHECK:    ret i32 0
+//
+//
+// CHECK-LABEL: define dso_local void @_Z11test_nestedv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z11test_nestedv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z11test_nestedv.omp_outlined, ptr [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z11test_nestedv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK:    call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP3]], ptr @.gomp_critical_user_.var)
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]]
+// CHECK:    store i32 [[ADD]], ptr [[SUM:%.*]], align 4
+// CHECK:    call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP3]], ptr @.gomp_critical_user_.var)
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_10:%.*]], ptr [[AGG_CAPTURED:%.*]], i32 0, i32 0
+// CHECK:    store ptr [[TMP1]], ptr [[TMP6]], align 8
+// CHECK:    [[TMP7:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i64 40, i64 8, ptr @.omp_task_entry..10)
+// CHECK:    [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES_11:%.*]], ptr [[TMP7]], i32 0, i32 0
+// CHECK:    [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP8]], i32 0, i32 0
+// CHECK:    [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP10]], ptr align 8 [[AGG_CAPTURED]], i64 8, i1 false)
+// CHECK:    [[TMP11:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP3]], ptr [[TMP7]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal noundef i32 @.omp_task_entry..10(
+// CHECK-SAME: i32 noundef [[TMP0:%.*]], ptr noalias noundef [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store i32 [[TMP0]], ptr [[DOTADDR:%.*]], align 4
+// CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES_11:%.*]], ptr [[TMP3]], i32 0, i32 0
+// CHECK:    [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0
+// CHECK:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META63:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META66:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META68:![0-9]+]])
+// CHECK:    call void @llvm.experimental.noalias.scope.decl(metadata [[META70:![0-9]+]])
+// CHECK:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I:%.*]], align 4, !noalias [[META72:![0-9]+]]
+// CHECK:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I:%.*]], align 8, !noalias [[META72]]
+// CHECK:    store ptr null, ptr [[DOTPRIVATES__ADDR_I:%.*]], align 8, !noalias [[META72]]
+// CHECK:    store ptr null, ptr [[DOTCOPY_FN__ADDR_I:%.*]], align 8, !noalias [[META72]]
+// CHECK:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I:%.*]], align 8, !noalias [[META72]]
+// CHECK:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I:%.*]], align 8, !noalias [[META72]]
+// CHECK:    [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META72]]
+// CHECK:    [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
+// CHECK:    [[TMP11:%.*]] = load ptr, ptr [[TMP8]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[Y_I:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP11]], i32 0, i32 1
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[Y_I]], align 4
+// CHECK:    [[MUL_I:%.*]] = mul nsw i32 [[TMP10]], [[TMP12]]
+// CHECK:    store i32 [[MUL_I]], ptr [[PRODUCT_I:%.*]], align 4, !noalias [[META72]]
+// CHECK:    ret i32 0
+//
+//
+// CHECK-LABEL: define dso_local void @_Z22test_reference_bindingv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z22test_reference_bindingv.p, i64 8, i1 false)
+// CHECK:    store ptr [[P]], ptr [[TMP0:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z22test_reference_bindingv.omp_outlined, ptr [[TMP1]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z22test_reference_bindingv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store ptr [[TMP1]], ptr [[TMP:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP4]], i32 0, i32 1
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP5]]
+// CHECK:    store i32 [[ADD]], ptr [[SUM:%.*]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z18test_const_bindingv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z18test_const_bindingv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 @__const._Z18test_const_bindingv., i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z18test_const_bindingv.omp_outlined, ptr [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z18test_const_bindingv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
+// CHECK:    store i32 [[ADD]], ptr [[SUM:%.*]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z22test_multiple_bindingsv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P1:%.*]], ptr align 4 @__const._Z22test_multiple_bindingsv.p1, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P2:%.*]], ptr align 4 @__const._Z22test_multiple_bindingsv.p2, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P1]], i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1:%.*]], ptr align 4 [[P2]], i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @_Z22test_multiple_bindingsv.omp_outlined, ptr [[TMP0]], ptr [[TMP1]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z22test_multiple_bindingsv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP1:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP2]], i32 0, i32 1
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]]
+// CHECK:    [[X2:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP3]], i32 0, i32 0
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[X2]], align 4
+// CHECK:    [[ADD3:%.*]] = add nsw i32 [[ADD]], [[TMP6]]
+// CHECK:    [[Y4:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP3]], i32 0, i32 1
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[Y4]], align 4
+// CHECK:    [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[TMP7]]
+// CHECK:    store i32 [[ADD5]], ptr [[SUM:%.*]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z32test_multiple_bindings_mixed_dsav(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P1:%.*]], ptr align 4 @__const._Z32test_multiple_bindings_mixed_dsav.p1, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P2:%.*]], ptr align 4 @__const._Z32test_multiple_bindings_mixed_dsav.p2, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P1]], i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1:%.*]], ptr align 4 [[P2]], i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @_Z32test_multiple_bindings_mixed_dsav.omp_outlined, ptr [[TMP0]], ptr [[TMP1]], ptr [[P1]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z32test_multiple_bindings_mixed_dsav.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP1:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[P1:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP1]], ptr [[DOTADDR1:%.*]], align 8
+// CHECK:    store ptr [[P1]], ptr [[P1_ADDR:%.*]], align 8
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP4:%.*]] = load ptr, ptr [[P1_ADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P12:%.*]], ptr align 4 [[TMP4]], i64 8, i1 false)
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP2]], i32 0, i32 0
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP2]], i32 0, i32 1
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]]
+// CHECK:    [[X3:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP3]], i32 0, i32 0
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[X3]], align 4
+// CHECK:    [[ADD4:%.*]] = add nsw i32 [[ADD]], [[TMP7]]
+// CHECK:    [[Y5:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP3]], i32 0, i32 1
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[Y5]], align 4
+// CHECK:    [[ADD6:%.*]] = add nsw i32 [[ADD4]], [[TMP8]]
+// CHECK:    store i32 [[ADD6]], ptr [[RESULT:%.*]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z21test_array_3_elementsv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*]]:
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARR:%.*]], ptr align 4 @__const._Z21test_array_3_elementsv.arr, i64 12, i1 false)
+// CHECK:    [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP0:%.*]], i64 0, i64 0
+// CHECK:    br label %[[ARRAYINIT_BODY:.*]]
+// CHECK:       [[ARRAYINIT_BODY]]:
+// CHECK:    [[ARRAYINIT_INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[ARRAYINIT_NEXT:%.*]], %[[ARRAYINIT_BODY]] ]
+// CHECK:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[ARRAYINIT_BEGIN]], i64 [[ARRAYINIT_INDEX]]
+// CHECK:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [3 x i32], ptr [[ARR]], i64 0, i64 [[ARRAYINIT_INDEX]]
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// CHECK:    store i32 [[TMP2]], ptr [[TMP1]], align 4
+// CHECK:    [[ARRAYINIT_NEXT]] = add nuw i64 [[ARRAYINIT_INDEX]], 1
+// CHECK:    [[ARRAYINIT_DONE:%.*]] = icmp eq i64 [[ARRAYINIT_NEXT]], 3
+// CHECK:    br i1 [[ARRAYINIT_DONE]], label %[[ARRAYINIT_END:.*]], label %[[ARRAYINIT_BODY]]
+// CHECK:       [[ARRAYINIT_END]]:
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z21test_array_3_elementsv.omp_outlined, ptr [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z21test_array_3_elementsv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i64 0, i64 0
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK:    [[TMP4:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i64 0, i64 1
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP5]]
+// CHECK:    [[TMP6:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i64 0, i64 2
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
+// CHECK:    [[ADD1:%.*]] = add nsw i32 [[ADD]], [[TMP7]]
+// CHECK:    store i32 [[ADD1]], ptr [[SUM:%.*]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z11test_singlev(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z11test_singlev.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z11test_singlev.omp_outlined, ptr [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z11test_singlev.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK:    [[TMP4:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK:    [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
+// CHECK:    br i1 [[TMP5]], label %[[OMP_IF_THEN:.*]], label %[[OMP_IF_END:.*]]
+// CHECK:       [[OMP_IF_THEN]]:
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
+// CHECK:    store i32 [[ADD]], ptr [[SUM:%.*]], align 4
+// CHECK:    call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK:    br label %[[OMP_IF_END]]
+// CHECK:       [[OMP_IF_END]]:
+// CHECK:    call void @__kmpc_barrier(ptr @[[GLOB5:[0-9]+]], i32 [[TMP3]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z13test_sectionsv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z13test_sectionsv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z13test_sectionsv.omp_outlined, ptr [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z13test_sectionsv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    store i32 0, ptr [[DOTOMP_SECTIONS_LB_:%.*]], align 4
+// CHECK:    store i32 1, ptr [[DOTOMP_SECTIONS_UB_:%.*]], align 4
+// CHECK:    store i32 1, ptr [[DOTOMP_SECTIONS_ST_:%.*]], align 4
+// CHECK:    store i32 0, ptr [[DOTOMP_SECTIONS_IL_:%.*]], align 4
+// CHECK:    [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK:    call void @__kmpc_for_static_init_4(ptr @[[GLOB6:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4
+// CHECK:    [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1
+// CHECK:    [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 1
+// CHECK:    store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_UB_]], align 4
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4
+// CHECK:    store i32 [[TMP7]], ptr [[DOTOMP_SECTIONS_IV_:%.*]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
+// CHECK:       [[OMP_INNER_FOR_COND]]:
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4
+// CHECK:    [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
+// CHECK:    br i1 [[CMP]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
+// CHECK:       [[OMP_INNER_FOR_BODY]]:
+// CHECK:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4
+// CHECK:    switch i32 [[TMP10]], [[DOTOMP_SECTIONS_EXIT:label %.*]] [
+// CHECK:      i32 0, [[DOTOMP_SECTIONS_CASE:label %.*]]
+// CHECK:      i32 1, [[DOTOMP_SECTIONS_CASE1:label %.*]]
+// CHECK:    ]
+// CHECK:       [[_OMP_SECTIONS_CASE:.*:]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP11:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP12:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
+// CHECK:    store i32 [[ADD]], ptr [[SUM:%.*]], align 4
+// CHECK:    br [[DOTOMP_SECTIONS_EXIT]]
+// CHECK:       [[_OMP_SECTIONS_CASE1:.*:]]
+// CHECK:    [[X2:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP13:%.*]] = load i32, ptr [[X2]], align 4
+// CHECK:    [[Y3:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP14:%.*]] = load i32, ptr [[Y3]], align 4
+// CHECK:    [[SUB:%.*]] = sub nsw i32 [[TMP13]], [[TMP14]]
+// CHECK:    store i32 [[SUB]], ptr [[DIFF:%.*]], align 4
+// CHECK:    br [[DOTOMP_SECTIONS_EXIT]]
+// CHECK:       [[_OMP_SECTIONS_EXIT:.*:]]
+// CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
+// CHECK:       [[OMP_INNER_FOR_INC]]:
+// CHECK:    [[TMP15:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4
+// CHECK:    [[INC:%.*]] = add nsw i32 [[TMP15]], 1
+// CHECK:    store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND]]
+// CHECK:       [[OMP_INNER_FOR_END]]:
+// CHECK:    call void @__kmpc_for_static_fini(ptr @[[GLOB6]], i32 [[TMP3]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z20test_nested_parallelv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z20test_nested_parallelv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z20test_nested_parallelv.omp_outlined, ptr [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z20test_nested_parallelv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z20test_nested_parallelv.omp_outlined.omp_outlined, ptr [[TMP1]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z20test_nested_parallelv.omp_outlined.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
+// CHECK:    store i32 [[ADD]], ptr [[SUM:%.*]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z10test_simd_v(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z10test_simd_v.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    store i32 0, ptr [[DOTOMP_IV:%.*]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
+// CHECK:       [[OMP_INNER_FOR_COND]]:
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73:![0-9]+]]
+// CHECK:    [[CMP:%.*]] = icmp slt i32 [[TMP1]], 10
+// CHECK:    br i1 [[CMP]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
+// CHECK:       [[OMP_INNER_FOR_BODY]]:
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]]
+// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP2]], 1
+// CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4, !llvm.access.group [[ACC_GRP73]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP73]]
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 1
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP73]]
+// CHECK:    [[ADD1:%.*]] = add nsw i32 [[TMP3]], [[TMP4]]
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP73]]
+// CHECK:    [[ADD2:%.*]] = add nsw i32 [[ADD1]], [[TMP5]]
+// CHECK:    store i32 [[ADD2]], ptr [[RESULT:%.*]], align 4, !llvm.access.group [[ACC_GRP73]]
+// CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
+// CHECK:       [[OMP_BODY_CONTINUE]]:
+// CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
+// CHECK:       [[OMP_INNER_FOR_INC]]:
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]]
+// CHECK:    [[ADD3:%.*]] = add nsw i32 [[TMP6]], 1
+// CHECK:    store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]]
+// CHECK:    br label %[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP74:![0-9]+]]
+// CHECK:       [[OMP_INNER_FOR_END]]:
+// CHECK:    store i32 10, ptr [[I]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z32test_private_individual_bindingsv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z32test_private_individual_bindingsv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @_Z32test_private_individual_bindingsv.omp_outlined)
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z32test_private_individual_bindingsv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store i32 2, ptr [[A:%.*]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z37test_firstprivate_individual_bindingsv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z37test_firstprivate_individual_bindingsv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z37test_firstprivate_individual_bindingsv.omp_outlined, ptr [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z37test_firstprivate_individual_bindingsv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    store i32 [[TMP2]], ptr [[B:%.*]], align 4
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[B]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 10
+// CHECK:    store i32 [[ADD]], ptr [[B]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z14test_mixed_dsav(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z14test_mixed_dsav.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z14test_mixed_dsav.omp_outlined, ptr [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z14test_mixed_dsav.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    store i32 [[TMP2]], ptr [[A:%.*]], align 4
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[A]], align 4
+// CHECK:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP1]], i32 0, i32 1
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[Y]], align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP4]]
+// CHECK:    store i32 [[ADD]], ptr [[RESULT:%.*]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z20test_static_bindingsv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @_Z20test_static_bindingsv.omp_outlined)
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z20test_static_bindingsv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    [[TMP0:%.*]] = load i32, ptr @_ZZ20test_static_bindingsvEDC1a1bE, align 4
+// CHECK:    [[TMP1:%.*]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @_ZZ20test_static_bindingsvEDC1a1bE, i64 4), align 4
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]]
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z14test_shadowingv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 @__const._Z14test_shadowingv., i64 8, i1 false)
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @_Z14test_shadowingv.omp_outlined)
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP1]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z14test_shadowingv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[A:%.*]], align 4
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP1]])
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 @"__const.<captured>.", i64 8, i1 false)
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP2]])
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[A]], align 4
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP3]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z31test_simd_private_then_parallelv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z31test_simd_private_then_parallelv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    store i32 0, ptr [[DOTOMP_IV:%.*]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
+// CHECK:       [[OMP_INNER_FOR_COND]]:
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP76:![0-9]+]]
+// CHECK:    [[CMP:%.*]] = icmp slt i32 [[TMP1]], 10
+// CHECK:    br i1 [[CMP]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
+// CHECK:       [[OMP_INNER_FOR_BODY]]:
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP76]]
+// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP2]], 1
+// CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4, !llvm.access.group [[ACC_GRP76]]
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP76]]
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[A:%.*]], align 4, !llvm.access.group [[ACC_GRP76]]
+// CHECK:    [[ADD1:%.*]] = add nsw i32 [[TMP4]], [[TMP3]]
+// CHECK:    store i32 [[ADD1]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP76]]
+// CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
+// CHECK:       [[OMP_BODY_CONTINUE]]:
+// CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
+// CHECK:       [[OMP_INNER_FOR_INC]]:
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP76]]
+// CHECK:    [[ADD2:%.*]] = add nsw i32 [[TMP5]], 1
+// CHECK:    store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP76]]
+// CHECK:    br label %[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP77:![0-9]+]]
+// CHECK:       [[OMP_INNER_FOR_END]]:
+// CHECK:    store i32 10, ptr [[I]], align 4
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @_Z31test_simd_private_then_parallelv.omp_outlined, ptr [[TMP0]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z31test_simd_private_then_parallelv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR:%.*]], align 8
+// CHECK:    store ptr [[TMP0]], ptr [[DOTADDR:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP1]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP2]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z19test_linear_bindingv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P:%.*]], ptr align 4 @__const._Z19test_linear_bindingv.p, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    store i32 0, ptr [[DOTOMP_IV:%.*]], align 4
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    [[TMP1:%.*]] = load i32, ptr [[X]], align 4
+// CHECK:    store i32 [[TMP1]], ptr [[DOTLINEAR_START:%.*]], align 4
+// CHECK:    br label %[[OMP_INNER_FOR_COND:.*]]
+// CHECK:       [[OMP_INNER_FOR_COND]]:
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP79:![0-9]+]]
+// CHECK:    [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10
+// CHECK:    br i1 [[CMP]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
+// CHECK:       [[OMP_INNER_FOR_BODY]]:
+// CHECK:    [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP79]]
+// CHECK:    [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
+// CHECK:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK:    store i32 [[ADD]], ptr [[I:%.*]], align 4, !llvm.access.group [[ACC_GRP79]]
+// CHECK:    [[TMP4:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP79]]
+// CHECK:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP79]]
+// CHECK:    [[MUL1:%.*]] = mul nsw i32 [[TMP5]], 1
+// CHECK:    [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[MUL1]]
+// CHECK:    store i32 [[ADD2]], ptr [[A:%.*]], align 4, !llvm.access.group [[ACC_GRP79]]
+// CHECK:    [[TMP6:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP79]]
+// CHECK:    [[ADD3:%.*]] = add nsw i32 [[TMP6]], 1
+// CHECK:    store i32 [[ADD3]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP79]]
+// CHECK:    [[TMP7:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP79]]
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP7]]), !llvm.access.group [[ACC_GRP79]]
+// CHECK:    br label %[[OMP_BODY_CONTINUE:.*]]
+// CHECK:       [[OMP_BODY_CONTINUE]]:
+// CHECK:    br label %[[OMP_INNER_FOR_INC:.*]]
+// CHECK:       [[OMP_INNER_FOR_INC]]:
+// CHECK:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP79]]
+// CHECK:    [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK:    store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP79]]
+// CHECK:    br label %[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP80:![0-9]+]]
+// CHECK:       [[OMP_INNER_FOR_END]]:
+// CHECK:    store i32 10, ptr [[I]], align 4
+// CHECK:    [[X5:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    [[TMP9:%.*]] = load i32, ptr [[A]], align 4
+// CHECK:    [[X6:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    store i32 [[TMP9]], ptr [[X6]], align 4
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define dso_local void @_Z26test_reduction_binding_sumv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK:  [[ENTRY:.*:]]
+// CHECK:    call void @llvm.memset.p0.i64(ptr align 4 [[P:%.*]], i8 0, i64 8, i1 false)
+// CHECK:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0:%.*]], ptr align 4 [[P]], i64 8, i1 false)
+// CHECK:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    store ptr [[X]], ptr [[A:%.*]], align 8
+// CHECK:    [[TMP1:%.*]] = load ptr, ptr [[A]], align 8, !nonnull [[META2]], !align [[META3]]
+// CHECK:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @_Z26test_reduction_binding_sumv.omp_outlined, ptr [[TMP0]], ptr [[TMP1]])
+// CHECK:    [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_POINT]], ptr [[TMP0]], i32 0, i32 0
+// CHECK:    [[TMP2:%.*]] = load i32, ptr [[X1]], align 4
+// CHECK:    call void @_Z3usei(i32 noundef [[TMP2]])
+// CHECK:    ret void
+//
+//
+// CHECK-LABEL: define internal void @_Z26test_reduction_binding_sumv.omp_outlined(
----------------
zahiraam wrote:

Fixed.

https://github.com/llvm/llvm-project/pull/190832


More information about the cfe-commits mailing list