[llvm] 1c9ec74 - [Clang][OpenMP] Insert alloca for kernel args at function entry block instead of the launch point.

Dhruva Chakrabarti via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 17 13:36:49 PDT 2023


Author: Dhruva Chakrabarti
Date: 2023-03-17T16:36:12-04:00
New Revision: 1c9ec74e3f2a55b4a74ad54e60b58b03baf896d9

URL: https://github.com/llvm/llvm-project/commit/1c9ec74e3f2a55b4a74ad54e60b58b03baf896d9
DIFF: https://github.com/llvm/llvm-project/commit/1c9ec74e3f2a55b4a74ad54e60b58b03baf896d9.diff

LOG: [Clang][OpenMP] Insert alloca for kernel args at function entry block instead of the launch point.

If an inlined kernel is called in a loop, an alloca emitted at the launch
point would increase stack usage every time the kernel is invoked. This
could make the application run out of stack space and crash. The problem
is fixed by creating the alloca instruction at the function's alloca
insertion point (in the entry block) instead of at the launch point.

Fixes https://github.com/llvm/llvm-project/issues/60602

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D145820

Added: 
    clang/test/OpenMP/bug60602.cpp

Modified: 
    clang/lib/CodeGen/CGOpenMPRuntime.cpp
    clang/test/OpenMP/align_clause_codegen.cpp
    clang/test/OpenMP/atomic_compare_codegen.cpp
    clang/test/OpenMP/distribute_codegen.cpp
    clang/test/OpenMP/distribute_firstprivate_codegen.cpp
    clang/test/OpenMP/distribute_lastprivate_codegen.cpp
    clang/test/OpenMP/distribute_parallel_for_codegen.cpp
    clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp
    clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp
    clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp
    clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp
    clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp
    clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp
    clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp
    clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp
    clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp
    clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp
    clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp
    clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp
    clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp
    clang/test/OpenMP/distribute_private_codegen.cpp
    clang/test/OpenMP/distribute_simd_codegen.cpp
    clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp
    clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp
    clang/test/OpenMP/distribute_simd_private_codegen.cpp
    clang/test/OpenMP/distribute_simd_reduction_codegen.cpp
    clang/test/OpenMP/for_non_rectangular_codegen.c
    clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c
    clang/test/OpenMP/nested_loop_codegen.cpp
    clang/test/OpenMP/nvptx_lambda_capturing.cpp
    clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
    clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp
    clang/test/OpenMP/reduction_compound_op.cpp
    clang/test/OpenMP/reduction_implicit_map.cpp
    clang/test/OpenMP/target_codegen_global_capture.cpp
    clang/test/OpenMP/target_has_device_addr_codegen.cpp
    clang/test/OpenMP/target_has_device_addr_codegen_01.cpp
    clang/test/OpenMP/target_map_codegen_03.cpp
    clang/test/OpenMP/target_map_codegen_hold.cpp
    clang/test/OpenMP/target_map_deref_array_codegen.cpp
    clang/test/OpenMP/target_map_member_expr_codegen.cpp
    clang/test/OpenMP/target_offload_mandatory_codegen.cpp
    clang/test/OpenMP/target_ompx_dyn_cgroup_mem_codegen.cpp
    clang/test/OpenMP/target_parallel_codegen.cpp
    clang/test/OpenMP/target_parallel_for_codegen.cpp
    clang/test/OpenMP/target_parallel_for_simd_codegen.cpp
    clang/test/OpenMP/target_parallel_if_codegen.cpp
    clang/test/OpenMP/target_parallel_num_threads_codegen.cpp
    clang/test/OpenMP/target_task_affinity_codegen.cpp
    clang/test/OpenMP/target_teams_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_private_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp
    clang/test/OpenMP/target_teams_map_codegen.cpp
    clang/test/OpenMP/target_teams_num_teams_codegen.cpp
    clang/test/OpenMP/target_teams_thread_limit_codegen.cpp
    clang/test/OpenMP/teams_codegen.cpp
    clang/test/OpenMP/teams_distribute_codegen.cpp
    clang/test/OpenMP/teams_distribute_collapse_codegen.cpp
    clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp
    clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp
    clang/test/OpenMP/teams_distribute_lastprivate_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp
    clang/test/OpenMP/teams_distribute_private_codegen.cpp
    clang/test/OpenMP/teams_distribute_reduction_codegen.cpp
    clang/test/OpenMP/teams_distribute_simd_codegen.cpp
    clang/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp
    clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp
    clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp
    clang/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp
    clang/test/OpenMP/teams_distribute_simd_private_codegen.cpp
    clang/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp
    clang/test/OpenMP/teams_firstprivate_codegen.cpp
    clang/test/OpenMP/teams_private_codegen.cpp
    llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
    llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 8ebdad4c63ee3..36bc7f10762d7 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -9952,6 +9952,9 @@ void CGOpenMPRuntime::emitTargetCall(
         DynCGroupMem,
     };
 
+    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
+        CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
+
     // The target region is an outlined function launched by the runtime
     // via calls to __tgt_target_kernel().
     //
@@ -9966,7 +9969,7 @@ void CGOpenMPRuntime::emitTargetCall(
     // of teams and threads so no additional calls to the runtime are required.
     // Check the error code and execute the host version if required.
     CGF.Builder.restoreIP(OMPBuilder.emitTargetKernel(
-        CGF.Builder, Return, RTLoc, DeviceID, NumTeams, NumThreads,
+        CGF.Builder, AllocaIP, Return, RTLoc, DeviceID, NumTeams, NumThreads,
         OutlinedFnID, KernelArgs));
 
     llvm::BasicBlock *OffloadFailedBlock =

diff  --git a/clang/test/OpenMP/align_clause_codegen.cpp b/clang/test/OpenMP/align_clause_codegen.cpp
index 47fed837dfdba..49ece6e3319ec 100644
--- a/clang/test/OpenMP/align_clause_codegen.cpp
+++ b/clang/test/OpenMP/align_clause_codegen.cpp
@@ -99,14 +99,14 @@ int template_test() {
 // CHECK-32-NEXT:    [[DOTFOO10__VOID_ADDR:%.*]] = call ptr @__kmpc_aligned_alloc(i32 [[TMP0]], i32 128, i32 720, ptr null)
 // CHECK-32-NEXT:    call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTFOO10__VOID_ADDR]], ptr null)
 // CHECK-32-NEXT:    call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTFOO9__VOID_ADDR]], ptr inttoptr (i32 8 to ptr))
-// CHECK-32-NEXT:    [[TMP3:%.*]] = load i32, ptr [[MYALLOC]], align 4
-// CHECK-32-NEXT:    [[CONV:%.*]] = inttoptr i32 [[TMP3]] to ptr
+// CHECK-32-NEXT:    [[TMP1:%.*]] = load i32, ptr [[MYALLOC]], align 4
+// CHECK-32-NEXT:    [[CONV:%.*]] = inttoptr i32 [[TMP1]] to ptr
 // CHECK-32-NEXT:    [[DOTBAR1__VOID_ADDR:%.*]] = call ptr @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 4, ptr [[CONV]])
-// CHECK-32-NEXT:    [[TMP4:%.*]] = load i32, ptr [[MYALLOC]], align 4
-// CHECK-32-NEXT:    [[CONV1:%.*]] = inttoptr i32 [[TMP4]] to ptr
+// CHECK-32-NEXT:    [[TMP2:%.*]] = load i32, ptr [[MYALLOC]], align 4
+// CHECK-32-NEXT:    [[CONV1:%.*]] = inttoptr i32 [[TMP2]] to ptr
 // CHECK-32-NEXT:    [[DOTBAR2__VOID_ADDR:%.*]] = call ptr @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 40, ptr [[CONV1]])
-// CHECK-32-NEXT:    [[TMP5:%.*]] = load i32, ptr [[MYALLOC]], align 4
-// CHECK-32-NEXT:    [[CONV2:%.*]] = inttoptr i32 [[TMP5]] to ptr
+// CHECK-32-NEXT:    [[TMP3:%.*]] = load i32, ptr [[MYALLOC]], align 4
+// CHECK-32-NEXT:    [[CONV2:%.*]] = inttoptr i32 [[TMP3]] to ptr
 // CHECK-32-NEXT:    [[DOTBAR3__VOID_ADDR:%.*]] = call ptr @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 80, ptr [[CONV2]])
 // CHECK-32-NEXT:    [[DOTBAR4__VOID_ADDR:%.*]] = call ptr @__kmpc_aligned_alloc(i32 [[TMP0]], i32 16, i32 4, ptr null)
 // CHECK-32-NEXT:    [[DOTBAR5__VOID_ADDR:%.*]] = call ptr @__kmpc_aligned_alloc(i32 [[TMP0]], i32 16, i32 4, ptr null)
@@ -114,14 +114,14 @@ int template_test() {
 // CHECK-32-NEXT:    call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTBAR6__VOID_ADDR]], ptr null)
 // CHECK-32-NEXT:    call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTBAR5__VOID_ADDR]], ptr null)
 // CHECK-32-NEXT:    call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTBAR4__VOID_ADDR]], ptr null)
-// CHECK-32-NEXT:    [[TMP10:%.*]] = load i32, ptr [[MYALLOC]], align 4
-// CHECK-32-NEXT:    [[CONV3:%.*]] = inttoptr i32 [[TMP10]] to ptr
+// CHECK-32-NEXT:    [[TMP4:%.*]] = load i32, ptr [[MYALLOC]], align 4
+// CHECK-32-NEXT:    [[CONV3:%.*]] = inttoptr i32 [[TMP4]] to ptr
 // CHECK-32-NEXT:    call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTBAR3__VOID_ADDR]], ptr [[CONV3]])
-// CHECK-32-NEXT:    [[TMP12:%.*]] = load i32, ptr [[MYALLOC]], align 4
-// CHECK-32-NEXT:    [[CONV4:%.*]] = inttoptr i32 [[TMP12]] to ptr
+// CHECK-32-NEXT:    [[TMP5:%.*]] = load i32, ptr [[MYALLOC]], align 4
+// CHECK-32-NEXT:    [[CONV4:%.*]] = inttoptr i32 [[TMP5]] to ptr
 // CHECK-32-NEXT:    call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTBAR2__VOID_ADDR]], ptr [[CONV4]])
-// CHECK-32-NEXT:    [[TMP14:%.*]] = load i32, ptr [[MYALLOC]], align 4
-// CHECK-32-NEXT:    [[CONV5:%.*]] = inttoptr i32 [[TMP14]] to ptr
+// CHECK-32-NEXT:    [[TMP6:%.*]] = load i32, ptr [[MYALLOC]], align 4
+// CHECK-32-NEXT:    [[CONV5:%.*]] = inttoptr i32 [[TMP6]] to ptr
 // CHECK-32-NEXT:    call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTBAR1__VOID_ADDR]], ptr [[CONV5]])
 // CHECK-32-NEXT:    call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTFOO8__VOID_ADDR]], ptr null)
 // CHECK-32-NEXT:    call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTFOO7__VOID_ADDR]], ptr inttoptr (i32 8 to ptr))
@@ -174,14 +174,14 @@ int template_test() {
 // CHECK-NEXT:    [[DOTFOO10__VOID_ADDR:%.*]] = call ptr @__kmpc_aligned_alloc(i32 [[TMP0]], i64 128, i64 720, ptr null)
 // CHECK-NEXT:    call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTFOO10__VOID_ADDR]], ptr null)
 // CHECK-NEXT:    call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTFOO9__VOID_ADDR]], ptr inttoptr (i64 8 to ptr))
-// CHECK-NEXT:    [[TMP3:%.*]] = load i64, ptr [[MYALLOC]], align 8
-// CHECK-NEXT:    [[CONV:%.*]] = inttoptr i64 [[TMP3]] to ptr
+// CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[MYALLOC]], align 8
+// CHECK-NEXT:    [[CONV:%.*]] = inttoptr i64 [[TMP1]] to ptr
 // CHECK-NEXT:    [[DOTBAR1__VOID_ADDR:%.*]] = call ptr @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 4, ptr [[CONV]])
-// CHECK-NEXT:    [[TMP4:%.*]] = load i64, ptr [[MYALLOC]], align 8
-// CHECK-NEXT:    [[CONV1:%.*]] = inttoptr i64 [[TMP4]] to ptr
+// CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[MYALLOC]], align 8
+// CHECK-NEXT:    [[CONV1:%.*]] = inttoptr i64 [[TMP2]] to ptr
 // CHECK-NEXT:    [[DOTBAR2__VOID_ADDR:%.*]] = call ptr @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 48, ptr [[CONV1]])
-// CHECK-NEXT:    [[TMP5:%.*]] = load i64, ptr [[MYALLOC]], align 8
-// CHECK-NEXT:    [[CONV2:%.*]] = inttoptr i64 [[TMP5]] to ptr
+// CHECK-NEXT:    [[TMP3:%.*]] = load i64, ptr [[MYALLOC]], align 8
+// CHECK-NEXT:    [[CONV2:%.*]] = inttoptr i64 [[TMP3]] to ptr
 // CHECK-NEXT:    [[DOTBAR3__VOID_ADDR:%.*]] = call ptr @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 80, ptr [[CONV2]])
 // CHECK-NEXT:    [[DOTBAR4__VOID_ADDR:%.*]] = call ptr @__kmpc_aligned_alloc(i32 [[TMP0]], i64 16, i64 8, ptr null)
 // CHECK-NEXT:    [[DOTBAR5__VOID_ADDR:%.*]] = call ptr @__kmpc_aligned_alloc(i32 [[TMP0]], i64 16, i64 4, ptr null)
@@ -189,14 +189,14 @@ int template_test() {
 // CHECK-NEXT:    call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTBAR6__VOID_ADDR]], ptr null)
 // CHECK-NEXT:    call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTBAR5__VOID_ADDR]], ptr null)
 // CHECK-NEXT:    call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTBAR4__VOID_ADDR]], ptr null)
-// CHECK-NEXT:    [[TMP10:%.*]] = load i64, ptr [[MYALLOC]], align 8
-// CHECK-NEXT:    [[CONV3:%.*]] = inttoptr i64 [[TMP10]] to ptr
+// CHECK-NEXT:    [[TMP4:%.*]] = load i64, ptr [[MYALLOC]], align 8
+// CHECK-NEXT:    [[CONV3:%.*]] = inttoptr i64 [[TMP4]] to ptr
 // CHECK-NEXT:    call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTBAR3__VOID_ADDR]], ptr [[CONV3]])
-// CHECK-NEXT:    [[TMP12:%.*]] = load i64, ptr [[MYALLOC]], align 8
-// CHECK-NEXT:    [[CONV4:%.*]] = inttoptr i64 [[TMP12]] to ptr
+// CHECK-NEXT:    [[TMP5:%.*]] = load i64, ptr [[MYALLOC]], align 8
+// CHECK-NEXT:    [[CONV4:%.*]] = inttoptr i64 [[TMP5]] to ptr
 // CHECK-NEXT:    call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTBAR2__VOID_ADDR]], ptr [[CONV4]])
-// CHECK-NEXT:    [[TMP14:%.*]] = load i64, ptr [[MYALLOC]], align 8
-// CHECK-NEXT:    [[CONV5:%.*]] = inttoptr i64 [[TMP14]] to ptr
+// CHECK-NEXT:    [[TMP6:%.*]] = load i64, ptr [[MYALLOC]], align 8
+// CHECK-NEXT:    [[CONV5:%.*]] = inttoptr i64 [[TMP6]] to ptr
 // CHECK-NEXT:    call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTBAR1__VOID_ADDR]], ptr [[CONV5]])
 // CHECK-NEXT:    call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTFOO8__VOID_ADDR]], ptr null)
 // CHECK-NEXT:    call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTFOO7__VOID_ADDR]], ptr inttoptr (i64 8 to ptr))

diff  --git a/clang/test/OpenMP/atomic_compare_codegen.cpp b/clang/test/OpenMP/atomic_compare_codegen.cpp
index 0deb8ac553433..90ea3b2ab92dd 100644
--- a/clang/test/OpenMP/atomic_compare_codegen.cpp
+++ b/clang/test/OpenMP/atomic_compare_codegen.cpp
@@ -6,7 +6,6 @@
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -target-cpu core2 -fopenmp-simd -fopenmp-version=51 -x c -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=51 -x c -triple x86_64-apple-darwin10 -target-cpu core2 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=51 -x c -triple x86_64-apple-darwin10 -target-cpu core2 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
-// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -13808,8 +13807,7 @@ double dxevd() {
 }
 
 #endif
-// CHECK-LABEL: define {{[^@]+}}@foo
-// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK-LABEL: @foo(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[CX:%.*]] = alloca i8, align 1
 // CHECK-NEXT:    [[CE:%.*]] = alloca i8, align 1
@@ -16130,8 +16128,7 @@ double dxevd() {
 // CHECK-NEXT:    ret void
 //
 //
-// CHECK-LABEL: define {{[^@]+}}@bar
-// CHECK-SAME: () #[[ATTR0]] {
+// CHECK-LABEL: @bar(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[CX:%.*]] = alloca i8, align 1
 // CHECK-NEXT:    [[CV:%.*]] = alloca i8, align 1
@@ -25386,8 +25383,7 @@ double dxevd() {
 // CHECK-NEXT:    ret void
 //
 //
-// CHECK-LABEL: define {{[^@]+}}@cxevd
-// CHECK-SAME: () #[[ATTR0]] {
+// CHECK-LABEL: @cxevd(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[CX:%.*]] = alloca i8, align 1
 // CHECK-NEXT:    [[CV:%.*]] = alloca i8, align 1
@@ -25583,8 +25579,7 @@ double dxevd() {
 // CHECK-NEXT:    ret i8 [[TMP132]]
 //
 //
-// CHECK-LABEL: define {{[^@]+}}@ucxevd
-// CHECK-SAME: () #[[ATTR0]] {
+// CHECK-LABEL: @ucxevd(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[UCX:%.*]] = alloca i8, align 1
 // CHECK-NEXT:    [[UCV:%.*]] = alloca i8, align 1
@@ -25780,8 +25775,7 @@ double dxevd() {
 // CHECK-NEXT:    ret i8 [[TMP132]]
 //
 //
-// CHECK-LABEL: define {{[^@]+}}@sxevd
-// CHECK-SAME: () #[[ATTR0]] {
+// CHECK-LABEL: @sxevd(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[SX:%.*]] = alloca i16, align 2
 // CHECK-NEXT:    [[SV:%.*]] = alloca i16, align 2
@@ -25977,8 +25971,7 @@ double dxevd() {
 // CHECK-NEXT:    ret i16 [[TMP132]]
 //
 //
-// CHECK-LABEL: define {{[^@]+}}@usxevd
-// CHECK-SAME: () #[[ATTR0]] {
+// CHECK-LABEL: @usxevd(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[USX:%.*]] = alloca i16, align 2
 // CHECK-NEXT:    [[USV:%.*]] = alloca i16, align 2
@@ -26174,8 +26167,7 @@ double dxevd() {
 // CHECK-NEXT:    ret i16 [[TMP132]]
 //
 //
-// CHECK-LABEL: define {{[^@]+}}@ixevd
-// CHECK-SAME: () #[[ATTR0]] {
+// CHECK-LABEL: @ixevd(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[IX:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[IV:%.*]] = alloca i32, align 4
@@ -26371,8 +26363,7 @@ double dxevd() {
 // CHECK-NEXT:    ret i32 [[TMP132]]
 //
 //
-// CHECK-LABEL: define {{[^@]+}}@uixevd
-// CHECK-SAME: () #[[ATTR0]] {
+// CHECK-LABEL: @uixevd(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[UIX:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[UIV:%.*]] = alloca i32, align 4
@@ -26568,8 +26559,7 @@ double dxevd() {
 // CHECK-NEXT:    ret i32 [[TMP132]]
 //
 //
-// CHECK-LABEL: define {{[^@]+}}@lxevd
-// CHECK-SAME: () #[[ATTR0]] {
+// CHECK-LABEL: @lxevd(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[LX:%.*]] = alloca i64, align 8
 // CHECK-NEXT:    [[LV:%.*]] = alloca i64, align 8
@@ -26765,8 +26755,7 @@ double dxevd() {
 // CHECK-NEXT:    ret i64 [[TMP132]]
 //
 //
-// CHECK-LABEL: define {{[^@]+}}@ulxevd
-// CHECK-SAME: () #[[ATTR0]] {
+// CHECK-LABEL: @ulxevd(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[ULX:%.*]] = alloca i64, align 8
 // CHECK-NEXT:    [[ULV:%.*]] = alloca i64, align 8
@@ -26962,8 +26951,7 @@ double dxevd() {
 // CHECK-NEXT:    ret i64 [[TMP132]]
 //
 //
-// CHECK-LABEL: define {{[^@]+}}@llxevd
-// CHECK-SAME: () #[[ATTR0]] {
+// CHECK-LABEL: @llxevd(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[LLX:%.*]] = alloca i64, align 8
 // CHECK-NEXT:    [[LLV:%.*]] = alloca i64, align 8
@@ -27159,8 +27147,7 @@ double dxevd() {
 // CHECK-NEXT:    ret i64 [[TMP132]]
 //
 //
-// CHECK-LABEL: define {{[^@]+}}@ullxevd
-// CHECK-SAME: () #[[ATTR0]] {
+// CHECK-LABEL: @ullxevd(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[ULLX:%.*]] = alloca i64, align 8
 // CHECK-NEXT:    [[ULLV:%.*]] = alloca i64, align 8
@@ -27356,8 +27343,7 @@ double dxevd() {
 // CHECK-NEXT:    ret i64 [[TMP132]]
 //
 //
-// CHECK-LABEL: define {{[^@]+}}@fxevd
-// CHECK-SAME: () #[[ATTR0]] {
+// CHECK-LABEL: @fxevd(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[FX:%.*]] = alloca float, align 4
 // CHECK-NEXT:    [[FV:%.*]] = alloca float, align 4
@@ -27589,8 +27575,7 @@ double dxevd() {
 // CHECK-NEXT:    ret float [[TMP168]]
 //
 //
-// CHECK-LABEL: define {{[^@]+}}@dxevd
-// CHECK-SAME: () #[[ATTR0]] {
+// CHECK-LABEL: @dxevd(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[DX:%.*]] = alloca double, align 8
 // CHECK-NEXT:    [[DV:%.*]] = alloca double, align 8
@@ -27820,3 +27805,34164 @@ double dxevd() {
 // CHECK-NEXT:    call void @__kmpc_flush(ptr @[[GLOB1]])
 // CHECK-NEXT:    [[TMP168:%.*]] = load double, ptr [[DV]], align 8
 // CHECK-NEXT:    ret double [[TMP168]]
+//
+//
+// SIMD-ONLY0-LABEL: @foo(
+// SIMD-ONLY0-NEXT:  entry:
+// SIMD-ONLY0-NEXT:    [[CX:%.*]] = alloca i8, align 1
+// SIMD-ONLY0-NEXT:    [[CE:%.*]] = alloca i8, align 1
+// SIMD-ONLY0-NEXT:    [[CD:%.*]] = alloca i8, align 1
+// SIMD-ONLY0-NEXT:    [[UCX:%.*]] = alloca i8, align 1
+// SIMD-ONLY0-NEXT:    [[UCE:%.*]] = alloca i8, align 1
+// SIMD-ONLY0-NEXT:    [[UCD:%.*]] = alloca i8, align 1
+// SIMD-ONLY0-NEXT:    [[SX:%.*]] = alloca i16, align 2
+// SIMD-ONLY0-NEXT:    [[SE:%.*]] = alloca i16, align 2
+// SIMD-ONLY0-NEXT:    [[SD:%.*]] = alloca i16, align 2
+// SIMD-ONLY0-NEXT:    [[USX:%.*]] = alloca i16, align 2
+// SIMD-ONLY0-NEXT:    [[USE:%.*]] = alloca i16, align 2
+// SIMD-ONLY0-NEXT:    [[USD:%.*]] = alloca i16, align 2
+// SIMD-ONLY0-NEXT:    [[IX:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT:    [[IE:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT:    [[ID:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT:    [[UIX:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT:    [[UIE:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT:    [[UID:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT:    [[LX:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[LE:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[LD:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[ULX:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[ULE:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[ULD:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[LLX:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[LLE:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[LLD:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[ULLX:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[ULLE:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[ULLD:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[FX:%.*]] = alloca float, align 4
+// SIMD-ONLY0-NEXT:    [[FE:%.*]] = alloca float, align 4
+// SIMD-ONLY0-NEXT:    [[FD:%.*]] = alloca float, align 4
+// SIMD-ONLY0-NEXT:    [[DX:%.*]] = alloca double, align 8
+// SIMD-ONLY0-NEXT:    [[DE:%.*]] = alloca double, align 8
+// SIMD-ONLY0-NEXT:    [[DD:%.*]] = alloca double, align 8
+// SIMD-ONLY0-NEXT:    [[TMP0:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV:%.*]] = sext i8 [[TMP0]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1:%.*]] = sext i8 [[TMP1]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV1]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// SIMD-ONLY0:       cond.true:
+// SIMD-ONLY0-NEXT:    [[TMP2:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV3:%.*]] = sext i8 [[TMP2]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END:%.*]]
+// SIMD-ONLY0:       cond.false:
+// SIMD-ONLY0-NEXT:    [[TMP3:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV4:%.*]] = sext i8 [[TMP3]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END]]
+// SIMD-ONLY0:       cond.end:
+// SIMD-ONLY0-NEXT:    [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ]
+// SIMD-ONLY0-NEXT:    [[CONV5:%.*]] = trunc i32 [[COND]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV5]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP4:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV6:%.*]] = sext i8 [[TMP4]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP5:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV7:%.*]] = sext i8 [[TMP5]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP8:%.*]] = icmp slt i32 [[CONV6]], [[CONV7]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP8]], label [[COND_TRUE10:%.*]], label [[COND_FALSE12:%.*]]
+// SIMD-ONLY0:       cond.true10:
+// SIMD-ONLY0-NEXT:    [[TMP6:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV11:%.*]] = sext i8 [[TMP6]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END14:%.*]]
+// SIMD-ONLY0:       cond.false12:
+// SIMD-ONLY0-NEXT:    [[TMP7:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV13:%.*]] = sext i8 [[TMP7]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END14]]
+// SIMD-ONLY0:       cond.end14:
+// SIMD-ONLY0-NEXT:    [[COND15:%.*]] = phi i32 [ [[CONV11]], [[COND_TRUE10]] ], [ [[CONV13]], [[COND_FALSE12]] ]
+// SIMD-ONLY0-NEXT:    [[CONV16:%.*]] = trunc i32 [[COND15]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV16]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP8:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV17:%.*]] = sext i8 [[TMP8]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP9:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV18:%.*]] = sext i8 [[TMP9]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP19:%.*]] = icmp sgt i32 [[CONV17]], [[CONV18]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP19]], label [[COND_TRUE21:%.*]], label [[COND_FALSE23:%.*]]
+// SIMD-ONLY0:       cond.true21:
+// SIMD-ONLY0-NEXT:    [[TMP10:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV22:%.*]] = sext i8 [[TMP10]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END25:%.*]]
+// SIMD-ONLY0:       cond.false23:
+// SIMD-ONLY0-NEXT:    [[TMP11:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV24:%.*]] = sext i8 [[TMP11]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END25]]
+// SIMD-ONLY0:       cond.end25:
+// SIMD-ONLY0-NEXT:    [[COND26:%.*]] = phi i32 [ [[CONV22]], [[COND_TRUE21]] ], [ [[CONV24]], [[COND_FALSE23]] ]
+// SIMD-ONLY0-NEXT:    [[CONV27:%.*]] = trunc i32 [[COND26]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV27]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP12:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV28:%.*]] = sext i8 [[TMP12]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP13:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV29:%.*]] = sext i8 [[TMP13]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP30:%.*]] = icmp slt i32 [[CONV28]], [[CONV29]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP30]], label [[COND_TRUE32:%.*]], label [[COND_FALSE34:%.*]]
+// SIMD-ONLY0:       cond.true32:
+// SIMD-ONLY0-NEXT:    [[TMP14:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV33:%.*]] = sext i8 [[TMP14]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END36:%.*]]
+// SIMD-ONLY0:       cond.false34:
+// SIMD-ONLY0-NEXT:    [[TMP15:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV35:%.*]] = sext i8 [[TMP15]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END36]]
+// SIMD-ONLY0:       cond.end36:
+// SIMD-ONLY0-NEXT:    [[COND37:%.*]] = phi i32 [ [[CONV33]], [[COND_TRUE32]] ], [ [[CONV35]], [[COND_FALSE34]] ]
+// SIMD-ONLY0-NEXT:    [[CONV38:%.*]] = trunc i32 [[COND37]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV38]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP16:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV39:%.*]] = sext i8 [[TMP16]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP17:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV40:%.*]] = sext i8 [[TMP17]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP41:%.*]] = icmp sgt i32 [[CONV39]], [[CONV40]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP41]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+// SIMD-ONLY0:       if.then:
+// SIMD-ONLY0-NEXT:    [[TMP18:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP18]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END]]
+// SIMD-ONLY0:       if.end:
+// SIMD-ONLY0-NEXT:    [[TMP19:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV43:%.*]] = sext i8 [[TMP19]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP20:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV44:%.*]] = sext i8 [[TMP20]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP45:%.*]] = icmp slt i32 [[CONV43]], [[CONV44]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP45]], label [[IF_THEN47:%.*]], label [[IF_END48:%.*]]
+// SIMD-ONLY0:       if.then47:
+// SIMD-ONLY0-NEXT:    [[TMP21:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP21]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END48]]
+// SIMD-ONLY0:       if.end48:
+// SIMD-ONLY0-NEXT:    [[TMP22:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV49:%.*]] = sext i8 [[TMP22]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP23:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV50:%.*]] = sext i8 [[TMP23]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP51:%.*]] = icmp sgt i32 [[CONV49]], [[CONV50]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP51]], label [[IF_THEN53:%.*]], label [[IF_END54:%.*]]
+// SIMD-ONLY0:       if.then53:
+// SIMD-ONLY0-NEXT:    [[TMP24:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP24]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END54]]
+// SIMD-ONLY0:       if.end54:
+// SIMD-ONLY0-NEXT:    [[TMP25:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV55:%.*]] = sext i8 [[TMP25]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP26:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV56:%.*]] = sext i8 [[TMP26]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP57:%.*]] = icmp slt i32 [[CONV55]], [[CONV56]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP57]], label [[IF_THEN59:%.*]], label [[IF_END60:%.*]]
+// SIMD-ONLY0:       if.then59:
+// SIMD-ONLY0-NEXT:    [[TMP27:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP27]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END60]]
+// SIMD-ONLY0:       if.end60:
+// SIMD-ONLY0-NEXT:    [[TMP28:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV61:%.*]] = sext i8 [[TMP28]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP29:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV62:%.*]] = sext i8 [[TMP29]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP63:%.*]] = icmp eq i32 [[CONV61]], [[CONV62]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP63]], label [[COND_TRUE65:%.*]], label [[COND_FALSE67:%.*]]
+// SIMD-ONLY0:       cond.true65:
+// SIMD-ONLY0-NEXT:    [[TMP30:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV66:%.*]] = sext i8 [[TMP30]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END69:%.*]]
+// SIMD-ONLY0:       cond.false67:
+// SIMD-ONLY0-NEXT:    [[TMP31:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV68:%.*]] = sext i8 [[TMP31]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END69]]
+// SIMD-ONLY0:       cond.end69:
+// SIMD-ONLY0-NEXT:    [[COND70:%.*]] = phi i32 [ [[CONV66]], [[COND_TRUE65]] ], [ [[CONV68]], [[COND_FALSE67]] ]
+// SIMD-ONLY0-NEXT:    [[CONV71:%.*]] = trunc i32 [[COND70]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV71]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP32:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV72:%.*]] = sext i8 [[TMP32]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP33:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV73:%.*]] = sext i8 [[TMP33]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP74:%.*]] = icmp eq i32 [[CONV72]], [[CONV73]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP74]], label [[COND_TRUE76:%.*]], label [[COND_FALSE78:%.*]]
+// SIMD-ONLY0:       cond.true76:
+// SIMD-ONLY0-NEXT:    [[TMP34:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV77:%.*]] = sext i8 [[TMP34]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END80:%.*]]
+// SIMD-ONLY0:       cond.false78:
+// SIMD-ONLY0-NEXT:    [[TMP35:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV79:%.*]] = sext i8 [[TMP35]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END80]]
+// SIMD-ONLY0:       cond.end80:
+// SIMD-ONLY0-NEXT:    [[COND81:%.*]] = phi i32 [ [[CONV77]], [[COND_TRUE76]] ], [ [[CONV79]], [[COND_FALSE78]] ]
+// SIMD-ONLY0-NEXT:    [[CONV82:%.*]] = trunc i32 [[COND81]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV82]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP36:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV83:%.*]] = sext i8 [[TMP36]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP37:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV84:%.*]] = sext i8 [[TMP37]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP85:%.*]] = icmp eq i32 [[CONV83]], [[CONV84]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP85]], label [[IF_THEN87:%.*]], label [[IF_END88:%.*]]
+// SIMD-ONLY0:       if.then87:
+// SIMD-ONLY0-NEXT:    [[TMP38:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP38]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END88]]
+// SIMD-ONLY0:       if.end88:
+// SIMD-ONLY0-NEXT:    [[TMP39:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV89:%.*]] = sext i8 [[TMP39]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP40:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV90:%.*]] = sext i8 [[TMP40]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP91:%.*]] = icmp eq i32 [[CONV89]], [[CONV90]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP91]], label [[IF_THEN93:%.*]], label [[IF_END94:%.*]]
+// SIMD-ONLY0:       if.then93:
+// SIMD-ONLY0-NEXT:    [[TMP41:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP41]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END94]]
+// SIMD-ONLY0:       if.end94:
+// SIMD-ONLY0-NEXT:    [[TMP42:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV95:%.*]] = zext i8 [[TMP42]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP43:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV96:%.*]] = zext i8 [[TMP43]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP97:%.*]] = icmp sgt i32 [[CONV95]], [[CONV96]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP97]], label [[COND_TRUE99:%.*]], label [[COND_FALSE101:%.*]]
+// SIMD-ONLY0:       cond.true99:
+// SIMD-ONLY0-NEXT:    [[TMP44:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV100:%.*]] = zext i8 [[TMP44]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END103:%.*]]
+// SIMD-ONLY0:       cond.false101:
+// SIMD-ONLY0-NEXT:    [[TMP45:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV102:%.*]] = zext i8 [[TMP45]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END103]]
+// SIMD-ONLY0:       cond.end103:
+// SIMD-ONLY0-NEXT:    [[COND104:%.*]] = phi i32 [ [[CONV100]], [[COND_TRUE99]] ], [ [[CONV102]], [[COND_FALSE101]] ]
+// SIMD-ONLY0-NEXT:    [[CONV105:%.*]] = trunc i32 [[COND104]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV105]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP46:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV106:%.*]] = zext i8 [[TMP46]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP47:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV107:%.*]] = zext i8 [[TMP47]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP108:%.*]] = icmp slt i32 [[CONV106]], [[CONV107]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP108]], label [[COND_TRUE110:%.*]], label [[COND_FALSE112:%.*]]
+// SIMD-ONLY0:       cond.true110:
+// SIMD-ONLY0-NEXT:    [[TMP48:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV111:%.*]] = zext i8 [[TMP48]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END114:%.*]]
+// SIMD-ONLY0:       cond.false112:
+// SIMD-ONLY0-NEXT:    [[TMP49:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV113:%.*]] = zext i8 [[TMP49]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END114]]
+// SIMD-ONLY0:       cond.end114:
+// SIMD-ONLY0-NEXT:    [[COND115:%.*]] = phi i32 [ [[CONV111]], [[COND_TRUE110]] ], [ [[CONV113]], [[COND_FALSE112]] ]
+// SIMD-ONLY0-NEXT:    [[CONV116:%.*]] = trunc i32 [[COND115]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV116]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP50:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV117:%.*]] = zext i8 [[TMP50]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP51:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV118:%.*]] = zext i8 [[TMP51]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP119:%.*]] = icmp sgt i32 [[CONV117]], [[CONV118]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP119]], label [[COND_TRUE121:%.*]], label [[COND_FALSE123:%.*]]
+// SIMD-ONLY0:       cond.true121:
+// SIMD-ONLY0-NEXT:    [[TMP52:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV122:%.*]] = zext i8 [[TMP52]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END125:%.*]]
+// SIMD-ONLY0:       cond.false123:
+// SIMD-ONLY0-NEXT:    [[TMP53:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV124:%.*]] = zext i8 [[TMP53]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END125]]
+// SIMD-ONLY0:       cond.end125:
+// SIMD-ONLY0-NEXT:    [[COND126:%.*]] = phi i32 [ [[CONV122]], [[COND_TRUE121]] ], [ [[CONV124]], [[COND_FALSE123]] ]
+// SIMD-ONLY0-NEXT:    [[CONV127:%.*]] = trunc i32 [[COND126]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV127]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP54:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV128:%.*]] = zext i8 [[TMP54]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP55:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV129:%.*]] = zext i8 [[TMP55]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP130:%.*]] = icmp slt i32 [[CONV128]], [[CONV129]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP130]], label [[COND_TRUE132:%.*]], label [[COND_FALSE134:%.*]]
+// SIMD-ONLY0:       cond.true132:
+// SIMD-ONLY0-NEXT:    [[TMP56:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV133:%.*]] = zext i8 [[TMP56]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END136:%.*]]
+// SIMD-ONLY0:       cond.false134:
+// SIMD-ONLY0-NEXT:    [[TMP57:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV135:%.*]] = zext i8 [[TMP57]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END136]]
+// SIMD-ONLY0:       cond.end136:
+// SIMD-ONLY0-NEXT:    [[COND137:%.*]] = phi i32 [ [[CONV133]], [[COND_TRUE132]] ], [ [[CONV135]], [[COND_FALSE134]] ]
+// SIMD-ONLY0-NEXT:    [[CONV138:%.*]] = trunc i32 [[COND137]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV138]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP58:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV139:%.*]] = zext i8 [[TMP58]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP59:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV140:%.*]] = zext i8 [[TMP59]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP141:%.*]] = icmp sgt i32 [[CONV139]], [[CONV140]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP141]], label [[IF_THEN143:%.*]], label [[IF_END144:%.*]]
+// SIMD-ONLY0:       if.then143:
+// SIMD-ONLY0-NEXT:    [[TMP60:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP60]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END144]]
+// SIMD-ONLY0:       if.end144:
+// SIMD-ONLY0-NEXT:    [[TMP61:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV145:%.*]] = zext i8 [[TMP61]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP62:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV146:%.*]] = zext i8 [[TMP62]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP147:%.*]] = icmp slt i32 [[CONV145]], [[CONV146]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP147]], label [[IF_THEN149:%.*]], label [[IF_END150:%.*]]
+// SIMD-ONLY0:       if.then149:
+// SIMD-ONLY0-NEXT:    [[TMP63:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP63]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END150]]
+// SIMD-ONLY0:       if.end150:
+// SIMD-ONLY0-NEXT:    [[TMP64:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV151:%.*]] = zext i8 [[TMP64]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP65:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV152:%.*]] = zext i8 [[TMP65]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP153:%.*]] = icmp sgt i32 [[CONV151]], [[CONV152]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP153]], label [[IF_THEN155:%.*]], label [[IF_END156:%.*]]
+// SIMD-ONLY0:       if.then155:
+// SIMD-ONLY0-NEXT:    [[TMP66:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP66]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END156]]
+// SIMD-ONLY0:       if.end156:
+// SIMD-ONLY0-NEXT:    [[TMP67:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV157:%.*]] = zext i8 [[TMP67]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP68:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV158:%.*]] = zext i8 [[TMP68]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP159:%.*]] = icmp slt i32 [[CONV157]], [[CONV158]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP159]], label [[IF_THEN161:%.*]], label [[IF_END162:%.*]]
+// SIMD-ONLY0:       if.then161:
+// SIMD-ONLY0-NEXT:    [[TMP69:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP69]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END162]]
+// SIMD-ONLY0:       if.end162:
+// SIMD-ONLY0-NEXT:    [[TMP70:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV163:%.*]] = zext i8 [[TMP70]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP71:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV164:%.*]] = zext i8 [[TMP71]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP165:%.*]] = icmp eq i32 [[CONV163]], [[CONV164]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP165]], label [[COND_TRUE167:%.*]], label [[COND_FALSE169:%.*]]
+// SIMD-ONLY0:       cond.true167:
+// SIMD-ONLY0-NEXT:    [[TMP72:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV168:%.*]] = zext i8 [[TMP72]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END171:%.*]]
+// SIMD-ONLY0:       cond.false169:
+// SIMD-ONLY0-NEXT:    [[TMP73:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV170:%.*]] = zext i8 [[TMP73]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END171]]
+// SIMD-ONLY0:       cond.end171:
+// SIMD-ONLY0-NEXT:    [[COND172:%.*]] = phi i32 [ [[CONV168]], [[COND_TRUE167]] ], [ [[CONV170]], [[COND_FALSE169]] ]
+// SIMD-ONLY0-NEXT:    [[CONV173:%.*]] = trunc i32 [[COND172]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV173]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP74:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV174:%.*]] = zext i8 [[TMP74]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP75:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV175:%.*]] = zext i8 [[TMP75]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP176:%.*]] = icmp eq i32 [[CONV174]], [[CONV175]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP176]], label [[COND_TRUE178:%.*]], label [[COND_FALSE180:%.*]]
+// SIMD-ONLY0:       cond.true178:
+// SIMD-ONLY0-NEXT:    [[TMP76:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV179:%.*]] = zext i8 [[TMP76]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END182:%.*]]
+// SIMD-ONLY0:       cond.false180:
+// SIMD-ONLY0-NEXT:    [[TMP77:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV181:%.*]] = zext i8 [[TMP77]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END182]]
+// SIMD-ONLY0:       cond.end182:
+// SIMD-ONLY0-NEXT:    [[COND183:%.*]] = phi i32 [ [[CONV179]], [[COND_TRUE178]] ], [ [[CONV181]], [[COND_FALSE180]] ]
+// SIMD-ONLY0-NEXT:    [[CONV184:%.*]] = trunc i32 [[COND183]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV184]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP78:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV185:%.*]] = zext i8 [[TMP78]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP79:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV186:%.*]] = zext i8 [[TMP79]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP187:%.*]] = icmp eq i32 [[CONV185]], [[CONV186]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP187]], label [[IF_THEN189:%.*]], label [[IF_END190:%.*]]
+// SIMD-ONLY0:       if.then189:
+// SIMD-ONLY0-NEXT:    [[TMP80:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP80]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END190]]
+// SIMD-ONLY0:       if.end190:
+// SIMD-ONLY0-NEXT:    [[TMP81:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV191:%.*]] = zext i8 [[TMP81]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP82:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV192:%.*]] = zext i8 [[TMP82]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP193:%.*]] = icmp eq i32 [[CONV191]], [[CONV192]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP193]], label [[IF_THEN195:%.*]], label [[IF_END196:%.*]]
+// SIMD-ONLY0:       if.then195:
+// SIMD-ONLY0-NEXT:    [[TMP83:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP83]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END196]]
+// SIMD-ONLY0:       if.end196:
+// SIMD-ONLY0-NEXT:    [[TMP84:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV197:%.*]] = sext i8 [[TMP84]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP85:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV198:%.*]] = sext i8 [[TMP85]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP199:%.*]] = icmp sgt i32 [[CONV197]], [[CONV198]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP199]], label [[COND_TRUE201:%.*]], label [[COND_FALSE203:%.*]]
+// SIMD-ONLY0:       cond.true201:
+// SIMD-ONLY0-NEXT:    [[TMP86:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV202:%.*]] = sext i8 [[TMP86]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END205:%.*]]
+// SIMD-ONLY0:       cond.false203:
+// SIMD-ONLY0-NEXT:    [[TMP87:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV204:%.*]] = sext i8 [[TMP87]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END205]]
+// SIMD-ONLY0:       cond.end205:
+// SIMD-ONLY0-NEXT:    [[COND206:%.*]] = phi i32 [ [[CONV202]], [[COND_TRUE201]] ], [ [[CONV204]], [[COND_FALSE203]] ]
+// SIMD-ONLY0-NEXT:    [[CONV207:%.*]] = trunc i32 [[COND206]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV207]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP88:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV208:%.*]] = sext i8 [[TMP88]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP89:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV209:%.*]] = sext i8 [[TMP89]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP210:%.*]] = icmp slt i32 [[CONV208]], [[CONV209]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP210]], label [[COND_TRUE212:%.*]], label [[COND_FALSE214:%.*]]
+// SIMD-ONLY0:       cond.true212:
+// SIMD-ONLY0-NEXT:    [[TMP90:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV213:%.*]] = sext i8 [[TMP90]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END216:%.*]]
+// SIMD-ONLY0:       cond.false214:
+// SIMD-ONLY0-NEXT:    [[TMP91:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV215:%.*]] = sext i8 [[TMP91]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END216]]
+// SIMD-ONLY0:       cond.end216:
+// SIMD-ONLY0-NEXT:    [[COND217:%.*]] = phi i32 [ [[CONV213]], [[COND_TRUE212]] ], [ [[CONV215]], [[COND_FALSE214]] ]
+// SIMD-ONLY0-NEXT:    [[CONV218:%.*]] = trunc i32 [[COND217]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV218]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP92:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV219:%.*]] = sext i8 [[TMP92]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP93:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV220:%.*]] = sext i8 [[TMP93]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP221:%.*]] = icmp sgt i32 [[CONV219]], [[CONV220]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP221]], label [[COND_TRUE223:%.*]], label [[COND_FALSE225:%.*]]
+// SIMD-ONLY0:       cond.true223:
+// SIMD-ONLY0-NEXT:    [[TMP94:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV224:%.*]] = sext i8 [[TMP94]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END227:%.*]]
+// SIMD-ONLY0:       cond.false225:
+// SIMD-ONLY0-NEXT:    [[TMP95:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV226:%.*]] = sext i8 [[TMP95]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END227]]
+// SIMD-ONLY0:       cond.end227:
+// SIMD-ONLY0-NEXT:    [[COND228:%.*]] = phi i32 [ [[CONV224]], [[COND_TRUE223]] ], [ [[CONV226]], [[COND_FALSE225]] ]
+// SIMD-ONLY0-NEXT:    [[CONV229:%.*]] = trunc i32 [[COND228]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV229]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP96:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV230:%.*]] = sext i8 [[TMP96]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP97:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV231:%.*]] = sext i8 [[TMP97]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP232:%.*]] = icmp slt i32 [[CONV230]], [[CONV231]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP232]], label [[COND_TRUE234:%.*]], label [[COND_FALSE236:%.*]]
+// SIMD-ONLY0:       cond.true234:
+// SIMD-ONLY0-NEXT:    [[TMP98:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV235:%.*]] = sext i8 [[TMP98]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END238:%.*]]
+// SIMD-ONLY0:       cond.false236:
+// SIMD-ONLY0-NEXT:    [[TMP99:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV237:%.*]] = sext i8 [[TMP99]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END238]]
+// SIMD-ONLY0:       cond.end238:
+// SIMD-ONLY0-NEXT:    [[COND239:%.*]] = phi i32 [ [[CONV235]], [[COND_TRUE234]] ], [ [[CONV237]], [[COND_FALSE236]] ]
+// SIMD-ONLY0-NEXT:    [[CONV240:%.*]] = trunc i32 [[COND239]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV240]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP100:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV241:%.*]] = sext i8 [[TMP100]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP101:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV242:%.*]] = sext i8 [[TMP101]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP243:%.*]] = icmp sgt i32 [[CONV241]], [[CONV242]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP243]], label [[IF_THEN245:%.*]], label [[IF_END246:%.*]]
+// SIMD-ONLY0:       if.then245:
+// SIMD-ONLY0-NEXT:    [[TMP102:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP102]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END246]]
+// SIMD-ONLY0:       if.end246:
+// SIMD-ONLY0-NEXT:    [[TMP103:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV247:%.*]] = sext i8 [[TMP103]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP104:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV248:%.*]] = sext i8 [[TMP104]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP249:%.*]] = icmp slt i32 [[CONV247]], [[CONV248]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP249]], label [[IF_THEN251:%.*]], label [[IF_END252:%.*]]
+// SIMD-ONLY0:       if.then251:
+// SIMD-ONLY0-NEXT:    [[TMP105:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP105]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END252]]
+// SIMD-ONLY0:       if.end252:
+// SIMD-ONLY0-NEXT:    [[TMP106:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV253:%.*]] = sext i8 [[TMP106]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP107:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV254:%.*]] = sext i8 [[TMP107]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP255:%.*]] = icmp sgt i32 [[CONV253]], [[CONV254]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP255]], label [[IF_THEN257:%.*]], label [[IF_END258:%.*]]
+// SIMD-ONLY0:       if.then257:
+// SIMD-ONLY0-NEXT:    [[TMP108:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP108]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END258]]
+// SIMD-ONLY0:       if.end258:
+// SIMD-ONLY0-NEXT:    [[TMP109:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV259:%.*]] = sext i8 [[TMP109]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP110:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV260:%.*]] = sext i8 [[TMP110]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP261:%.*]] = icmp slt i32 [[CONV259]], [[CONV260]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP261]], label [[IF_THEN263:%.*]], label [[IF_END264:%.*]]
+// SIMD-ONLY0:       if.then263:
+// SIMD-ONLY0-NEXT:    [[TMP111:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP111]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END264]]
+// SIMD-ONLY0:       if.end264:
+// SIMD-ONLY0-NEXT:    [[TMP112:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV265:%.*]] = sext i8 [[TMP112]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP113:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV266:%.*]] = sext i8 [[TMP113]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP267:%.*]] = icmp eq i32 [[CONV265]], [[CONV266]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP267]], label [[COND_TRUE269:%.*]], label [[COND_FALSE271:%.*]]
+// SIMD-ONLY0:       cond.true269:
+// SIMD-ONLY0-NEXT:    [[TMP114:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV270:%.*]] = sext i8 [[TMP114]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END273:%.*]]
+// SIMD-ONLY0:       cond.false271:
+// SIMD-ONLY0-NEXT:    [[TMP115:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV272:%.*]] = sext i8 [[TMP115]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END273]]
+// SIMD-ONLY0:       cond.end273:
+// SIMD-ONLY0-NEXT:    [[COND274:%.*]] = phi i32 [ [[CONV270]], [[COND_TRUE269]] ], [ [[CONV272]], [[COND_FALSE271]] ]
+// SIMD-ONLY0-NEXT:    [[CONV275:%.*]] = trunc i32 [[COND274]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV275]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP116:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV276:%.*]] = sext i8 [[TMP116]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP117:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV277:%.*]] = sext i8 [[TMP117]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP278:%.*]] = icmp eq i32 [[CONV276]], [[CONV277]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP278]], label [[COND_TRUE280:%.*]], label [[COND_FALSE282:%.*]]
+// SIMD-ONLY0:       cond.true280:
+// SIMD-ONLY0-NEXT:    [[TMP118:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV281:%.*]] = sext i8 [[TMP118]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END284:%.*]]
+// SIMD-ONLY0:       cond.false282:
+// SIMD-ONLY0-NEXT:    [[TMP119:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV283:%.*]] = sext i8 [[TMP119]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END284]]
+// SIMD-ONLY0:       cond.end284:
+// SIMD-ONLY0-NEXT:    [[COND285:%.*]] = phi i32 [ [[CONV281]], [[COND_TRUE280]] ], [ [[CONV283]], [[COND_FALSE282]] ]
+// SIMD-ONLY0-NEXT:    [[CONV286:%.*]] = trunc i32 [[COND285]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV286]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP120:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV287:%.*]] = sext i8 [[TMP120]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP121:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV288:%.*]] = sext i8 [[TMP121]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP289:%.*]] = icmp eq i32 [[CONV287]], [[CONV288]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP289]], label [[IF_THEN291:%.*]], label [[IF_END292:%.*]]
+// SIMD-ONLY0:       if.then291:
+// SIMD-ONLY0-NEXT:    [[TMP122:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP122]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END292]]
+// SIMD-ONLY0:       if.end292:
+// SIMD-ONLY0-NEXT:    [[TMP123:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV293:%.*]] = sext i8 [[TMP123]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP124:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV294:%.*]] = sext i8 [[TMP124]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP295:%.*]] = icmp eq i32 [[CONV293]], [[CONV294]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP295]], label [[IF_THEN297:%.*]], label [[IF_END298:%.*]]
+// SIMD-ONLY0:       if.then297:
+// SIMD-ONLY0-NEXT:    [[TMP125:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP125]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END298]]
+// SIMD-ONLY0:       if.end298:
+// SIMD-ONLY0-NEXT:    [[TMP126:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV299:%.*]] = zext i8 [[TMP126]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP127:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV300:%.*]] = zext i8 [[TMP127]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP301:%.*]] = icmp sgt i32 [[CONV299]], [[CONV300]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP301]], label [[COND_TRUE303:%.*]], label [[COND_FALSE305:%.*]]
+// SIMD-ONLY0:       cond.true303:
+// SIMD-ONLY0-NEXT:    [[TMP128:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV304:%.*]] = zext i8 [[TMP128]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END307:%.*]]
+// SIMD-ONLY0:       cond.false305:
+// SIMD-ONLY0-NEXT:    [[TMP129:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV306:%.*]] = zext i8 [[TMP129]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END307]]
+// SIMD-ONLY0:       cond.end307:
+// SIMD-ONLY0-NEXT:    [[COND308:%.*]] = phi i32 [ [[CONV304]], [[COND_TRUE303]] ], [ [[CONV306]], [[COND_FALSE305]] ]
+// SIMD-ONLY0-NEXT:    [[CONV309:%.*]] = trunc i32 [[COND308]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV309]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP130:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV310:%.*]] = zext i8 [[TMP130]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP131:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV311:%.*]] = zext i8 [[TMP131]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP312:%.*]] = icmp slt i32 [[CONV310]], [[CONV311]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP312]], label [[COND_TRUE314:%.*]], label [[COND_FALSE316:%.*]]
+// SIMD-ONLY0:       cond.true314:
+// SIMD-ONLY0-NEXT:    [[TMP132:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV315:%.*]] = zext i8 [[TMP132]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END318:%.*]]
+// SIMD-ONLY0:       cond.false316:
+// SIMD-ONLY0-NEXT:    [[TMP133:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV317:%.*]] = zext i8 [[TMP133]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END318]]
+// SIMD-ONLY0:       cond.end318:
+// SIMD-ONLY0-NEXT:    [[COND319:%.*]] = phi i32 [ [[CONV315]], [[COND_TRUE314]] ], [ [[CONV317]], [[COND_FALSE316]] ]
+// SIMD-ONLY0-NEXT:    [[CONV320:%.*]] = trunc i32 [[COND319]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV320]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP134:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV321:%.*]] = zext i8 [[TMP134]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP135:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV322:%.*]] = zext i8 [[TMP135]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP323:%.*]] = icmp sgt i32 [[CONV321]], [[CONV322]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP323]], label [[COND_TRUE325:%.*]], label [[COND_FALSE327:%.*]]
+// SIMD-ONLY0:       cond.true325:
+// SIMD-ONLY0-NEXT:    [[TMP136:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV326:%.*]] = zext i8 [[TMP136]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END329:%.*]]
+// SIMD-ONLY0:       cond.false327:
+// SIMD-ONLY0-NEXT:    [[TMP137:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV328:%.*]] = zext i8 [[TMP137]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END329]]
+// SIMD-ONLY0:       cond.end329:
+// SIMD-ONLY0-NEXT:    [[COND330:%.*]] = phi i32 [ [[CONV326]], [[COND_TRUE325]] ], [ [[CONV328]], [[COND_FALSE327]] ]
+// SIMD-ONLY0-NEXT:    [[CONV331:%.*]] = trunc i32 [[COND330]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV331]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP138:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV332:%.*]] = zext i8 [[TMP138]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP139:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV333:%.*]] = zext i8 [[TMP139]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP334:%.*]] = icmp slt i32 [[CONV332]], [[CONV333]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP334]], label [[COND_TRUE336:%.*]], label [[COND_FALSE338:%.*]]
+// SIMD-ONLY0:       cond.true336:
+// SIMD-ONLY0-NEXT:    [[TMP140:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV337:%.*]] = zext i8 [[TMP140]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END340:%.*]]
+// SIMD-ONLY0:       cond.false338:
+// SIMD-ONLY0-NEXT:    [[TMP141:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV339:%.*]] = zext i8 [[TMP141]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END340]]
+// SIMD-ONLY0:       cond.end340:
+// SIMD-ONLY0-NEXT:    [[COND341:%.*]] = phi i32 [ [[CONV337]], [[COND_TRUE336]] ], [ [[CONV339]], [[COND_FALSE338]] ]
+// SIMD-ONLY0-NEXT:    [[CONV342:%.*]] = trunc i32 [[COND341]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV342]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP142:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV343:%.*]] = zext i8 [[TMP142]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP143:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV344:%.*]] = zext i8 [[TMP143]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP345:%.*]] = icmp sgt i32 [[CONV343]], [[CONV344]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP345]], label [[IF_THEN347:%.*]], label [[IF_END348:%.*]]
+// SIMD-ONLY0:       if.then347:
+// SIMD-ONLY0-NEXT:    [[TMP144:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP144]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END348]]
+// SIMD-ONLY0:       if.end348:
+// SIMD-ONLY0-NEXT:    [[TMP145:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV349:%.*]] = zext i8 [[TMP145]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP146:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV350:%.*]] = zext i8 [[TMP146]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP351:%.*]] = icmp slt i32 [[CONV349]], [[CONV350]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP351]], label [[IF_THEN353:%.*]], label [[IF_END354:%.*]]
+// SIMD-ONLY0:       if.then353:
+// SIMD-ONLY0-NEXT:    [[TMP147:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP147]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END354]]
+// SIMD-ONLY0:       if.end354:
+// SIMD-ONLY0-NEXT:    [[TMP148:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV355:%.*]] = zext i8 [[TMP148]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP149:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV356:%.*]] = zext i8 [[TMP149]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP357:%.*]] = icmp sgt i32 [[CONV355]], [[CONV356]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP357]], label [[IF_THEN359:%.*]], label [[IF_END360:%.*]]
+// SIMD-ONLY0:       if.then359:
+// SIMD-ONLY0-NEXT:    [[TMP150:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP150]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END360]]
+// SIMD-ONLY0:       if.end360:
+// SIMD-ONLY0-NEXT:    [[TMP151:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV361:%.*]] = zext i8 [[TMP151]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP152:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV362:%.*]] = zext i8 [[TMP152]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP363:%.*]] = icmp slt i32 [[CONV361]], [[CONV362]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP363]], label [[IF_THEN365:%.*]], label [[IF_END366:%.*]]
+// SIMD-ONLY0:       if.then365:
+// SIMD-ONLY0-NEXT:    [[TMP153:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP153]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END366]]
+// SIMD-ONLY0:       if.end366:
+// SIMD-ONLY0-NEXT:    [[TMP154:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV367:%.*]] = zext i8 [[TMP154]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP155:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV368:%.*]] = zext i8 [[TMP155]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP369:%.*]] = icmp eq i32 [[CONV367]], [[CONV368]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP369]], label [[COND_TRUE371:%.*]], label [[COND_FALSE373:%.*]]
+// SIMD-ONLY0:       cond.true371:
+// SIMD-ONLY0-NEXT:    [[TMP156:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV372:%.*]] = zext i8 [[TMP156]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END375:%.*]]
+// SIMD-ONLY0:       cond.false373:
+// SIMD-ONLY0-NEXT:    [[TMP157:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV374:%.*]] = zext i8 [[TMP157]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END375]]
+// SIMD-ONLY0:       cond.end375:
+// SIMD-ONLY0-NEXT:    [[COND376:%.*]] = phi i32 [ [[CONV372]], [[COND_TRUE371]] ], [ [[CONV374]], [[COND_FALSE373]] ]
+// SIMD-ONLY0-NEXT:    [[CONV377:%.*]] = trunc i32 [[COND376]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV377]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP158:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV378:%.*]] = zext i8 [[TMP158]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP159:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV379:%.*]] = zext i8 [[TMP159]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP380:%.*]] = icmp eq i32 [[CONV378]], [[CONV379]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP380]], label [[COND_TRUE382:%.*]], label [[COND_FALSE384:%.*]]
+// SIMD-ONLY0:       cond.true382:
+// SIMD-ONLY0-NEXT:    [[TMP160:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV383:%.*]] = zext i8 [[TMP160]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END386:%.*]]
+// SIMD-ONLY0:       cond.false384:
+// SIMD-ONLY0-NEXT:    [[TMP161:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV385:%.*]] = zext i8 [[TMP161]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END386]]
+// SIMD-ONLY0:       cond.end386:
+// SIMD-ONLY0-NEXT:    [[COND387:%.*]] = phi i32 [ [[CONV383]], [[COND_TRUE382]] ], [ [[CONV385]], [[COND_FALSE384]] ]
+// SIMD-ONLY0-NEXT:    [[CONV388:%.*]] = trunc i32 [[COND387]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV388]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP162:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV389:%.*]] = zext i8 [[TMP162]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP163:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV390:%.*]] = zext i8 [[TMP163]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP391:%.*]] = icmp eq i32 [[CONV389]], [[CONV390]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP391]], label [[IF_THEN393:%.*]], label [[IF_END394:%.*]]
+// SIMD-ONLY0:       if.then393:
+// SIMD-ONLY0-NEXT:    [[TMP164:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP164]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END394]]
+// SIMD-ONLY0:       if.end394:
+// SIMD-ONLY0-NEXT:    [[TMP165:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV395:%.*]] = zext i8 [[TMP165]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP166:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV396:%.*]] = zext i8 [[TMP166]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP397:%.*]] = icmp eq i32 [[CONV395]], [[CONV396]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP397]], label [[IF_THEN399:%.*]], label [[IF_END400:%.*]]
+// SIMD-ONLY0:       if.then399:
+// SIMD-ONLY0-NEXT:    [[TMP167:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP167]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END400]]
+// SIMD-ONLY0:       if.end400:
+// SIMD-ONLY0-NEXT:    [[TMP168:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV401:%.*]] = sext i8 [[TMP168]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP169:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV402:%.*]] = sext i8 [[TMP169]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP403:%.*]] = icmp sgt i32 [[CONV401]], [[CONV402]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP403]], label [[COND_TRUE405:%.*]], label [[COND_FALSE407:%.*]]
+// SIMD-ONLY0:       cond.true405:
+// SIMD-ONLY0-NEXT:    [[TMP170:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV406:%.*]] = sext i8 [[TMP170]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END409:%.*]]
+// SIMD-ONLY0:       cond.false407:
+// SIMD-ONLY0-NEXT:    [[TMP171:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV408:%.*]] = sext i8 [[TMP171]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END409]]
+// SIMD-ONLY0:       cond.end409:
+// SIMD-ONLY0-NEXT:    [[COND410:%.*]] = phi i32 [ [[CONV406]], [[COND_TRUE405]] ], [ [[CONV408]], [[COND_FALSE407]] ]
+// SIMD-ONLY0-NEXT:    [[CONV411:%.*]] = trunc i32 [[COND410]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV411]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP172:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV412:%.*]] = sext i8 [[TMP172]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP173:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV413:%.*]] = sext i8 [[TMP173]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP414:%.*]] = icmp slt i32 [[CONV412]], [[CONV413]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP414]], label [[COND_TRUE416:%.*]], label [[COND_FALSE418:%.*]]
+// SIMD-ONLY0:       cond.true416:
+// SIMD-ONLY0-NEXT:    [[TMP174:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV417:%.*]] = sext i8 [[TMP174]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END420:%.*]]
+// SIMD-ONLY0:       cond.false418:
+// SIMD-ONLY0-NEXT:    [[TMP175:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV419:%.*]] = sext i8 [[TMP175]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END420]]
+// SIMD-ONLY0:       cond.end420:
+// SIMD-ONLY0-NEXT:    [[COND421:%.*]] = phi i32 [ [[CONV417]], [[COND_TRUE416]] ], [ [[CONV419]], [[COND_FALSE418]] ]
+// SIMD-ONLY0-NEXT:    [[CONV422:%.*]] = trunc i32 [[COND421]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV422]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP176:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV423:%.*]] = sext i8 [[TMP176]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP177:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV424:%.*]] = sext i8 [[TMP177]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP425:%.*]] = icmp sgt i32 [[CONV423]], [[CONV424]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP425]], label [[COND_TRUE427:%.*]], label [[COND_FALSE429:%.*]]
+// SIMD-ONLY0:       cond.true427:
+// SIMD-ONLY0-NEXT:    [[TMP178:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV428:%.*]] = sext i8 [[TMP178]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END431:%.*]]
+// SIMD-ONLY0:       cond.false429:
+// SIMD-ONLY0-NEXT:    [[TMP179:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV430:%.*]] = sext i8 [[TMP179]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END431]]
+// SIMD-ONLY0:       cond.end431:
+// SIMD-ONLY0-NEXT:    [[COND432:%.*]] = phi i32 [ [[CONV428]], [[COND_TRUE427]] ], [ [[CONV430]], [[COND_FALSE429]] ]
+// SIMD-ONLY0-NEXT:    [[CONV433:%.*]] = trunc i32 [[COND432]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV433]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP180:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV434:%.*]] = sext i8 [[TMP180]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP181:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV435:%.*]] = sext i8 [[TMP181]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP436:%.*]] = icmp slt i32 [[CONV434]], [[CONV435]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP436]], label [[COND_TRUE438:%.*]], label [[COND_FALSE440:%.*]]
+// SIMD-ONLY0:       cond.true438:
+// SIMD-ONLY0-NEXT:    [[TMP182:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV439:%.*]] = sext i8 [[TMP182]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END442:%.*]]
+// SIMD-ONLY0:       cond.false440:
+// SIMD-ONLY0-NEXT:    [[TMP183:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV441:%.*]] = sext i8 [[TMP183]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END442]]
+// SIMD-ONLY0:       cond.end442:
+// SIMD-ONLY0-NEXT:    [[COND443:%.*]] = phi i32 [ [[CONV439]], [[COND_TRUE438]] ], [ [[CONV441]], [[COND_FALSE440]] ]
+// SIMD-ONLY0-NEXT:    [[CONV444:%.*]] = trunc i32 [[COND443]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV444]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP184:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV445:%.*]] = sext i8 [[TMP184]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP185:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV446:%.*]] = sext i8 [[TMP185]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP447:%.*]] = icmp sgt i32 [[CONV445]], [[CONV446]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP447]], label [[IF_THEN449:%.*]], label [[IF_END450:%.*]]
+// SIMD-ONLY0:       if.then449:
+// SIMD-ONLY0-NEXT:    [[TMP186:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP186]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END450]]
+// SIMD-ONLY0:       if.end450:
+// SIMD-ONLY0-NEXT:    [[TMP187:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV451:%.*]] = sext i8 [[TMP187]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP188:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV452:%.*]] = sext i8 [[TMP188]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP453:%.*]] = icmp slt i32 [[CONV451]], [[CONV452]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP453]], label [[IF_THEN455:%.*]], label [[IF_END456:%.*]]
+// SIMD-ONLY0:       if.then455:
+// SIMD-ONLY0-NEXT:    [[TMP189:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP189]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END456]]
+// SIMD-ONLY0:       if.end456:
+// SIMD-ONLY0-NEXT:    [[TMP190:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV457:%.*]] = sext i8 [[TMP190]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP191:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV458:%.*]] = sext i8 [[TMP191]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP459:%.*]] = icmp sgt i32 [[CONV457]], [[CONV458]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP459]], label [[IF_THEN461:%.*]], label [[IF_END462:%.*]]
+// SIMD-ONLY0:       if.then461:
+// SIMD-ONLY0-NEXT:    [[TMP192:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP192]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END462]]
+// SIMD-ONLY0:       if.end462:
+// SIMD-ONLY0-NEXT:    [[TMP193:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV463:%.*]] = sext i8 [[TMP193]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP194:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV464:%.*]] = sext i8 [[TMP194]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP465:%.*]] = icmp slt i32 [[CONV463]], [[CONV464]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP465]], label [[IF_THEN467:%.*]], label [[IF_END468:%.*]]
+// SIMD-ONLY0:       if.then467:
+// SIMD-ONLY0-NEXT:    [[TMP195:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP195]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END468]]
+// SIMD-ONLY0:       if.end468:
+// SIMD-ONLY0-NEXT:    [[TMP196:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV469:%.*]] = sext i8 [[TMP196]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP197:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV470:%.*]] = sext i8 [[TMP197]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP471:%.*]] = icmp eq i32 [[CONV469]], [[CONV470]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP471]], label [[COND_TRUE473:%.*]], label [[COND_FALSE475:%.*]]
+// SIMD-ONLY0:       cond.true473:
+// SIMD-ONLY0-NEXT:    [[TMP198:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV474:%.*]] = sext i8 [[TMP198]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END477:%.*]]
+// SIMD-ONLY0:       cond.false475:
+// SIMD-ONLY0-NEXT:    [[TMP199:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV476:%.*]] = sext i8 [[TMP199]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END477]]
+// SIMD-ONLY0:       cond.end477:
+// SIMD-ONLY0-NEXT:    [[COND478:%.*]] = phi i32 [ [[CONV474]], [[COND_TRUE473]] ], [ [[CONV476]], [[COND_FALSE475]] ]
+// SIMD-ONLY0-NEXT:    [[CONV479:%.*]] = trunc i32 [[COND478]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV479]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP200:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV480:%.*]] = sext i8 [[TMP200]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP201:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV481:%.*]] = sext i8 [[TMP201]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP482:%.*]] = icmp eq i32 [[CONV480]], [[CONV481]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP482]], label [[COND_TRUE484:%.*]], label [[COND_FALSE486:%.*]]
+// SIMD-ONLY0:       cond.true484:
+// SIMD-ONLY0-NEXT:    [[TMP202:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV485:%.*]] = sext i8 [[TMP202]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END488:%.*]]
+// SIMD-ONLY0:       cond.false486:
+// SIMD-ONLY0-NEXT:    [[TMP203:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV487:%.*]] = sext i8 [[TMP203]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END488]]
+// SIMD-ONLY0:       cond.end488:
+// SIMD-ONLY0-NEXT:    [[COND489:%.*]] = phi i32 [ [[CONV485]], [[COND_TRUE484]] ], [ [[CONV487]], [[COND_FALSE486]] ]
+// SIMD-ONLY0-NEXT:    [[CONV490:%.*]] = trunc i32 [[COND489]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV490]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP204:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV491:%.*]] = sext i8 [[TMP204]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP205:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV492:%.*]] = sext i8 [[TMP205]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP493:%.*]] = icmp eq i32 [[CONV491]], [[CONV492]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP493]], label [[IF_THEN495:%.*]], label [[IF_END496:%.*]]
+// SIMD-ONLY0:       if.then495:
+// SIMD-ONLY0-NEXT:    [[TMP206:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP206]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END496]]
+// SIMD-ONLY0:       if.end496:
+// SIMD-ONLY0-NEXT:    [[TMP207:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV497:%.*]] = sext i8 [[TMP207]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP208:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV498:%.*]] = sext i8 [[TMP208]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP499:%.*]] = icmp eq i32 [[CONV497]], [[CONV498]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP499]], label [[IF_THEN501:%.*]], label [[IF_END502:%.*]]
+// SIMD-ONLY0:       if.then501:
+// SIMD-ONLY0-NEXT:    [[TMP209:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP209]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END502]]
+// SIMD-ONLY0:       if.end502:
+// SIMD-ONLY0-NEXT:    [[TMP210:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV503:%.*]] = zext i8 [[TMP210]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP211:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV504:%.*]] = zext i8 [[TMP211]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP505:%.*]] = icmp sgt i32 [[CONV503]], [[CONV504]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP505]], label [[COND_TRUE507:%.*]], label [[COND_FALSE509:%.*]]
+// SIMD-ONLY0:       cond.true507:
+// SIMD-ONLY0-NEXT:    [[TMP212:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV508:%.*]] = zext i8 [[TMP212]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END511:%.*]]
+// SIMD-ONLY0:       cond.false509:
+// SIMD-ONLY0-NEXT:    [[TMP213:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV510:%.*]] = zext i8 [[TMP213]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END511]]
+// SIMD-ONLY0:       cond.end511:
+// SIMD-ONLY0-NEXT:    [[COND512:%.*]] = phi i32 [ [[CONV508]], [[COND_TRUE507]] ], [ [[CONV510]], [[COND_FALSE509]] ]
+// SIMD-ONLY0-NEXT:    [[CONV513:%.*]] = trunc i32 [[COND512]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV513]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP214:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV514:%.*]] = zext i8 [[TMP214]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP215:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV515:%.*]] = zext i8 [[TMP215]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP516:%.*]] = icmp slt i32 [[CONV514]], [[CONV515]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP516]], label [[COND_TRUE518:%.*]], label [[COND_FALSE520:%.*]]
+// SIMD-ONLY0:       cond.true518:
+// SIMD-ONLY0-NEXT:    [[TMP216:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV519:%.*]] = zext i8 [[TMP216]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END522:%.*]]
+// SIMD-ONLY0:       cond.false520:
+// SIMD-ONLY0-NEXT:    [[TMP217:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV521:%.*]] = zext i8 [[TMP217]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END522]]
+// SIMD-ONLY0:       cond.end522:
+// SIMD-ONLY0-NEXT:    [[COND523:%.*]] = phi i32 [ [[CONV519]], [[COND_TRUE518]] ], [ [[CONV521]], [[COND_FALSE520]] ]
+// SIMD-ONLY0-NEXT:    [[CONV524:%.*]] = trunc i32 [[COND523]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV524]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP218:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV525:%.*]] = zext i8 [[TMP218]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP219:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV526:%.*]] = zext i8 [[TMP219]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP527:%.*]] = icmp sgt i32 [[CONV525]], [[CONV526]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP527]], label [[COND_TRUE529:%.*]], label [[COND_FALSE531:%.*]]
+// SIMD-ONLY0:       cond.true529:
+// SIMD-ONLY0-NEXT:    [[TMP220:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV530:%.*]] = zext i8 [[TMP220]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END533:%.*]]
+// SIMD-ONLY0:       cond.false531:
+// SIMD-ONLY0-NEXT:    [[TMP221:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV532:%.*]] = zext i8 [[TMP221]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END533]]
+// SIMD-ONLY0:       cond.end533:
+// SIMD-ONLY0-NEXT:    [[COND534:%.*]] = phi i32 [ [[CONV530]], [[COND_TRUE529]] ], [ [[CONV532]], [[COND_FALSE531]] ]
+// SIMD-ONLY0-NEXT:    [[CONV535:%.*]] = trunc i32 [[COND534]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV535]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP222:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV536:%.*]] = zext i8 [[TMP222]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP223:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV537:%.*]] = zext i8 [[TMP223]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP538:%.*]] = icmp slt i32 [[CONV536]], [[CONV537]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP538]], label [[COND_TRUE540:%.*]], label [[COND_FALSE542:%.*]]
+// SIMD-ONLY0:       cond.true540:
+// SIMD-ONLY0-NEXT:    [[TMP224:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV541:%.*]] = zext i8 [[TMP224]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END544:%.*]]
+// SIMD-ONLY0:       cond.false542:
+// SIMD-ONLY0-NEXT:    [[TMP225:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV543:%.*]] = zext i8 [[TMP225]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END544]]
+// SIMD-ONLY0:       cond.end544:
+// SIMD-ONLY0-NEXT:    [[COND545:%.*]] = phi i32 [ [[CONV541]], [[COND_TRUE540]] ], [ [[CONV543]], [[COND_FALSE542]] ]
+// SIMD-ONLY0-NEXT:    [[CONV546:%.*]] = trunc i32 [[COND545]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV546]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP226:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV547:%.*]] = zext i8 [[TMP226]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP227:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV548:%.*]] = zext i8 [[TMP227]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP549:%.*]] = icmp sgt i32 [[CONV547]], [[CONV548]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP549]], label [[IF_THEN551:%.*]], label [[IF_END552:%.*]]
+// SIMD-ONLY0:       if.then551:
+// SIMD-ONLY0-NEXT:    [[TMP228:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP228]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END552]]
+// SIMD-ONLY0:       if.end552:
+// SIMD-ONLY0-NEXT:    [[TMP229:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV553:%.*]] = zext i8 [[TMP229]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP230:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV554:%.*]] = zext i8 [[TMP230]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP555:%.*]] = icmp slt i32 [[CONV553]], [[CONV554]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP555]], label [[IF_THEN557:%.*]], label [[IF_END558:%.*]]
+// SIMD-ONLY0:       if.then557:
+// SIMD-ONLY0-NEXT:    [[TMP231:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP231]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END558]]
+// SIMD-ONLY0:       if.end558:
+// SIMD-ONLY0-NEXT:    [[TMP232:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV559:%.*]] = zext i8 [[TMP232]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP233:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV560:%.*]] = zext i8 [[TMP233]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP561:%.*]] = icmp sgt i32 [[CONV559]], [[CONV560]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP561]], label [[IF_THEN563:%.*]], label [[IF_END564:%.*]]
+// SIMD-ONLY0:       if.then563:
+// SIMD-ONLY0-NEXT:    [[TMP234:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP234]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END564]]
+// SIMD-ONLY0:       if.end564:
+// SIMD-ONLY0-NEXT:    [[TMP235:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV565:%.*]] = zext i8 [[TMP235]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP236:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV566:%.*]] = zext i8 [[TMP236]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP567:%.*]] = icmp slt i32 [[CONV565]], [[CONV566]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP567]], label [[IF_THEN569:%.*]], label [[IF_END570:%.*]]
+// SIMD-ONLY0:       if.then569:
+// SIMD-ONLY0-NEXT:    [[TMP237:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP237]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END570]]
+// SIMD-ONLY0:       if.end570:
+// SIMD-ONLY0-NEXT:    [[TMP238:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV571:%.*]] = zext i8 [[TMP238]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP239:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV572:%.*]] = zext i8 [[TMP239]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP573:%.*]] = icmp eq i32 [[CONV571]], [[CONV572]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP573]], label [[COND_TRUE575:%.*]], label [[COND_FALSE577:%.*]]
+// SIMD-ONLY0:       cond.true575:
+// SIMD-ONLY0-NEXT:    [[TMP240:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV576:%.*]] = zext i8 [[TMP240]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END579:%.*]]
+// SIMD-ONLY0:       cond.false577:
+// SIMD-ONLY0-NEXT:    [[TMP241:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV578:%.*]] = zext i8 [[TMP241]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END579]]
+// SIMD-ONLY0:       cond.end579:
+// SIMD-ONLY0-NEXT:    [[COND580:%.*]] = phi i32 [ [[CONV576]], [[COND_TRUE575]] ], [ [[CONV578]], [[COND_FALSE577]] ]
+// SIMD-ONLY0-NEXT:    [[CONV581:%.*]] = trunc i32 [[COND580]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV581]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP242:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV582:%.*]] = zext i8 [[TMP242]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP243:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV583:%.*]] = zext i8 [[TMP243]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP584:%.*]] = icmp eq i32 [[CONV582]], [[CONV583]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP584]], label [[COND_TRUE586:%.*]], label [[COND_FALSE588:%.*]]
+// SIMD-ONLY0:       cond.true586:
+// SIMD-ONLY0-NEXT:    [[TMP244:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV587:%.*]] = zext i8 [[TMP244]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END590:%.*]]
+// SIMD-ONLY0:       cond.false588:
+// SIMD-ONLY0-NEXT:    [[TMP245:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV589:%.*]] = zext i8 [[TMP245]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END590]]
+// SIMD-ONLY0:       cond.end590:
+// SIMD-ONLY0-NEXT:    [[COND591:%.*]] = phi i32 [ [[CONV587]], [[COND_TRUE586]] ], [ [[CONV589]], [[COND_FALSE588]] ]
+// SIMD-ONLY0-NEXT:    [[CONV592:%.*]] = trunc i32 [[COND591]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV592]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP246:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV593:%.*]] = zext i8 [[TMP246]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP247:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV594:%.*]] = zext i8 [[TMP247]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP595:%.*]] = icmp eq i32 [[CONV593]], [[CONV594]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP595]], label [[IF_THEN597:%.*]], label [[IF_END598:%.*]]
+// SIMD-ONLY0:       if.then597:
+// SIMD-ONLY0-NEXT:    [[TMP248:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP248]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END598]]
+// SIMD-ONLY0:       if.end598:
+// SIMD-ONLY0-NEXT:    [[TMP249:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV599:%.*]] = zext i8 [[TMP249]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP250:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV600:%.*]] = zext i8 [[TMP250]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP601:%.*]] = icmp eq i32 [[CONV599]], [[CONV600]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP601]], label [[IF_THEN603:%.*]], label [[IF_END604:%.*]]
+// SIMD-ONLY0:       if.then603:
+// SIMD-ONLY0-NEXT:    [[TMP251:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP251]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END604]]
+// SIMD-ONLY0:       if.end604:
+// SIMD-ONLY0-NEXT:    [[TMP252:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV605:%.*]] = sext i8 [[TMP252]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP253:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV606:%.*]] = sext i8 [[TMP253]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP607:%.*]] = icmp sgt i32 [[CONV605]], [[CONV606]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP607]], label [[COND_TRUE609:%.*]], label [[COND_FALSE611:%.*]]
+// SIMD-ONLY0:       cond.true609:
+// SIMD-ONLY0-NEXT:    [[TMP254:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV610:%.*]] = sext i8 [[TMP254]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END613:%.*]]
+// SIMD-ONLY0:       cond.false611:
+// SIMD-ONLY0-NEXT:    [[TMP255:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV612:%.*]] = sext i8 [[TMP255]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END613]]
+// SIMD-ONLY0:       cond.end613:
+// SIMD-ONLY0-NEXT:    [[COND614:%.*]] = phi i32 [ [[CONV610]], [[COND_TRUE609]] ], [ [[CONV612]], [[COND_FALSE611]] ]
+// SIMD-ONLY0-NEXT:    [[CONV615:%.*]] = trunc i32 [[COND614]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV615]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP256:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV616:%.*]] = sext i8 [[TMP256]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP257:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV617:%.*]] = sext i8 [[TMP257]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP618:%.*]] = icmp slt i32 [[CONV616]], [[CONV617]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP618]], label [[COND_TRUE620:%.*]], label [[COND_FALSE622:%.*]]
+// SIMD-ONLY0:       cond.true620:
+// SIMD-ONLY0-NEXT:    [[TMP258:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV621:%.*]] = sext i8 [[TMP258]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END624:%.*]]
+// SIMD-ONLY0:       cond.false622:
+// SIMD-ONLY0-NEXT:    [[TMP259:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV623:%.*]] = sext i8 [[TMP259]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END624]]
+// SIMD-ONLY0:       cond.end624:
+// SIMD-ONLY0-NEXT:    [[COND625:%.*]] = phi i32 [ [[CONV621]], [[COND_TRUE620]] ], [ [[CONV623]], [[COND_FALSE622]] ]
+// SIMD-ONLY0-NEXT:    [[CONV626:%.*]] = trunc i32 [[COND625]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV626]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP260:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV627:%.*]] = sext i8 [[TMP260]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP261:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV628:%.*]] = sext i8 [[TMP261]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP629:%.*]] = icmp sgt i32 [[CONV627]], [[CONV628]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP629]], label [[COND_TRUE631:%.*]], label [[COND_FALSE633:%.*]]
+// SIMD-ONLY0:       cond.true631:
+// SIMD-ONLY0-NEXT:    [[TMP262:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV632:%.*]] = sext i8 [[TMP262]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END635:%.*]]
+// SIMD-ONLY0:       cond.false633:
+// SIMD-ONLY0-NEXT:    [[TMP263:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV634:%.*]] = sext i8 [[TMP263]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END635]]
+// SIMD-ONLY0:       cond.end635:
+// SIMD-ONLY0-NEXT:    [[COND636:%.*]] = phi i32 [ [[CONV632]], [[COND_TRUE631]] ], [ [[CONV634]], [[COND_FALSE633]] ]
+// SIMD-ONLY0-NEXT:    [[CONV637:%.*]] = trunc i32 [[COND636]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV637]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP264:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV638:%.*]] = sext i8 [[TMP264]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP265:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV639:%.*]] = sext i8 [[TMP265]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP640:%.*]] = icmp slt i32 [[CONV638]], [[CONV639]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP640]], label [[COND_TRUE642:%.*]], label [[COND_FALSE644:%.*]]
+// SIMD-ONLY0:       cond.true642:
+// SIMD-ONLY0-NEXT:    [[TMP266:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV643:%.*]] = sext i8 [[TMP266]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END646:%.*]]
+// SIMD-ONLY0:       cond.false644:
+// SIMD-ONLY0-NEXT:    [[TMP267:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV645:%.*]] = sext i8 [[TMP267]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END646]]
+// SIMD-ONLY0:       cond.end646:
+// SIMD-ONLY0-NEXT:    [[COND647:%.*]] = phi i32 [ [[CONV643]], [[COND_TRUE642]] ], [ [[CONV645]], [[COND_FALSE644]] ]
+// SIMD-ONLY0-NEXT:    [[CONV648:%.*]] = trunc i32 [[COND647]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV648]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP268:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV649:%.*]] = sext i8 [[TMP268]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP269:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV650:%.*]] = sext i8 [[TMP269]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP651:%.*]] = icmp sgt i32 [[CONV649]], [[CONV650]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP651]], label [[IF_THEN653:%.*]], label [[IF_END654:%.*]]
+// SIMD-ONLY0:       if.then653:
+// SIMD-ONLY0-NEXT:    [[TMP270:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP270]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END654]]
+// SIMD-ONLY0:       if.end654:
+// SIMD-ONLY0-NEXT:    [[TMP271:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV655:%.*]] = sext i8 [[TMP271]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP272:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV656:%.*]] = sext i8 [[TMP272]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP657:%.*]] = icmp slt i32 [[CONV655]], [[CONV656]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP657]], label [[IF_THEN659:%.*]], label [[IF_END660:%.*]]
+// SIMD-ONLY0:       if.then659:
+// SIMD-ONLY0-NEXT:    [[TMP273:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP273]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END660]]
+// SIMD-ONLY0:       if.end660:
+// SIMD-ONLY0-NEXT:    [[TMP274:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV661:%.*]] = sext i8 [[TMP274]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP275:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV662:%.*]] = sext i8 [[TMP275]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP663:%.*]] = icmp sgt i32 [[CONV661]], [[CONV662]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP663]], label [[IF_THEN665:%.*]], label [[IF_END666:%.*]]
+// SIMD-ONLY0:       if.then665:
+// SIMD-ONLY0-NEXT:    [[TMP276:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP276]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END666]]
+// SIMD-ONLY0:       if.end666:
+// SIMD-ONLY0-NEXT:    [[TMP277:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV667:%.*]] = sext i8 [[TMP277]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP278:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV668:%.*]] = sext i8 [[TMP278]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP669:%.*]] = icmp slt i32 [[CONV667]], [[CONV668]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP669]], label [[IF_THEN671:%.*]], label [[IF_END672:%.*]]
+// SIMD-ONLY0:       if.then671:
+// SIMD-ONLY0-NEXT:    [[TMP279:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP279]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END672]]
+// SIMD-ONLY0:       if.end672:
+// SIMD-ONLY0-NEXT:    [[TMP280:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV673:%.*]] = sext i8 [[TMP280]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP281:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV674:%.*]] = sext i8 [[TMP281]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP675:%.*]] = icmp eq i32 [[CONV673]], [[CONV674]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP675]], label [[COND_TRUE677:%.*]], label [[COND_FALSE679:%.*]]
+// SIMD-ONLY0:       cond.true677:
+// SIMD-ONLY0-NEXT:    [[TMP282:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV678:%.*]] = sext i8 [[TMP282]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END681:%.*]]
+// SIMD-ONLY0:       cond.false679:
+// SIMD-ONLY0-NEXT:    [[TMP283:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV680:%.*]] = sext i8 [[TMP283]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END681]]
+// SIMD-ONLY0:       cond.end681:
+// SIMD-ONLY0-NEXT:    [[COND682:%.*]] = phi i32 [ [[CONV678]], [[COND_TRUE677]] ], [ [[CONV680]], [[COND_FALSE679]] ]
+// SIMD-ONLY0-NEXT:    [[CONV683:%.*]] = trunc i32 [[COND682]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV683]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP284:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV684:%.*]] = sext i8 [[TMP284]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP285:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV685:%.*]] = sext i8 [[TMP285]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP686:%.*]] = icmp eq i32 [[CONV684]], [[CONV685]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP686]], label [[COND_TRUE688:%.*]], label [[COND_FALSE690:%.*]]
+// SIMD-ONLY0:       cond.true688:
+// SIMD-ONLY0-NEXT:    [[TMP286:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV689:%.*]] = sext i8 [[TMP286]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END692:%.*]]
+// SIMD-ONLY0:       cond.false690:
+// SIMD-ONLY0-NEXT:    [[TMP287:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV691:%.*]] = sext i8 [[TMP287]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END692]]
+// SIMD-ONLY0:       cond.end692:
+// SIMD-ONLY0-NEXT:    [[COND693:%.*]] = phi i32 [ [[CONV689]], [[COND_TRUE688]] ], [ [[CONV691]], [[COND_FALSE690]] ]
+// SIMD-ONLY0-NEXT:    [[CONV694:%.*]] = trunc i32 [[COND693]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV694]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP288:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV695:%.*]] = sext i8 [[TMP288]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP289:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV696:%.*]] = sext i8 [[TMP289]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP697:%.*]] = icmp eq i32 [[CONV695]], [[CONV696]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP697]], label [[IF_THEN699:%.*]], label [[IF_END700:%.*]]
+// SIMD-ONLY0:       if.then699:
+// SIMD-ONLY0-NEXT:    [[TMP290:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP290]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END700]]
+// SIMD-ONLY0:       if.end700:
+// SIMD-ONLY0-NEXT:    [[TMP291:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV701:%.*]] = sext i8 [[TMP291]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP292:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV702:%.*]] = sext i8 [[TMP292]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP703:%.*]] = icmp eq i32 [[CONV701]], [[CONV702]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP703]], label [[IF_THEN705:%.*]], label [[IF_END706:%.*]]
+// SIMD-ONLY0:       if.then705:
+// SIMD-ONLY0-NEXT:    [[TMP293:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP293]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END706]]
+// SIMD-ONLY0:       if.end706:
+// SIMD-ONLY0-NEXT:    [[TMP294:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV707:%.*]] = zext i8 [[TMP294]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP295:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV708:%.*]] = zext i8 [[TMP295]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP709:%.*]] = icmp sgt i32 [[CONV707]], [[CONV708]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP709]], label [[COND_TRUE711:%.*]], label [[COND_FALSE713:%.*]]
+// SIMD-ONLY0:       cond.true711:
+// SIMD-ONLY0-NEXT:    [[TMP296:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV712:%.*]] = zext i8 [[TMP296]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END715:%.*]]
+// SIMD-ONLY0:       cond.false713:
+// SIMD-ONLY0-NEXT:    [[TMP297:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV714:%.*]] = zext i8 [[TMP297]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END715]]
+// SIMD-ONLY0:       cond.end715:
+// SIMD-ONLY0-NEXT:    [[COND716:%.*]] = phi i32 [ [[CONV712]], [[COND_TRUE711]] ], [ [[CONV714]], [[COND_FALSE713]] ]
+// SIMD-ONLY0-NEXT:    [[CONV717:%.*]] = trunc i32 [[COND716]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV717]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP298:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV718:%.*]] = zext i8 [[TMP298]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP299:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV719:%.*]] = zext i8 [[TMP299]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP720:%.*]] = icmp slt i32 [[CONV718]], [[CONV719]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP720]], label [[COND_TRUE722:%.*]], label [[COND_FALSE724:%.*]]
+// SIMD-ONLY0:       cond.true722:
+// SIMD-ONLY0-NEXT:    [[TMP300:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV723:%.*]] = zext i8 [[TMP300]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END726:%.*]]
+// SIMD-ONLY0:       cond.false724:
+// SIMD-ONLY0-NEXT:    [[TMP301:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV725:%.*]] = zext i8 [[TMP301]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END726]]
+// SIMD-ONLY0:       cond.end726:
+// SIMD-ONLY0-NEXT:    [[COND727:%.*]] = phi i32 [ [[CONV723]], [[COND_TRUE722]] ], [ [[CONV725]], [[COND_FALSE724]] ]
+// SIMD-ONLY0-NEXT:    [[CONV728:%.*]] = trunc i32 [[COND727]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV728]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP302:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV729:%.*]] = zext i8 [[TMP302]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP303:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV730:%.*]] = zext i8 [[TMP303]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP731:%.*]] = icmp sgt i32 [[CONV729]], [[CONV730]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP731]], label [[COND_TRUE733:%.*]], label [[COND_FALSE735:%.*]]
+// SIMD-ONLY0:       cond.true733:
+// SIMD-ONLY0-NEXT:    [[TMP304:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV734:%.*]] = zext i8 [[TMP304]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END737:%.*]]
+// SIMD-ONLY0:       cond.false735:
+// SIMD-ONLY0-NEXT:    [[TMP305:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV736:%.*]] = zext i8 [[TMP305]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END737]]
+// SIMD-ONLY0:       cond.end737:
+// SIMD-ONLY0-NEXT:    [[COND738:%.*]] = phi i32 [ [[CONV734]], [[COND_TRUE733]] ], [ [[CONV736]], [[COND_FALSE735]] ]
+// SIMD-ONLY0-NEXT:    [[CONV739:%.*]] = trunc i32 [[COND738]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV739]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP306:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV740:%.*]] = zext i8 [[TMP306]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP307:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV741:%.*]] = zext i8 [[TMP307]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP742:%.*]] = icmp slt i32 [[CONV740]], [[CONV741]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP742]], label [[COND_TRUE744:%.*]], label [[COND_FALSE746:%.*]]
+// SIMD-ONLY0:       cond.true744:
+// SIMD-ONLY0-NEXT:    [[TMP308:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV745:%.*]] = zext i8 [[TMP308]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END748:%.*]]
+// SIMD-ONLY0:       cond.false746:
+// SIMD-ONLY0-NEXT:    [[TMP309:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV747:%.*]] = zext i8 [[TMP309]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END748]]
+// SIMD-ONLY0:       cond.end748:
+// SIMD-ONLY0-NEXT:    [[COND749:%.*]] = phi i32 [ [[CONV745]], [[COND_TRUE744]] ], [ [[CONV747]], [[COND_FALSE746]] ]
+// SIMD-ONLY0-NEXT:    [[CONV750:%.*]] = trunc i32 [[COND749]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV750]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP310:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV751:%.*]] = zext i8 [[TMP310]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP311:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV752:%.*]] = zext i8 [[TMP311]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP753:%.*]] = icmp sgt i32 [[CONV751]], [[CONV752]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP753]], label [[IF_THEN755:%.*]], label [[IF_END756:%.*]]
+// SIMD-ONLY0:       if.then755:
+// SIMD-ONLY0-NEXT:    [[TMP312:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP312]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END756]]
+// SIMD-ONLY0:       if.end756:
+// SIMD-ONLY0-NEXT:    [[TMP313:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV757:%.*]] = zext i8 [[TMP313]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP314:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV758:%.*]] = zext i8 [[TMP314]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP759:%.*]] = icmp slt i32 [[CONV757]], [[CONV758]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP759]], label [[IF_THEN761:%.*]], label [[IF_END762:%.*]]
+// SIMD-ONLY0:       if.then761:
+// SIMD-ONLY0-NEXT:    [[TMP315:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP315]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END762]]
+// SIMD-ONLY0:       if.end762:
+// SIMD-ONLY0-NEXT:    [[TMP316:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV763:%.*]] = zext i8 [[TMP316]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP317:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV764:%.*]] = zext i8 [[TMP317]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP765:%.*]] = icmp sgt i32 [[CONV763]], [[CONV764]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP765]], label [[IF_THEN767:%.*]], label [[IF_END768:%.*]]
+// SIMD-ONLY0:       if.then767:
+// SIMD-ONLY0-NEXT:    [[TMP318:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP318]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END768]]
+// SIMD-ONLY0:       if.end768:
+// SIMD-ONLY0-NEXT:    [[TMP319:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV769:%.*]] = zext i8 [[TMP319]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP320:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV770:%.*]] = zext i8 [[TMP320]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP771:%.*]] = icmp slt i32 [[CONV769]], [[CONV770]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP771]], label [[IF_THEN773:%.*]], label [[IF_END774:%.*]]
+// SIMD-ONLY0:       if.then773:
+// SIMD-ONLY0-NEXT:    [[TMP321:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP321]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END774]]
+// SIMD-ONLY0:       if.end774:
+// SIMD-ONLY0-NEXT:    [[TMP322:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV775:%.*]] = zext i8 [[TMP322]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP323:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV776:%.*]] = zext i8 [[TMP323]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP777:%.*]] = icmp eq i32 [[CONV775]], [[CONV776]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP777]], label [[COND_TRUE779:%.*]], label [[COND_FALSE781:%.*]]
+// SIMD-ONLY0:       cond.true779:
+// SIMD-ONLY0-NEXT:    [[TMP324:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV780:%.*]] = zext i8 [[TMP324]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END783:%.*]]
+// SIMD-ONLY0:       cond.false781:
+// SIMD-ONLY0-NEXT:    [[TMP325:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV782:%.*]] = zext i8 [[TMP325]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END783]]
+// SIMD-ONLY0:       cond.end783:
+// SIMD-ONLY0-NEXT:    [[COND784:%.*]] = phi i32 [ [[CONV780]], [[COND_TRUE779]] ], [ [[CONV782]], [[COND_FALSE781]] ]
+// SIMD-ONLY0-NEXT:    [[CONV785:%.*]] = trunc i32 [[COND784]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV785]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP326:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV786:%.*]] = zext i8 [[TMP326]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP327:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV787:%.*]] = zext i8 [[TMP327]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP788:%.*]] = icmp eq i32 [[CONV786]], [[CONV787]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP788]], label [[COND_TRUE790:%.*]], label [[COND_FALSE792:%.*]]
+// SIMD-ONLY0:       cond.true790:
+// SIMD-ONLY0-NEXT:    [[TMP328:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV791:%.*]] = zext i8 [[TMP328]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END794:%.*]]
+// SIMD-ONLY0:       cond.false792:
+// SIMD-ONLY0-NEXT:    [[TMP329:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV793:%.*]] = zext i8 [[TMP329]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END794]]
+// SIMD-ONLY0:       cond.end794:
+// SIMD-ONLY0-NEXT:    [[COND795:%.*]] = phi i32 [ [[CONV791]], [[COND_TRUE790]] ], [ [[CONV793]], [[COND_FALSE792]] ]
+// SIMD-ONLY0-NEXT:    [[CONV796:%.*]] = trunc i32 [[COND795]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV796]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP330:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV797:%.*]] = zext i8 [[TMP330]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP331:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV798:%.*]] = zext i8 [[TMP331]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP799:%.*]] = icmp eq i32 [[CONV797]], [[CONV798]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP799]], label [[IF_THEN801:%.*]], label [[IF_END802:%.*]]
+// SIMD-ONLY0:       if.then801:
+// SIMD-ONLY0-NEXT:    [[TMP332:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP332]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END802]]
+// SIMD-ONLY0:       if.end802:
+// SIMD-ONLY0-NEXT:    [[TMP333:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV803:%.*]] = zext i8 [[TMP333]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP334:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV804:%.*]] = zext i8 [[TMP334]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP805:%.*]] = icmp eq i32 [[CONV803]], [[CONV804]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP805]], label [[IF_THEN807:%.*]], label [[IF_END808:%.*]]
+// SIMD-ONLY0:       if.then807:
+// SIMD-ONLY0-NEXT:    [[TMP335:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP335]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END808]]
+// SIMD-ONLY0:       if.end808:
+// SIMD-ONLY0-NEXT:    [[TMP336:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV809:%.*]] = sext i8 [[TMP336]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP337:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV810:%.*]] = sext i8 [[TMP337]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP811:%.*]] = icmp sgt i32 [[CONV809]], [[CONV810]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP811]], label [[COND_TRUE813:%.*]], label [[COND_FALSE815:%.*]]
+// SIMD-ONLY0:       cond.true813:
+// SIMD-ONLY0-NEXT:    [[TMP338:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV814:%.*]] = sext i8 [[TMP338]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END817:%.*]]
+// SIMD-ONLY0:       cond.false815:
+// SIMD-ONLY0-NEXT:    [[TMP339:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV816:%.*]] = sext i8 [[TMP339]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END817]]
+// SIMD-ONLY0:       cond.end817:
+// SIMD-ONLY0-NEXT:    [[COND818:%.*]] = phi i32 [ [[CONV814]], [[COND_TRUE813]] ], [ [[CONV816]], [[COND_FALSE815]] ]
+// SIMD-ONLY0-NEXT:    [[CONV819:%.*]] = trunc i32 [[COND818]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV819]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP340:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV820:%.*]] = sext i8 [[TMP340]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP341:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV821:%.*]] = sext i8 [[TMP341]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP822:%.*]] = icmp slt i32 [[CONV820]], [[CONV821]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP822]], label [[COND_TRUE824:%.*]], label [[COND_FALSE826:%.*]]
+// SIMD-ONLY0:       cond.true824:
+// SIMD-ONLY0-NEXT:    [[TMP342:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV825:%.*]] = sext i8 [[TMP342]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END828:%.*]]
+// SIMD-ONLY0:       cond.false826:
+// SIMD-ONLY0-NEXT:    [[TMP343:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV827:%.*]] = sext i8 [[TMP343]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END828]]
+// SIMD-ONLY0:       cond.end828:
+// SIMD-ONLY0-NEXT:    [[COND829:%.*]] = phi i32 [ [[CONV825]], [[COND_TRUE824]] ], [ [[CONV827]], [[COND_FALSE826]] ]
+// SIMD-ONLY0-NEXT:    [[CONV830:%.*]] = trunc i32 [[COND829]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV830]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP344:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV831:%.*]] = sext i8 [[TMP344]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP345:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV832:%.*]] = sext i8 [[TMP345]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP833:%.*]] = icmp sgt i32 [[CONV831]], [[CONV832]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP833]], label [[COND_TRUE835:%.*]], label [[COND_FALSE837:%.*]]
+// SIMD-ONLY0:       cond.true835:
+// SIMD-ONLY0-NEXT:    [[TMP346:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV836:%.*]] = sext i8 [[TMP346]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END839:%.*]]
+// SIMD-ONLY0:       cond.false837:
+// SIMD-ONLY0-NEXT:    [[TMP347:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV838:%.*]] = sext i8 [[TMP347]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END839]]
+// SIMD-ONLY0:       cond.end839:
+// SIMD-ONLY0-NEXT:    [[COND840:%.*]] = phi i32 [ [[CONV836]], [[COND_TRUE835]] ], [ [[CONV838]], [[COND_FALSE837]] ]
+// SIMD-ONLY0-NEXT:    [[CONV841:%.*]] = trunc i32 [[COND840]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV841]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP348:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV842:%.*]] = sext i8 [[TMP348]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP349:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV843:%.*]] = sext i8 [[TMP349]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP844:%.*]] = icmp slt i32 [[CONV842]], [[CONV843]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP844]], label [[COND_TRUE846:%.*]], label [[COND_FALSE848:%.*]]
+// SIMD-ONLY0:       cond.true846:
+// SIMD-ONLY0-NEXT:    [[TMP350:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV847:%.*]] = sext i8 [[TMP350]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END850:%.*]]
+// SIMD-ONLY0:       cond.false848:
+// SIMD-ONLY0-NEXT:    [[TMP351:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV849:%.*]] = sext i8 [[TMP351]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END850]]
+// SIMD-ONLY0:       cond.end850:
+// SIMD-ONLY0-NEXT:    [[COND851:%.*]] = phi i32 [ [[CONV847]], [[COND_TRUE846]] ], [ [[CONV849]], [[COND_FALSE848]] ]
+// SIMD-ONLY0-NEXT:    [[CONV852:%.*]] = trunc i32 [[COND851]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV852]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP352:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV853:%.*]] = sext i8 [[TMP352]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP353:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV854:%.*]] = sext i8 [[TMP353]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP855:%.*]] = icmp sgt i32 [[CONV853]], [[CONV854]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP855]], label [[IF_THEN857:%.*]], label [[IF_END858:%.*]]
+// SIMD-ONLY0:       if.then857:
+// SIMD-ONLY0-NEXT:    [[TMP354:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP354]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END858]]
+// SIMD-ONLY0:       if.end858:
+// SIMD-ONLY0-NEXT:    [[TMP355:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV859:%.*]] = sext i8 [[TMP355]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP356:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV860:%.*]] = sext i8 [[TMP356]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP861:%.*]] = icmp slt i32 [[CONV859]], [[CONV860]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP861]], label [[IF_THEN863:%.*]], label [[IF_END864:%.*]]
+// SIMD-ONLY0:       if.then863:
+// SIMD-ONLY0-NEXT:    [[TMP357:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP357]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END864]]
+// SIMD-ONLY0:       if.end864:
+// SIMD-ONLY0-NEXT:    [[TMP358:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV865:%.*]] = sext i8 [[TMP358]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP359:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV866:%.*]] = sext i8 [[TMP359]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP867:%.*]] = icmp sgt i32 [[CONV865]], [[CONV866]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP867]], label [[IF_THEN869:%.*]], label [[IF_END870:%.*]]
+// SIMD-ONLY0:       if.then869:
+// SIMD-ONLY0-NEXT:    [[TMP360:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP360]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END870]]
+// SIMD-ONLY0:       if.end870:
+// SIMD-ONLY0-NEXT:    [[TMP361:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV871:%.*]] = sext i8 [[TMP361]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP362:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV872:%.*]] = sext i8 [[TMP362]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP873:%.*]] = icmp slt i32 [[CONV871]], [[CONV872]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP873]], label [[IF_THEN875:%.*]], label [[IF_END876:%.*]]
+// SIMD-ONLY0:       if.then875:
+// SIMD-ONLY0-NEXT:    [[TMP363:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP363]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END876]]
+// SIMD-ONLY0:       if.end876:
+// SIMD-ONLY0-NEXT:    [[TMP364:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV877:%.*]] = sext i8 [[TMP364]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP365:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV878:%.*]] = sext i8 [[TMP365]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP879:%.*]] = icmp eq i32 [[CONV877]], [[CONV878]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP879]], label [[COND_TRUE881:%.*]], label [[COND_FALSE883:%.*]]
+// SIMD-ONLY0:       cond.true881:
+// SIMD-ONLY0-NEXT:    [[TMP366:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV882:%.*]] = sext i8 [[TMP366]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END885:%.*]]
+// SIMD-ONLY0:       cond.false883:
+// SIMD-ONLY0-NEXT:    [[TMP367:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV884:%.*]] = sext i8 [[TMP367]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END885]]
+// SIMD-ONLY0:       cond.end885:
+// SIMD-ONLY0-NEXT:    [[COND886:%.*]] = phi i32 [ [[CONV882]], [[COND_TRUE881]] ], [ [[CONV884]], [[COND_FALSE883]] ]
+// SIMD-ONLY0-NEXT:    [[CONV887:%.*]] = trunc i32 [[COND886]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV887]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP368:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV888:%.*]] = sext i8 [[TMP368]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP369:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV889:%.*]] = sext i8 [[TMP369]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP890:%.*]] = icmp eq i32 [[CONV888]], [[CONV889]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP890]], label [[COND_TRUE892:%.*]], label [[COND_FALSE894:%.*]]
+// SIMD-ONLY0:       cond.true892:
+// SIMD-ONLY0-NEXT:    [[TMP370:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV893:%.*]] = sext i8 [[TMP370]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END896:%.*]]
+// SIMD-ONLY0:       cond.false894:
+// SIMD-ONLY0-NEXT:    [[TMP371:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV895:%.*]] = sext i8 [[TMP371]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END896]]
+// SIMD-ONLY0:       cond.end896:
+// SIMD-ONLY0-NEXT:    [[COND897:%.*]] = phi i32 [ [[CONV893]], [[COND_TRUE892]] ], [ [[CONV895]], [[COND_FALSE894]] ]
+// SIMD-ONLY0-NEXT:    [[CONV898:%.*]] = trunc i32 [[COND897]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV898]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP372:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV899:%.*]] = sext i8 [[TMP372]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP373:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV900:%.*]] = sext i8 [[TMP373]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP901:%.*]] = icmp eq i32 [[CONV899]], [[CONV900]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP901]], label [[IF_THEN903:%.*]], label [[IF_END904:%.*]]
+// SIMD-ONLY0:       if.then903:
+// SIMD-ONLY0-NEXT:    [[TMP374:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP374]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END904]]
+// SIMD-ONLY0:       if.end904:
+// SIMD-ONLY0-NEXT:    [[TMP375:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV905:%.*]] = sext i8 [[TMP375]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP376:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV906:%.*]] = sext i8 [[TMP376]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP907:%.*]] = icmp eq i32 [[CONV905]], [[CONV906]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP907]], label [[IF_THEN909:%.*]], label [[IF_END910:%.*]]
+// SIMD-ONLY0:       if.then909:
+// SIMD-ONLY0-NEXT:    [[TMP377:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP377]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END910]]
+// SIMD-ONLY0:       if.end910:
+// SIMD-ONLY0-NEXT:    [[TMP378:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV911:%.*]] = zext i8 [[TMP378]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP379:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV912:%.*]] = zext i8 [[TMP379]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP913:%.*]] = icmp sgt i32 [[CONV911]], [[CONV912]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP913]], label [[COND_TRUE915:%.*]], label [[COND_FALSE917:%.*]]
+// SIMD-ONLY0:       cond.true915:
+// SIMD-ONLY0-NEXT:    [[TMP380:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV916:%.*]] = zext i8 [[TMP380]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END919:%.*]]
+// SIMD-ONLY0:       cond.false917:
+// SIMD-ONLY0-NEXT:    [[TMP381:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV918:%.*]] = zext i8 [[TMP381]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END919]]
+// SIMD-ONLY0:       cond.end919:
+// SIMD-ONLY0-NEXT:    [[COND920:%.*]] = phi i32 [ [[CONV916]], [[COND_TRUE915]] ], [ [[CONV918]], [[COND_FALSE917]] ]
+// SIMD-ONLY0-NEXT:    [[CONV921:%.*]] = trunc i32 [[COND920]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV921]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP382:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV922:%.*]] = zext i8 [[TMP382]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP383:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV923:%.*]] = zext i8 [[TMP383]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP924:%.*]] = icmp slt i32 [[CONV922]], [[CONV923]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP924]], label [[COND_TRUE926:%.*]], label [[COND_FALSE928:%.*]]
+// SIMD-ONLY0:       cond.true926:
+// SIMD-ONLY0-NEXT:    [[TMP384:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV927:%.*]] = zext i8 [[TMP384]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END930:%.*]]
+// SIMD-ONLY0:       cond.false928:
+// SIMD-ONLY0-NEXT:    [[TMP385:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV929:%.*]] = zext i8 [[TMP385]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END930]]
+// SIMD-ONLY0:       cond.end930:
+// SIMD-ONLY0-NEXT:    [[COND931:%.*]] = phi i32 [ [[CONV927]], [[COND_TRUE926]] ], [ [[CONV929]], [[COND_FALSE928]] ]
+// SIMD-ONLY0-NEXT:    [[CONV932:%.*]] = trunc i32 [[COND931]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV932]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP386:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV933:%.*]] = zext i8 [[TMP386]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP387:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV934:%.*]] = zext i8 [[TMP387]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP935:%.*]] = icmp sgt i32 [[CONV933]], [[CONV934]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP935]], label [[COND_TRUE937:%.*]], label [[COND_FALSE939:%.*]]
+// SIMD-ONLY0:       cond.true937:
+// SIMD-ONLY0-NEXT:    [[TMP388:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV938:%.*]] = zext i8 [[TMP388]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END941:%.*]]
+// SIMD-ONLY0:       cond.false939:
+// SIMD-ONLY0-NEXT:    [[TMP389:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV940:%.*]] = zext i8 [[TMP389]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END941]]
+// SIMD-ONLY0:       cond.end941:
+// SIMD-ONLY0-NEXT:    [[COND942:%.*]] = phi i32 [ [[CONV938]], [[COND_TRUE937]] ], [ [[CONV940]], [[COND_FALSE939]] ]
+// SIMD-ONLY0-NEXT:    [[CONV943:%.*]] = trunc i32 [[COND942]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV943]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP390:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV944:%.*]] = zext i8 [[TMP390]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP391:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV945:%.*]] = zext i8 [[TMP391]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP946:%.*]] = icmp slt i32 [[CONV944]], [[CONV945]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP946]], label [[COND_TRUE948:%.*]], label [[COND_FALSE950:%.*]]
+// SIMD-ONLY0:       cond.true948:
+// SIMD-ONLY0-NEXT:    [[TMP392:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV949:%.*]] = zext i8 [[TMP392]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END952:%.*]]
+// SIMD-ONLY0:       cond.false950:
+// SIMD-ONLY0-NEXT:    [[TMP393:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV951:%.*]] = zext i8 [[TMP393]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END952]]
+// SIMD-ONLY0:       cond.end952:
+// SIMD-ONLY0-NEXT:    [[COND953:%.*]] = phi i32 [ [[CONV949]], [[COND_TRUE948]] ], [ [[CONV951]], [[COND_FALSE950]] ]
+// SIMD-ONLY0-NEXT:    [[CONV954:%.*]] = trunc i32 [[COND953]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV954]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP394:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV955:%.*]] = zext i8 [[TMP394]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP395:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV956:%.*]] = zext i8 [[TMP395]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP957:%.*]] = icmp sgt i32 [[CONV955]], [[CONV956]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP957]], label [[IF_THEN959:%.*]], label [[IF_END960:%.*]]
+// SIMD-ONLY0:       if.then959:
+// SIMD-ONLY0-NEXT:    [[TMP396:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP396]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END960]]
+// SIMD-ONLY0:       if.end960:
+// SIMD-ONLY0-NEXT:    [[TMP397:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV961:%.*]] = zext i8 [[TMP397]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP398:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV962:%.*]] = zext i8 [[TMP398]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP963:%.*]] = icmp slt i32 [[CONV961]], [[CONV962]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP963]], label [[IF_THEN965:%.*]], label [[IF_END966:%.*]]
+// SIMD-ONLY0:       if.then965:
+// SIMD-ONLY0-NEXT:    [[TMP399:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP399]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END966]]
+// SIMD-ONLY0:       if.end966:
+// SIMD-ONLY0-NEXT:    [[TMP400:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV967:%.*]] = zext i8 [[TMP400]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP401:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV968:%.*]] = zext i8 [[TMP401]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP969:%.*]] = icmp sgt i32 [[CONV967]], [[CONV968]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP969]], label [[IF_THEN971:%.*]], label [[IF_END972:%.*]]
+// SIMD-ONLY0:       if.then971:
+// SIMD-ONLY0-NEXT:    [[TMP402:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP402]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END972]]
+// SIMD-ONLY0:       if.end972:
+// SIMD-ONLY0-NEXT:    [[TMP403:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV973:%.*]] = zext i8 [[TMP403]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP404:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV974:%.*]] = zext i8 [[TMP404]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP975:%.*]] = icmp slt i32 [[CONV973]], [[CONV974]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP975]], label [[IF_THEN977:%.*]], label [[IF_END978:%.*]]
+// SIMD-ONLY0:       if.then977:
+// SIMD-ONLY0-NEXT:    [[TMP405:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP405]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END978]]
+// SIMD-ONLY0:       if.end978:
+// SIMD-ONLY0-NEXT:    [[TMP406:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV979:%.*]] = zext i8 [[TMP406]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP407:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV980:%.*]] = zext i8 [[TMP407]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP981:%.*]] = icmp eq i32 [[CONV979]], [[CONV980]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP981]], label [[COND_TRUE983:%.*]], label [[COND_FALSE985:%.*]]
+// SIMD-ONLY0:       cond.true983:
+// SIMD-ONLY0-NEXT:    [[TMP408:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV984:%.*]] = zext i8 [[TMP408]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END987:%.*]]
+// SIMD-ONLY0:       cond.false985:
+// SIMD-ONLY0-NEXT:    [[TMP409:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV986:%.*]] = zext i8 [[TMP409]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END987]]
+// SIMD-ONLY0:       cond.end987:
+// SIMD-ONLY0-NEXT:    [[COND988:%.*]] = phi i32 [ [[CONV984]], [[COND_TRUE983]] ], [ [[CONV986]], [[COND_FALSE985]] ]
+// SIMD-ONLY0-NEXT:    [[CONV989:%.*]] = trunc i32 [[COND988]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV989]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP410:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV990:%.*]] = zext i8 [[TMP410]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP411:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV991:%.*]] = zext i8 [[TMP411]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP992:%.*]] = icmp eq i32 [[CONV990]], [[CONV991]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP992]], label [[COND_TRUE994:%.*]], label [[COND_FALSE996:%.*]]
+// SIMD-ONLY0:       cond.true994:
+// SIMD-ONLY0-NEXT:    [[TMP412:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV995:%.*]] = zext i8 [[TMP412]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END998:%.*]]
+// SIMD-ONLY0:       cond.false996:
+// SIMD-ONLY0-NEXT:    [[TMP413:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV997:%.*]] = zext i8 [[TMP413]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END998]]
+// SIMD-ONLY0:       cond.end998:
+// SIMD-ONLY0-NEXT:    [[COND999:%.*]] = phi i32 [ [[CONV995]], [[COND_TRUE994]] ], [ [[CONV997]], [[COND_FALSE996]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1000:%.*]] = trunc i32 [[COND999]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV1000]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP414:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1001:%.*]] = zext i8 [[TMP414]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP415:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1002:%.*]] = zext i8 [[TMP415]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1003:%.*]] = icmp eq i32 [[CONV1001]], [[CONV1002]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1003]], label [[IF_THEN1005:%.*]], label [[IF_END1006:%.*]]
+// SIMD-ONLY0:       if.then1005:
+// SIMD-ONLY0-NEXT:    [[TMP416:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP416]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1006]]
+// SIMD-ONLY0:       if.end1006:
+// SIMD-ONLY0-NEXT:    [[TMP417:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1007:%.*]] = zext i8 [[TMP417]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP418:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1008:%.*]] = zext i8 [[TMP418]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1009:%.*]] = icmp eq i32 [[CONV1007]], [[CONV1008]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1009]], label [[IF_THEN1011:%.*]], label [[IF_END1012:%.*]]
+// SIMD-ONLY0:       if.then1011:
+// SIMD-ONLY0-NEXT:    [[TMP419:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP419]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1012]]
+// SIMD-ONLY0:       if.end1012:
+// SIMD-ONLY0-NEXT:    [[TMP420:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1013:%.*]] = sext i8 [[TMP420]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP421:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1014:%.*]] = sext i8 [[TMP421]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1015:%.*]] = icmp sgt i32 [[CONV1013]], [[CONV1014]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1015]], label [[COND_TRUE1017:%.*]], label [[COND_FALSE1019:%.*]]
+// SIMD-ONLY0:       cond.true1017:
+// SIMD-ONLY0-NEXT:    [[TMP422:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1018:%.*]] = sext i8 [[TMP422]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1021:%.*]]
+// SIMD-ONLY0:       cond.false1019:
+// SIMD-ONLY0-NEXT:    [[TMP423:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1020:%.*]] = sext i8 [[TMP423]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1021]]
+// SIMD-ONLY0:       cond.end1021:
+// SIMD-ONLY0-NEXT:    [[COND1022:%.*]] = phi i32 [ [[CONV1018]], [[COND_TRUE1017]] ], [ [[CONV1020]], [[COND_FALSE1019]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1023:%.*]] = trunc i32 [[COND1022]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV1023]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP424:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1024:%.*]] = sext i8 [[TMP424]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP425:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1025:%.*]] = sext i8 [[TMP425]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1026:%.*]] = icmp slt i32 [[CONV1024]], [[CONV1025]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1026]], label [[COND_TRUE1028:%.*]], label [[COND_FALSE1030:%.*]]
+// SIMD-ONLY0:       cond.true1028:
+// SIMD-ONLY0-NEXT:    [[TMP426:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1029:%.*]] = sext i8 [[TMP426]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1032:%.*]]
+// SIMD-ONLY0:       cond.false1030:
+// SIMD-ONLY0-NEXT:    [[TMP427:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1031:%.*]] = sext i8 [[TMP427]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1032]]
+// SIMD-ONLY0:       cond.end1032:
+// SIMD-ONLY0-NEXT:    [[COND1033:%.*]] = phi i32 [ [[CONV1029]], [[COND_TRUE1028]] ], [ [[CONV1031]], [[COND_FALSE1030]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1034:%.*]] = trunc i32 [[COND1033]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV1034]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP428:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1035:%.*]] = sext i8 [[TMP428]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP429:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1036:%.*]] = sext i8 [[TMP429]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1037:%.*]] = icmp sgt i32 [[CONV1035]], [[CONV1036]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1037]], label [[COND_TRUE1039:%.*]], label [[COND_FALSE1041:%.*]]
+// SIMD-ONLY0:       cond.true1039:
+// SIMD-ONLY0-NEXT:    [[TMP430:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1040:%.*]] = sext i8 [[TMP430]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1043:%.*]]
+// SIMD-ONLY0:       cond.false1041:
+// SIMD-ONLY0-NEXT:    [[TMP431:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1042:%.*]] = sext i8 [[TMP431]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1043]]
+// SIMD-ONLY0:       cond.end1043:
+// SIMD-ONLY0-NEXT:    [[COND1044:%.*]] = phi i32 [ [[CONV1040]], [[COND_TRUE1039]] ], [ [[CONV1042]], [[COND_FALSE1041]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1045:%.*]] = trunc i32 [[COND1044]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV1045]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP432:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1046:%.*]] = sext i8 [[TMP432]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP433:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1047:%.*]] = sext i8 [[TMP433]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1048:%.*]] = icmp slt i32 [[CONV1046]], [[CONV1047]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1048]], label [[COND_TRUE1050:%.*]], label [[COND_FALSE1052:%.*]]
+// SIMD-ONLY0:       cond.true1050:
+// SIMD-ONLY0-NEXT:    [[TMP434:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1051:%.*]] = sext i8 [[TMP434]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1054:%.*]]
+// SIMD-ONLY0:       cond.false1052:
+// SIMD-ONLY0-NEXT:    [[TMP435:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1053:%.*]] = sext i8 [[TMP435]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1054]]
+// SIMD-ONLY0:       cond.end1054:
+// SIMD-ONLY0-NEXT:    [[COND1055:%.*]] = phi i32 [ [[CONV1051]], [[COND_TRUE1050]] ], [ [[CONV1053]], [[COND_FALSE1052]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1056:%.*]] = trunc i32 [[COND1055]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV1056]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP436:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1057:%.*]] = sext i8 [[TMP436]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP437:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1058:%.*]] = sext i8 [[TMP437]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1059:%.*]] = icmp sgt i32 [[CONV1057]], [[CONV1058]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1059]], label [[IF_THEN1061:%.*]], label [[IF_END1062:%.*]]
+// SIMD-ONLY0:       if.then1061:
+// SIMD-ONLY0-NEXT:    [[TMP438:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP438]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1062]]
+// SIMD-ONLY0:       if.end1062:
+// SIMD-ONLY0-NEXT:    [[TMP439:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1063:%.*]] = sext i8 [[TMP439]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP440:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1064:%.*]] = sext i8 [[TMP440]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1065:%.*]] = icmp slt i32 [[CONV1063]], [[CONV1064]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1065]], label [[IF_THEN1067:%.*]], label [[IF_END1068:%.*]]
+// SIMD-ONLY0:       if.then1067:
+// SIMD-ONLY0-NEXT:    [[TMP441:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP441]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1068]]
+// SIMD-ONLY0:       if.end1068:
+// SIMD-ONLY0-NEXT:    [[TMP442:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1069:%.*]] = sext i8 [[TMP442]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP443:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1070:%.*]] = sext i8 [[TMP443]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1071:%.*]] = icmp sgt i32 [[CONV1069]], [[CONV1070]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1071]], label [[IF_THEN1073:%.*]], label [[IF_END1074:%.*]]
+// SIMD-ONLY0:       if.then1073:
+// SIMD-ONLY0-NEXT:    [[TMP444:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP444]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1074]]
+// SIMD-ONLY0:       if.end1074:
+// SIMD-ONLY0-NEXT:    [[TMP445:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1075:%.*]] = sext i8 [[TMP445]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP446:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1076:%.*]] = sext i8 [[TMP446]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1077:%.*]] = icmp slt i32 [[CONV1075]], [[CONV1076]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1077]], label [[IF_THEN1079:%.*]], label [[IF_END1080:%.*]]
+// SIMD-ONLY0:       if.then1079:
+// SIMD-ONLY0-NEXT:    [[TMP447:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP447]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1080]]
+// SIMD-ONLY0:       if.end1080:
+// SIMD-ONLY0-NEXT:    [[TMP448:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1081:%.*]] = sext i8 [[TMP448]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP449:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1082:%.*]] = sext i8 [[TMP449]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1083:%.*]] = icmp eq i32 [[CONV1081]], [[CONV1082]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1083]], label [[COND_TRUE1085:%.*]], label [[COND_FALSE1087:%.*]]
+// SIMD-ONLY0:       cond.true1085:
+// SIMD-ONLY0-NEXT:    [[TMP450:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1086:%.*]] = sext i8 [[TMP450]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1089:%.*]]
+// SIMD-ONLY0:       cond.false1087:
+// SIMD-ONLY0-NEXT:    [[TMP451:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1088:%.*]] = sext i8 [[TMP451]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1089]]
+// SIMD-ONLY0:       cond.end1089:
+// SIMD-ONLY0-NEXT:    [[COND1090:%.*]] = phi i32 [ [[CONV1086]], [[COND_TRUE1085]] ], [ [[CONV1088]], [[COND_FALSE1087]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1091:%.*]] = trunc i32 [[COND1090]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV1091]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP452:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1092:%.*]] = sext i8 [[TMP452]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP453:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1093:%.*]] = sext i8 [[TMP453]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1094:%.*]] = icmp eq i32 [[CONV1092]], [[CONV1093]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1094]], label [[COND_TRUE1096:%.*]], label [[COND_FALSE1098:%.*]]
+// SIMD-ONLY0:       cond.true1096:
+// SIMD-ONLY0-NEXT:    [[TMP454:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1097:%.*]] = sext i8 [[TMP454]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1100:%.*]]
+// SIMD-ONLY0:       cond.false1098:
+// SIMD-ONLY0-NEXT:    [[TMP455:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1099:%.*]] = sext i8 [[TMP455]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1100]]
+// SIMD-ONLY0:       cond.end1100:
+// SIMD-ONLY0-NEXT:    [[COND1101:%.*]] = phi i32 [ [[CONV1097]], [[COND_TRUE1096]] ], [ [[CONV1099]], [[COND_FALSE1098]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1102:%.*]] = trunc i32 [[COND1101]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV1102]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP456:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1103:%.*]] = sext i8 [[TMP456]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP457:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1104:%.*]] = sext i8 [[TMP457]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1105:%.*]] = icmp eq i32 [[CONV1103]], [[CONV1104]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1105]], label [[IF_THEN1107:%.*]], label [[IF_END1108:%.*]]
+// SIMD-ONLY0:       if.then1107:
+// SIMD-ONLY0-NEXT:    [[TMP458:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP458]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1108]]
+// SIMD-ONLY0:       if.end1108:
+// SIMD-ONLY0-NEXT:    [[TMP459:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1109:%.*]] = sext i8 [[TMP459]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP460:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1110:%.*]] = sext i8 [[TMP460]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1111:%.*]] = icmp eq i32 [[CONV1109]], [[CONV1110]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1111]], label [[IF_THEN1113:%.*]], label [[IF_END1114:%.*]]
+// SIMD-ONLY0:       if.then1113:
+// SIMD-ONLY0-NEXT:    [[TMP461:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP461]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1114]]
+// SIMD-ONLY0:       if.end1114:
+// SIMD-ONLY0-NEXT:    [[TMP462:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1115:%.*]] = zext i8 [[TMP462]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP463:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1116:%.*]] = zext i8 [[TMP463]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1117:%.*]] = icmp sgt i32 [[CONV1115]], [[CONV1116]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1117]], label [[COND_TRUE1119:%.*]], label [[COND_FALSE1121:%.*]]
+// SIMD-ONLY0:       cond.true1119:
+// SIMD-ONLY0-NEXT:    [[TMP464:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1120:%.*]] = zext i8 [[TMP464]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1123:%.*]]
+// SIMD-ONLY0:       cond.false1121:
+// SIMD-ONLY0-NEXT:    [[TMP465:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1122:%.*]] = zext i8 [[TMP465]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1123]]
+// SIMD-ONLY0:       cond.end1123:
+// SIMD-ONLY0-NEXT:    [[COND1124:%.*]] = phi i32 [ [[CONV1120]], [[COND_TRUE1119]] ], [ [[CONV1122]], [[COND_FALSE1121]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1125:%.*]] = trunc i32 [[COND1124]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV1125]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP466:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1126:%.*]] = zext i8 [[TMP466]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP467:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1127:%.*]] = zext i8 [[TMP467]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1128:%.*]] = icmp slt i32 [[CONV1126]], [[CONV1127]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1128]], label [[COND_TRUE1130:%.*]], label [[COND_FALSE1132:%.*]]
+// SIMD-ONLY0:       cond.true1130:
+// SIMD-ONLY0-NEXT:    [[TMP468:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1131:%.*]] = zext i8 [[TMP468]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1134:%.*]]
+// SIMD-ONLY0:       cond.false1132:
+// SIMD-ONLY0-NEXT:    [[TMP469:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1133:%.*]] = zext i8 [[TMP469]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1134]]
+// SIMD-ONLY0:       cond.end1134:
+// SIMD-ONLY0-NEXT:    [[COND1135:%.*]] = phi i32 [ [[CONV1131]], [[COND_TRUE1130]] ], [ [[CONV1133]], [[COND_FALSE1132]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1136:%.*]] = trunc i32 [[COND1135]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV1136]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP470:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1137:%.*]] = zext i8 [[TMP470]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP471:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1138:%.*]] = zext i8 [[TMP471]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1139:%.*]] = icmp sgt i32 [[CONV1137]], [[CONV1138]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1139]], label [[COND_TRUE1141:%.*]], label [[COND_FALSE1143:%.*]]
+// SIMD-ONLY0:       cond.true1141:
+// SIMD-ONLY0-NEXT:    [[TMP472:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1142:%.*]] = zext i8 [[TMP472]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1145:%.*]]
+// SIMD-ONLY0:       cond.false1143:
+// SIMD-ONLY0-NEXT:    [[TMP473:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1144:%.*]] = zext i8 [[TMP473]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1145]]
+// SIMD-ONLY0:       cond.end1145:
+// SIMD-ONLY0-NEXT:    [[COND1146:%.*]] = phi i32 [ [[CONV1142]], [[COND_TRUE1141]] ], [ [[CONV1144]], [[COND_FALSE1143]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1147:%.*]] = trunc i32 [[COND1146]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV1147]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP474:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1148:%.*]] = zext i8 [[TMP474]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP475:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1149:%.*]] = zext i8 [[TMP475]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1150:%.*]] = icmp slt i32 [[CONV1148]], [[CONV1149]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1150]], label [[COND_TRUE1152:%.*]], label [[COND_FALSE1154:%.*]]
+// SIMD-ONLY0:       cond.true1152:
+// SIMD-ONLY0-NEXT:    [[TMP476:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1153:%.*]] = zext i8 [[TMP476]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1156:%.*]]
+// SIMD-ONLY0:       cond.false1154:
+// SIMD-ONLY0-NEXT:    [[TMP477:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1155:%.*]] = zext i8 [[TMP477]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1156]]
+// SIMD-ONLY0:       cond.end1156:
+// SIMD-ONLY0-NEXT:    [[COND1157:%.*]] = phi i32 [ [[CONV1153]], [[COND_TRUE1152]] ], [ [[CONV1155]], [[COND_FALSE1154]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1158:%.*]] = trunc i32 [[COND1157]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV1158]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP478:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1159:%.*]] = zext i8 [[TMP478]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP479:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1160:%.*]] = zext i8 [[TMP479]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1161:%.*]] = icmp sgt i32 [[CONV1159]], [[CONV1160]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1161]], label [[IF_THEN1163:%.*]], label [[IF_END1164:%.*]]
+// SIMD-ONLY0:       if.then1163:
+// SIMD-ONLY0-NEXT:    [[TMP480:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP480]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1164]]
+// SIMD-ONLY0:       if.end1164:
+// SIMD-ONLY0-NEXT:    [[TMP481:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1165:%.*]] = zext i8 [[TMP481]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP482:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1166:%.*]] = zext i8 [[TMP482]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1167:%.*]] = icmp slt i32 [[CONV1165]], [[CONV1166]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1167]], label [[IF_THEN1169:%.*]], label [[IF_END1170:%.*]]
+// SIMD-ONLY0:       if.then1169:
+// SIMD-ONLY0-NEXT:    [[TMP483:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP483]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1170]]
+// SIMD-ONLY0:       if.end1170:
+// SIMD-ONLY0-NEXT:    [[TMP484:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1171:%.*]] = zext i8 [[TMP484]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP485:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1172:%.*]] = zext i8 [[TMP485]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1173:%.*]] = icmp sgt i32 [[CONV1171]], [[CONV1172]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1173]], label [[IF_THEN1175:%.*]], label [[IF_END1176:%.*]]
+// SIMD-ONLY0:       if.then1175:
+// SIMD-ONLY0-NEXT:    [[TMP486:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP486]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1176]]
+// SIMD-ONLY0:       if.end1176:
+// SIMD-ONLY0-NEXT:    [[TMP487:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1177:%.*]] = zext i8 [[TMP487]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP488:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1178:%.*]] = zext i8 [[TMP488]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1179:%.*]] = icmp slt i32 [[CONV1177]], [[CONV1178]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1179]], label [[IF_THEN1181:%.*]], label [[IF_END1182:%.*]]
+// SIMD-ONLY0:       if.then1181:
+// SIMD-ONLY0-NEXT:    [[TMP489:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP489]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1182]]
+// SIMD-ONLY0:       if.end1182:
+// SIMD-ONLY0-NEXT:    [[TMP490:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1183:%.*]] = zext i8 [[TMP490]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP491:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1184:%.*]] = zext i8 [[TMP491]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1185:%.*]] = icmp eq i32 [[CONV1183]], [[CONV1184]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1185]], label [[COND_TRUE1187:%.*]], label [[COND_FALSE1189:%.*]]
+// SIMD-ONLY0:       cond.true1187:
+// SIMD-ONLY0-NEXT:    [[TMP492:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1188:%.*]] = zext i8 [[TMP492]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1191:%.*]]
+// SIMD-ONLY0:       cond.false1189:
+// SIMD-ONLY0-NEXT:    [[TMP493:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1190:%.*]] = zext i8 [[TMP493]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1191]]
+// SIMD-ONLY0:       cond.end1191:
+// SIMD-ONLY0-NEXT:    [[COND1192:%.*]] = phi i32 [ [[CONV1188]], [[COND_TRUE1187]] ], [ [[CONV1190]], [[COND_FALSE1189]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1193:%.*]] = trunc i32 [[COND1192]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV1193]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP494:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1194:%.*]] = zext i8 [[TMP494]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP495:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1195:%.*]] = zext i8 [[TMP495]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1196:%.*]] = icmp eq i32 [[CONV1194]], [[CONV1195]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1196]], label [[COND_TRUE1198:%.*]], label [[COND_FALSE1200:%.*]]
+// SIMD-ONLY0:       cond.true1198:
+// SIMD-ONLY0-NEXT:    [[TMP496:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1199:%.*]] = zext i8 [[TMP496]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1202:%.*]]
+// SIMD-ONLY0:       cond.false1200:
+// SIMD-ONLY0-NEXT:    [[TMP497:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1201:%.*]] = zext i8 [[TMP497]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1202]]
+// SIMD-ONLY0:       cond.end1202:
+// SIMD-ONLY0-NEXT:    [[COND1203:%.*]] = phi i32 [ [[CONV1199]], [[COND_TRUE1198]] ], [ [[CONV1201]], [[COND_FALSE1200]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1204:%.*]] = trunc i32 [[COND1203]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV1204]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP498:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1205:%.*]] = zext i8 [[TMP498]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP499:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1206:%.*]] = zext i8 [[TMP499]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1207:%.*]] = icmp eq i32 [[CONV1205]], [[CONV1206]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1207]], label [[IF_THEN1209:%.*]], label [[IF_END1210:%.*]]
+// SIMD-ONLY0:       if.then1209:
+// SIMD-ONLY0-NEXT:    [[TMP500:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP500]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1210]]
+// SIMD-ONLY0:       if.end1210:
+// SIMD-ONLY0-NEXT:    [[TMP501:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1211:%.*]] = zext i8 [[TMP501]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP502:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1212:%.*]] = zext i8 [[TMP502]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1213:%.*]] = icmp eq i32 [[CONV1211]], [[CONV1212]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1213]], label [[IF_THEN1215:%.*]], label [[IF_END1216:%.*]]
+// SIMD-ONLY0:       if.then1215:
+// SIMD-ONLY0-NEXT:    [[TMP503:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP503]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1216]]
+// SIMD-ONLY0:       if.end1216:
+// SIMD-ONLY0-NEXT:    [[TMP504:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1217:%.*]] = sext i16 [[TMP504]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP505:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1218:%.*]] = sext i16 [[TMP505]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1219:%.*]] = icmp sgt i32 [[CONV1217]], [[CONV1218]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1219]], label [[COND_TRUE1221:%.*]], label [[COND_FALSE1223:%.*]]
+// SIMD-ONLY0:       cond.true1221:
+// SIMD-ONLY0-NEXT:    [[TMP506:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1222:%.*]] = sext i16 [[TMP506]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1225:%.*]]
+// SIMD-ONLY0:       cond.false1223:
+// SIMD-ONLY0-NEXT:    [[TMP507:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1224:%.*]] = sext i16 [[TMP507]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1225]]
+// SIMD-ONLY0:       cond.end1225:
+// SIMD-ONLY0-NEXT:    [[COND1226:%.*]] = phi i32 [ [[CONV1222]], [[COND_TRUE1221]] ], [ [[CONV1224]], [[COND_FALSE1223]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1227:%.*]] = trunc i32 [[COND1226]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1227]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP508:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1228:%.*]] = sext i16 [[TMP508]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP509:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1229:%.*]] = sext i16 [[TMP509]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1230:%.*]] = icmp slt i32 [[CONV1228]], [[CONV1229]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1230]], label [[COND_TRUE1232:%.*]], label [[COND_FALSE1234:%.*]]
+// SIMD-ONLY0:       cond.true1232:
+// SIMD-ONLY0-NEXT:    [[TMP510:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1233:%.*]] = sext i16 [[TMP510]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1236:%.*]]
+// SIMD-ONLY0:       cond.false1234:
+// SIMD-ONLY0-NEXT:    [[TMP511:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1235:%.*]] = sext i16 [[TMP511]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1236]]
+// SIMD-ONLY0:       cond.end1236:
+// SIMD-ONLY0-NEXT:    [[COND1237:%.*]] = phi i32 [ [[CONV1233]], [[COND_TRUE1232]] ], [ [[CONV1235]], [[COND_FALSE1234]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1238:%.*]] = trunc i32 [[COND1237]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1238]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP512:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1239:%.*]] = sext i16 [[TMP512]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP513:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1240:%.*]] = sext i16 [[TMP513]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1241:%.*]] = icmp sgt i32 [[CONV1239]], [[CONV1240]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1241]], label [[COND_TRUE1243:%.*]], label [[COND_FALSE1245:%.*]]
+// SIMD-ONLY0:       cond.true1243:
+// SIMD-ONLY0-NEXT:    [[TMP514:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1244:%.*]] = sext i16 [[TMP514]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1247:%.*]]
+// SIMD-ONLY0:       cond.false1245:
+// SIMD-ONLY0-NEXT:    [[TMP515:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1246:%.*]] = sext i16 [[TMP515]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1247]]
+// SIMD-ONLY0:       cond.end1247:
+// SIMD-ONLY0-NEXT:    [[COND1248:%.*]] = phi i32 [ [[CONV1244]], [[COND_TRUE1243]] ], [ [[CONV1246]], [[COND_FALSE1245]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1249:%.*]] = trunc i32 [[COND1248]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1249]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP516:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1250:%.*]] = sext i16 [[TMP516]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP517:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1251:%.*]] = sext i16 [[TMP517]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1252:%.*]] = icmp slt i32 [[CONV1250]], [[CONV1251]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1252]], label [[COND_TRUE1254:%.*]], label [[COND_FALSE1256:%.*]]
+// SIMD-ONLY0:       cond.true1254:
+// SIMD-ONLY0-NEXT:    [[TMP518:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1255:%.*]] = sext i16 [[TMP518]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1258:%.*]]
+// SIMD-ONLY0:       cond.false1256:
+// SIMD-ONLY0-NEXT:    [[TMP519:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1257:%.*]] = sext i16 [[TMP519]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1258]]
+// SIMD-ONLY0:       cond.end1258:
+// SIMD-ONLY0-NEXT:    [[COND1259:%.*]] = phi i32 [ [[CONV1255]], [[COND_TRUE1254]] ], [ [[CONV1257]], [[COND_FALSE1256]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1260:%.*]] = trunc i32 [[COND1259]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1260]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP520:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1261:%.*]] = sext i16 [[TMP520]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP521:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1262:%.*]] = sext i16 [[TMP521]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1263:%.*]] = icmp sgt i32 [[CONV1261]], [[CONV1262]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1263]], label [[IF_THEN1265:%.*]], label [[IF_END1266:%.*]]
+// SIMD-ONLY0:       if.then1265:
+// SIMD-ONLY0-NEXT:    [[TMP522:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP522]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1266]]
+// SIMD-ONLY0:       if.end1266:
+// SIMD-ONLY0-NEXT:    [[TMP523:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1267:%.*]] = sext i16 [[TMP523]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP524:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1268:%.*]] = sext i16 [[TMP524]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1269:%.*]] = icmp slt i32 [[CONV1267]], [[CONV1268]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1269]], label [[IF_THEN1271:%.*]], label [[IF_END1272:%.*]]
+// SIMD-ONLY0:       if.then1271:
+// SIMD-ONLY0-NEXT:    [[TMP525:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP525]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1272]]
+// SIMD-ONLY0:       if.end1272:
+// SIMD-ONLY0-NEXT:    [[TMP526:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1273:%.*]] = sext i16 [[TMP526]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP527:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1274:%.*]] = sext i16 [[TMP527]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1275:%.*]] = icmp sgt i32 [[CONV1273]], [[CONV1274]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1275]], label [[IF_THEN1277:%.*]], label [[IF_END1278:%.*]]
+// SIMD-ONLY0:       if.then1277:
+// SIMD-ONLY0-NEXT:    [[TMP528:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP528]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1278]]
+// SIMD-ONLY0:       if.end1278:
+// SIMD-ONLY0-NEXT:    [[TMP529:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1279:%.*]] = sext i16 [[TMP529]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP530:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1280:%.*]] = sext i16 [[TMP530]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1281:%.*]] = icmp slt i32 [[CONV1279]], [[CONV1280]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1281]], label [[IF_THEN1283:%.*]], label [[IF_END1284:%.*]]
+// SIMD-ONLY0:       if.then1283:
+// SIMD-ONLY0-NEXT:    [[TMP531:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP531]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1284]]
+// SIMD-ONLY0:       if.end1284:
+// SIMD-ONLY0-NEXT:    [[TMP532:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1285:%.*]] = sext i16 [[TMP532]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP533:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1286:%.*]] = sext i16 [[TMP533]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1287:%.*]] = icmp eq i32 [[CONV1285]], [[CONV1286]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1287]], label [[COND_TRUE1289:%.*]], label [[COND_FALSE1291:%.*]]
+// SIMD-ONLY0:       cond.true1289:
+// SIMD-ONLY0-NEXT:    [[TMP534:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1290:%.*]] = sext i16 [[TMP534]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1293:%.*]]
+// SIMD-ONLY0:       cond.false1291:
+// SIMD-ONLY0-NEXT:    [[TMP535:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1292:%.*]] = sext i16 [[TMP535]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1293]]
+// SIMD-ONLY0:       cond.end1293:
+// SIMD-ONLY0-NEXT:    [[COND1294:%.*]] = phi i32 [ [[CONV1290]], [[COND_TRUE1289]] ], [ [[CONV1292]], [[COND_FALSE1291]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1295:%.*]] = trunc i32 [[COND1294]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1295]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP536:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1296:%.*]] = sext i16 [[TMP536]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP537:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1297:%.*]] = sext i16 [[TMP537]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1298:%.*]] = icmp eq i32 [[CONV1296]], [[CONV1297]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1298]], label [[COND_TRUE1300:%.*]], label [[COND_FALSE1302:%.*]]
+// SIMD-ONLY0:       cond.true1300:
+// SIMD-ONLY0-NEXT:    [[TMP538:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1301:%.*]] = sext i16 [[TMP538]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1304:%.*]]
+// SIMD-ONLY0:       cond.false1302:
+// SIMD-ONLY0-NEXT:    [[TMP539:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1303:%.*]] = sext i16 [[TMP539]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1304]]
+// SIMD-ONLY0:       cond.end1304:
+// SIMD-ONLY0-NEXT:    [[COND1305:%.*]] = phi i32 [ [[CONV1301]], [[COND_TRUE1300]] ], [ [[CONV1303]], [[COND_FALSE1302]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1306:%.*]] = trunc i32 [[COND1305]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1306]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP540:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1307:%.*]] = sext i16 [[TMP540]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP541:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1308:%.*]] = sext i16 [[TMP541]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1309:%.*]] = icmp eq i32 [[CONV1307]], [[CONV1308]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1309]], label [[IF_THEN1311:%.*]], label [[IF_END1312:%.*]]
+// SIMD-ONLY0:       if.then1311:
+// SIMD-ONLY0-NEXT:    [[TMP542:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP542]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1312]]
+// SIMD-ONLY0:       if.end1312:
+// SIMD-ONLY0-NEXT:    [[TMP543:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1313:%.*]] = sext i16 [[TMP543]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP544:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1314:%.*]] = sext i16 [[TMP544]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1315:%.*]] = icmp eq i32 [[CONV1313]], [[CONV1314]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1315]], label [[IF_THEN1317:%.*]], label [[IF_END1318:%.*]]
+// SIMD-ONLY0:       if.then1317:
+// SIMD-ONLY0-NEXT:    [[TMP545:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP545]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1318]]
+// SIMD-ONLY0:       if.end1318:
+// SIMD-ONLY0-NEXT:    [[TMP546:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1319:%.*]] = zext i16 [[TMP546]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP547:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1320:%.*]] = zext i16 [[TMP547]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1321:%.*]] = icmp sgt i32 [[CONV1319]], [[CONV1320]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1321]], label [[COND_TRUE1323:%.*]], label [[COND_FALSE1325:%.*]]
+// SIMD-ONLY0:       cond.true1323:
+// SIMD-ONLY0-NEXT:    [[TMP548:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1324:%.*]] = zext i16 [[TMP548]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1327:%.*]]
+// SIMD-ONLY0:       cond.false1325:
+// SIMD-ONLY0-NEXT:    [[TMP549:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1326:%.*]] = zext i16 [[TMP549]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1327]]
+// SIMD-ONLY0:       cond.end1327:
+// SIMD-ONLY0-NEXT:    [[COND1328:%.*]] = phi i32 [ [[CONV1324]], [[COND_TRUE1323]] ], [ [[CONV1326]], [[COND_FALSE1325]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1329:%.*]] = trunc i32 [[COND1328]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1329]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP550:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1330:%.*]] = zext i16 [[TMP550]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP551:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1331:%.*]] = zext i16 [[TMP551]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1332:%.*]] = icmp slt i32 [[CONV1330]], [[CONV1331]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1332]], label [[COND_TRUE1334:%.*]], label [[COND_FALSE1336:%.*]]
+// SIMD-ONLY0:       cond.true1334:
+// SIMD-ONLY0-NEXT:    [[TMP552:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1335:%.*]] = zext i16 [[TMP552]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1338:%.*]]
+// SIMD-ONLY0:       cond.false1336:
+// SIMD-ONLY0-NEXT:    [[TMP553:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1337:%.*]] = zext i16 [[TMP553]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1338]]
+// SIMD-ONLY0:       cond.end1338:
+// SIMD-ONLY0-NEXT:    [[COND1339:%.*]] = phi i32 [ [[CONV1335]], [[COND_TRUE1334]] ], [ [[CONV1337]], [[COND_FALSE1336]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1340:%.*]] = trunc i32 [[COND1339]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1340]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP554:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1341:%.*]] = zext i16 [[TMP554]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP555:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1342:%.*]] = zext i16 [[TMP555]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1343:%.*]] = icmp sgt i32 [[CONV1341]], [[CONV1342]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1343]], label [[COND_TRUE1345:%.*]], label [[COND_FALSE1347:%.*]]
+// SIMD-ONLY0:       cond.true1345:
+// SIMD-ONLY0-NEXT:    [[TMP556:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1346:%.*]] = zext i16 [[TMP556]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1349:%.*]]
+// SIMD-ONLY0:       cond.false1347:
+// SIMD-ONLY0-NEXT:    [[TMP557:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1348:%.*]] = zext i16 [[TMP557]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1349]]
+// SIMD-ONLY0:       cond.end1349:
+// SIMD-ONLY0-NEXT:    [[COND1350:%.*]] = phi i32 [ [[CONV1346]], [[COND_TRUE1345]] ], [ [[CONV1348]], [[COND_FALSE1347]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1351:%.*]] = trunc i32 [[COND1350]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1351]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP558:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1352:%.*]] = zext i16 [[TMP558]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP559:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1353:%.*]] = zext i16 [[TMP559]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1354:%.*]] = icmp slt i32 [[CONV1352]], [[CONV1353]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1354]], label [[COND_TRUE1356:%.*]], label [[COND_FALSE1358:%.*]]
+// SIMD-ONLY0:       cond.true1356:
+// SIMD-ONLY0-NEXT:    [[TMP560:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1357:%.*]] = zext i16 [[TMP560]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1360:%.*]]
+// SIMD-ONLY0:       cond.false1358:
+// SIMD-ONLY0-NEXT:    [[TMP561:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1359:%.*]] = zext i16 [[TMP561]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1360]]
+// SIMD-ONLY0:       cond.end1360:
+// SIMD-ONLY0-NEXT:    [[COND1361:%.*]] = phi i32 [ [[CONV1357]], [[COND_TRUE1356]] ], [ [[CONV1359]], [[COND_FALSE1358]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1362:%.*]] = trunc i32 [[COND1361]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1362]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP562:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1363:%.*]] = zext i16 [[TMP562]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP563:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1364:%.*]] = zext i16 [[TMP563]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1365:%.*]] = icmp sgt i32 [[CONV1363]], [[CONV1364]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1365]], label [[IF_THEN1367:%.*]], label [[IF_END1368:%.*]]
+// SIMD-ONLY0:       if.then1367:
+// SIMD-ONLY0-NEXT:    [[TMP564:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP564]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1368]]
+// SIMD-ONLY0:       if.end1368:
+// SIMD-ONLY0-NEXT:    [[TMP565:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1369:%.*]] = zext i16 [[TMP565]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP566:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1370:%.*]] = zext i16 [[TMP566]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1371:%.*]] = icmp slt i32 [[CONV1369]], [[CONV1370]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1371]], label [[IF_THEN1373:%.*]], label [[IF_END1374:%.*]]
+// SIMD-ONLY0:       if.then1373:
+// SIMD-ONLY0-NEXT:    [[TMP567:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP567]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1374]]
+// SIMD-ONLY0:       if.end1374:
+// SIMD-ONLY0-NEXT:    [[TMP568:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1375:%.*]] = zext i16 [[TMP568]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP569:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1376:%.*]] = zext i16 [[TMP569]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1377:%.*]] = icmp sgt i32 [[CONV1375]], [[CONV1376]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1377]], label [[IF_THEN1379:%.*]], label [[IF_END1380:%.*]]
+// SIMD-ONLY0:       if.then1379:
+// SIMD-ONLY0-NEXT:    [[TMP570:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP570]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1380]]
+// SIMD-ONLY0:       if.end1380:
+// SIMD-ONLY0-NEXT:    [[TMP571:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1381:%.*]] = zext i16 [[TMP571]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP572:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1382:%.*]] = zext i16 [[TMP572]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1383:%.*]] = icmp slt i32 [[CONV1381]], [[CONV1382]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1383]], label [[IF_THEN1385:%.*]], label [[IF_END1386:%.*]]
+// SIMD-ONLY0:       if.then1385:
+// SIMD-ONLY0-NEXT:    [[TMP573:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP573]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1386]]
+// SIMD-ONLY0:       if.end1386:
+// SIMD-ONLY0-NEXT:    [[TMP574:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1387:%.*]] = zext i16 [[TMP574]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP575:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1388:%.*]] = zext i16 [[TMP575]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1389:%.*]] = icmp eq i32 [[CONV1387]], [[CONV1388]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1389]], label [[COND_TRUE1391:%.*]], label [[COND_FALSE1393:%.*]]
+// SIMD-ONLY0:       cond.true1391:
+// SIMD-ONLY0-NEXT:    [[TMP576:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1392:%.*]] = zext i16 [[TMP576]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1395:%.*]]
+// SIMD-ONLY0:       cond.false1393:
+// SIMD-ONLY0-NEXT:    [[TMP577:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1394:%.*]] = zext i16 [[TMP577]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1395]]
+// SIMD-ONLY0:       cond.end1395:
+// SIMD-ONLY0-NEXT:    [[COND1396:%.*]] = phi i32 [ [[CONV1392]], [[COND_TRUE1391]] ], [ [[CONV1394]], [[COND_FALSE1393]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1397:%.*]] = trunc i32 [[COND1396]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1397]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP578:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1398:%.*]] = zext i16 [[TMP578]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP579:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1399:%.*]] = zext i16 [[TMP579]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1400:%.*]] = icmp eq i32 [[CONV1398]], [[CONV1399]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1400]], label [[COND_TRUE1402:%.*]], label [[COND_FALSE1404:%.*]]
+// SIMD-ONLY0:       cond.true1402:
+// SIMD-ONLY0-NEXT:    [[TMP580:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1403:%.*]] = zext i16 [[TMP580]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1406:%.*]]
+// SIMD-ONLY0:       cond.false1404:
+// SIMD-ONLY0-NEXT:    [[TMP581:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1405:%.*]] = zext i16 [[TMP581]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1406]]
+// SIMD-ONLY0:       cond.end1406:
+// SIMD-ONLY0-NEXT:    [[COND1407:%.*]] = phi i32 [ [[CONV1403]], [[COND_TRUE1402]] ], [ [[CONV1405]], [[COND_FALSE1404]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1408:%.*]] = trunc i32 [[COND1407]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1408]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP582:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1409:%.*]] = zext i16 [[TMP582]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP583:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1410:%.*]] = zext i16 [[TMP583]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1411:%.*]] = icmp eq i32 [[CONV1409]], [[CONV1410]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1411]], label [[IF_THEN1413:%.*]], label [[IF_END1414:%.*]]
+// SIMD-ONLY0:       if.then1413:
+// SIMD-ONLY0-NEXT:    [[TMP584:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP584]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1414]]
+// SIMD-ONLY0:       if.end1414:
+// SIMD-ONLY0-NEXT:    [[TMP585:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1415:%.*]] = zext i16 [[TMP585]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP586:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1416:%.*]] = zext i16 [[TMP586]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1417:%.*]] = icmp eq i32 [[CONV1415]], [[CONV1416]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1417]], label [[IF_THEN1419:%.*]], label [[IF_END1420:%.*]]
+// SIMD-ONLY0:       if.then1419:
+// SIMD-ONLY0-NEXT:    [[TMP587:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP587]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1420]]
+// SIMD-ONLY0:       if.end1420:
+// SIMD-ONLY0-NEXT:    [[TMP588:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1421:%.*]] = sext i16 [[TMP588]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP589:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1422:%.*]] = sext i16 [[TMP589]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1423:%.*]] = icmp sgt i32 [[CONV1421]], [[CONV1422]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1423]], label [[COND_TRUE1425:%.*]], label [[COND_FALSE1427:%.*]]
+// SIMD-ONLY0:       cond.true1425:
+// SIMD-ONLY0-NEXT:    [[TMP590:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1426:%.*]] = sext i16 [[TMP590]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1429:%.*]]
+// SIMD-ONLY0:       cond.false1427:
+// SIMD-ONLY0-NEXT:    [[TMP591:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1428:%.*]] = sext i16 [[TMP591]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1429]]
+// SIMD-ONLY0:       cond.end1429:
+// SIMD-ONLY0-NEXT:    [[COND1430:%.*]] = phi i32 [ [[CONV1426]], [[COND_TRUE1425]] ], [ [[CONV1428]], [[COND_FALSE1427]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1431:%.*]] = trunc i32 [[COND1430]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1431]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP592:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1432:%.*]] = sext i16 [[TMP592]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP593:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1433:%.*]] = sext i16 [[TMP593]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1434:%.*]] = icmp slt i32 [[CONV1432]], [[CONV1433]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1434]], label [[COND_TRUE1436:%.*]], label [[COND_FALSE1438:%.*]]
+// SIMD-ONLY0:       cond.true1436:
+// SIMD-ONLY0-NEXT:    [[TMP594:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1437:%.*]] = sext i16 [[TMP594]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1440:%.*]]
+// SIMD-ONLY0:       cond.false1438:
+// SIMD-ONLY0-NEXT:    [[TMP595:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1439:%.*]] = sext i16 [[TMP595]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1440]]
+// SIMD-ONLY0:       cond.end1440:
+// SIMD-ONLY0-NEXT:    [[COND1441:%.*]] = phi i32 [ [[CONV1437]], [[COND_TRUE1436]] ], [ [[CONV1439]], [[COND_FALSE1438]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1442:%.*]] = trunc i32 [[COND1441]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1442]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP596:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1443:%.*]] = sext i16 [[TMP596]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP597:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1444:%.*]] = sext i16 [[TMP597]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1445:%.*]] = icmp sgt i32 [[CONV1443]], [[CONV1444]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1445]], label [[COND_TRUE1447:%.*]], label [[COND_FALSE1449:%.*]]
+// SIMD-ONLY0:       cond.true1447:
+// SIMD-ONLY0-NEXT:    [[TMP598:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1448:%.*]] = sext i16 [[TMP598]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1451:%.*]]
+// SIMD-ONLY0:       cond.false1449:
+// SIMD-ONLY0-NEXT:    [[TMP599:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1450:%.*]] = sext i16 [[TMP599]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1451]]
+// SIMD-ONLY0:       cond.end1451:
+// SIMD-ONLY0-NEXT:    [[COND1452:%.*]] = phi i32 [ [[CONV1448]], [[COND_TRUE1447]] ], [ [[CONV1450]], [[COND_FALSE1449]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1453:%.*]] = trunc i32 [[COND1452]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1453]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP600:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1454:%.*]] = sext i16 [[TMP600]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP601:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1455:%.*]] = sext i16 [[TMP601]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1456:%.*]] = icmp slt i32 [[CONV1454]], [[CONV1455]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1456]], label [[COND_TRUE1458:%.*]], label [[COND_FALSE1460:%.*]]
+// SIMD-ONLY0:       cond.true1458:
+// SIMD-ONLY0-NEXT:    [[TMP602:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1459:%.*]] = sext i16 [[TMP602]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1462:%.*]]
+// SIMD-ONLY0:       cond.false1460:
+// SIMD-ONLY0-NEXT:    [[TMP603:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1461:%.*]] = sext i16 [[TMP603]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1462]]
+// SIMD-ONLY0:       cond.end1462:
+// SIMD-ONLY0-NEXT:    [[COND1463:%.*]] = phi i32 [ [[CONV1459]], [[COND_TRUE1458]] ], [ [[CONV1461]], [[COND_FALSE1460]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1464:%.*]] = trunc i32 [[COND1463]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1464]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP604:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1465:%.*]] = sext i16 [[TMP604]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP605:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1466:%.*]] = sext i16 [[TMP605]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1467:%.*]] = icmp sgt i32 [[CONV1465]], [[CONV1466]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1467]], label [[IF_THEN1469:%.*]], label [[IF_END1470:%.*]]
+// SIMD-ONLY0:       if.then1469:
+// SIMD-ONLY0-NEXT:    [[TMP606:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP606]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1470]]
+// SIMD-ONLY0:       if.end1470:
+// SIMD-ONLY0-NEXT:    [[TMP607:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1471:%.*]] = sext i16 [[TMP607]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP608:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1472:%.*]] = sext i16 [[TMP608]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1473:%.*]] = icmp slt i32 [[CONV1471]], [[CONV1472]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1473]], label [[IF_THEN1475:%.*]], label [[IF_END1476:%.*]]
+// SIMD-ONLY0:       if.then1475:
+// SIMD-ONLY0-NEXT:    [[TMP609:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP609]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1476]]
+// SIMD-ONLY0:       if.end1476:
+// SIMD-ONLY0-NEXT:    [[TMP610:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1477:%.*]] = sext i16 [[TMP610]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP611:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1478:%.*]] = sext i16 [[TMP611]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1479:%.*]] = icmp sgt i32 [[CONV1477]], [[CONV1478]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1479]], label [[IF_THEN1481:%.*]], label [[IF_END1482:%.*]]
+// SIMD-ONLY0:       if.then1481:
+// SIMD-ONLY0-NEXT:    [[TMP612:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP612]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1482]]
+// SIMD-ONLY0:       if.end1482:
+// SIMD-ONLY0-NEXT:    [[TMP613:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1483:%.*]] = sext i16 [[TMP613]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP614:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1484:%.*]] = sext i16 [[TMP614]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1485:%.*]] = icmp slt i32 [[CONV1483]], [[CONV1484]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1485]], label [[IF_THEN1487:%.*]], label [[IF_END1488:%.*]]
+// SIMD-ONLY0:       if.then1487:
+// SIMD-ONLY0-NEXT:    [[TMP615:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP615]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1488]]
+// SIMD-ONLY0:       if.end1488:
+// SIMD-ONLY0-NEXT:    [[TMP616:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1489:%.*]] = sext i16 [[TMP616]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP617:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1490:%.*]] = sext i16 [[TMP617]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1491:%.*]] = icmp eq i32 [[CONV1489]], [[CONV1490]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1491]], label [[COND_TRUE1493:%.*]], label [[COND_FALSE1495:%.*]]
+// SIMD-ONLY0:       cond.true1493:
+// SIMD-ONLY0-NEXT:    [[TMP618:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1494:%.*]] = sext i16 [[TMP618]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1497:%.*]]
+// SIMD-ONLY0:       cond.false1495:
+// SIMD-ONLY0-NEXT:    [[TMP619:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1496:%.*]] = sext i16 [[TMP619]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1497]]
+// SIMD-ONLY0:       cond.end1497:
+// SIMD-ONLY0-NEXT:    [[COND1498:%.*]] = phi i32 [ [[CONV1494]], [[COND_TRUE1493]] ], [ [[CONV1496]], [[COND_FALSE1495]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1499:%.*]] = trunc i32 [[COND1498]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1499]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP620:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1500:%.*]] = sext i16 [[TMP620]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP621:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1501:%.*]] = sext i16 [[TMP621]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1502:%.*]] = icmp eq i32 [[CONV1500]], [[CONV1501]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1502]], label [[COND_TRUE1504:%.*]], label [[COND_FALSE1506:%.*]]
+// SIMD-ONLY0:       cond.true1504:
+// SIMD-ONLY0-NEXT:    [[TMP622:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1505:%.*]] = sext i16 [[TMP622]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1508:%.*]]
+// SIMD-ONLY0:       cond.false1506:
+// SIMD-ONLY0-NEXT:    [[TMP623:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1507:%.*]] = sext i16 [[TMP623]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1508]]
+// SIMD-ONLY0:       cond.end1508:
+// SIMD-ONLY0-NEXT:    [[COND1509:%.*]] = phi i32 [ [[CONV1505]], [[COND_TRUE1504]] ], [ [[CONV1507]], [[COND_FALSE1506]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1510:%.*]] = trunc i32 [[COND1509]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1510]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP624:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1511:%.*]] = sext i16 [[TMP624]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP625:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1512:%.*]] = sext i16 [[TMP625]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1513:%.*]] = icmp eq i32 [[CONV1511]], [[CONV1512]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1513]], label [[IF_THEN1515:%.*]], label [[IF_END1516:%.*]]
+// SIMD-ONLY0:       if.then1515:
+// SIMD-ONLY0-NEXT:    [[TMP626:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP626]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1516]]
+// SIMD-ONLY0:       if.end1516:
+// SIMD-ONLY0-NEXT:    [[TMP627:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1517:%.*]] = sext i16 [[TMP627]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP628:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1518:%.*]] = sext i16 [[TMP628]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1519:%.*]] = icmp eq i32 [[CONV1517]], [[CONV1518]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1519]], label [[IF_THEN1521:%.*]], label [[IF_END1522:%.*]]
+// SIMD-ONLY0:       if.then1521:
+// SIMD-ONLY0-NEXT:    [[TMP629:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP629]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1522]]
+// SIMD-ONLY0:       if.end1522:
+// SIMD-ONLY0-NEXT:    [[TMP630:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1523:%.*]] = zext i16 [[TMP630]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP631:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1524:%.*]] = zext i16 [[TMP631]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1525:%.*]] = icmp sgt i32 [[CONV1523]], [[CONV1524]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1525]], label [[COND_TRUE1527:%.*]], label [[COND_FALSE1529:%.*]]
+// SIMD-ONLY0:       cond.true1527:
+// SIMD-ONLY0-NEXT:    [[TMP632:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1528:%.*]] = zext i16 [[TMP632]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1531:%.*]]
+// SIMD-ONLY0:       cond.false1529:
+// SIMD-ONLY0-NEXT:    [[TMP633:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1530:%.*]] = zext i16 [[TMP633]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1531]]
+// SIMD-ONLY0:       cond.end1531:
+// SIMD-ONLY0-NEXT:    [[COND1532:%.*]] = phi i32 [ [[CONV1528]], [[COND_TRUE1527]] ], [ [[CONV1530]], [[COND_FALSE1529]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1533:%.*]] = trunc i32 [[COND1532]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1533]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP634:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1534:%.*]] = zext i16 [[TMP634]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP635:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1535:%.*]] = zext i16 [[TMP635]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1536:%.*]] = icmp slt i32 [[CONV1534]], [[CONV1535]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1536]], label [[COND_TRUE1538:%.*]], label [[COND_FALSE1540:%.*]]
+// SIMD-ONLY0:       cond.true1538:
+// SIMD-ONLY0-NEXT:    [[TMP636:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1539:%.*]] = zext i16 [[TMP636]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1542:%.*]]
+// SIMD-ONLY0:       cond.false1540:
+// SIMD-ONLY0-NEXT:    [[TMP637:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1541:%.*]] = zext i16 [[TMP637]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1542]]
+// SIMD-ONLY0:       cond.end1542:
+// SIMD-ONLY0-NEXT:    [[COND1543:%.*]] = phi i32 [ [[CONV1539]], [[COND_TRUE1538]] ], [ [[CONV1541]], [[COND_FALSE1540]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1544:%.*]] = trunc i32 [[COND1543]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1544]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP638:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1545:%.*]] = zext i16 [[TMP638]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP639:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1546:%.*]] = zext i16 [[TMP639]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1547:%.*]] = icmp sgt i32 [[CONV1545]], [[CONV1546]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1547]], label [[COND_TRUE1549:%.*]], label [[COND_FALSE1551:%.*]]
+// SIMD-ONLY0:       cond.true1549:
+// SIMD-ONLY0-NEXT:    [[TMP640:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1550:%.*]] = zext i16 [[TMP640]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1553:%.*]]
+// SIMD-ONLY0:       cond.false1551:
+// SIMD-ONLY0-NEXT:    [[TMP641:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1552:%.*]] = zext i16 [[TMP641]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1553]]
+// SIMD-ONLY0:       cond.end1553:
+// SIMD-ONLY0-NEXT:    [[COND1554:%.*]] = phi i32 [ [[CONV1550]], [[COND_TRUE1549]] ], [ [[CONV1552]], [[COND_FALSE1551]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1555:%.*]] = trunc i32 [[COND1554]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1555]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP642:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1556:%.*]] = zext i16 [[TMP642]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP643:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1557:%.*]] = zext i16 [[TMP643]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1558:%.*]] = icmp slt i32 [[CONV1556]], [[CONV1557]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1558]], label [[COND_TRUE1560:%.*]], label [[COND_FALSE1562:%.*]]
+// SIMD-ONLY0:       cond.true1560:
+// SIMD-ONLY0-NEXT:    [[TMP644:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1561:%.*]] = zext i16 [[TMP644]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1564:%.*]]
+// SIMD-ONLY0:       cond.false1562:
+// SIMD-ONLY0-NEXT:    [[TMP645:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1563:%.*]] = zext i16 [[TMP645]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1564]]
+// SIMD-ONLY0:       cond.end1564:
+// SIMD-ONLY0-NEXT:    [[COND1565:%.*]] = phi i32 [ [[CONV1561]], [[COND_TRUE1560]] ], [ [[CONV1563]], [[COND_FALSE1562]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1566:%.*]] = trunc i32 [[COND1565]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1566]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP646:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1567:%.*]] = zext i16 [[TMP646]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP647:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1568:%.*]] = zext i16 [[TMP647]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1569:%.*]] = icmp sgt i32 [[CONV1567]], [[CONV1568]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1569]], label [[IF_THEN1571:%.*]], label [[IF_END1572:%.*]]
+// SIMD-ONLY0:       if.then1571:
+// SIMD-ONLY0-NEXT:    [[TMP648:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP648]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1572]]
+// SIMD-ONLY0:       if.end1572:
+// SIMD-ONLY0-NEXT:    [[TMP649:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1573:%.*]] = zext i16 [[TMP649]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP650:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1574:%.*]] = zext i16 [[TMP650]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1575:%.*]] = icmp slt i32 [[CONV1573]], [[CONV1574]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1575]], label [[IF_THEN1577:%.*]], label [[IF_END1578:%.*]]
+// SIMD-ONLY0:       if.then1577:
+// SIMD-ONLY0-NEXT:    [[TMP651:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP651]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1578]]
+// SIMD-ONLY0:       if.end1578:
+// SIMD-ONLY0-NEXT:    [[TMP652:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1579:%.*]] = zext i16 [[TMP652]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP653:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1580:%.*]] = zext i16 [[TMP653]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1581:%.*]] = icmp sgt i32 [[CONV1579]], [[CONV1580]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1581]], label [[IF_THEN1583:%.*]], label [[IF_END1584:%.*]]
+// SIMD-ONLY0:       if.then1583:
+// SIMD-ONLY0-NEXT:    [[TMP654:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP654]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1584]]
+// SIMD-ONLY0:       if.end1584:
+// SIMD-ONLY0-NEXT:    [[TMP655:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1585:%.*]] = zext i16 [[TMP655]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP656:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1586:%.*]] = zext i16 [[TMP656]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1587:%.*]] = icmp slt i32 [[CONV1585]], [[CONV1586]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1587]], label [[IF_THEN1589:%.*]], label [[IF_END1590:%.*]]
+// SIMD-ONLY0:       if.then1589:
+// SIMD-ONLY0-NEXT:    [[TMP657:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP657]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1590]]
+// SIMD-ONLY0:       if.end1590:
+// SIMD-ONLY0-NEXT:    [[TMP658:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1591:%.*]] = zext i16 [[TMP658]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP659:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1592:%.*]] = zext i16 [[TMP659]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1593:%.*]] = icmp eq i32 [[CONV1591]], [[CONV1592]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1593]], label [[COND_TRUE1595:%.*]], label [[COND_FALSE1597:%.*]]
+// SIMD-ONLY0:       cond.true1595:
+// SIMD-ONLY0-NEXT:    [[TMP660:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1596:%.*]] = zext i16 [[TMP660]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1599:%.*]]
+// SIMD-ONLY0:       cond.false1597:
+// SIMD-ONLY0-NEXT:    [[TMP661:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1598:%.*]] = zext i16 [[TMP661]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1599]]
+// SIMD-ONLY0:       cond.end1599:
+// SIMD-ONLY0-NEXT:    [[COND1600:%.*]] = phi i32 [ [[CONV1596]], [[COND_TRUE1595]] ], [ [[CONV1598]], [[COND_FALSE1597]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1601:%.*]] = trunc i32 [[COND1600]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1601]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP662:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1602:%.*]] = zext i16 [[TMP662]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP663:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1603:%.*]] = zext i16 [[TMP663]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1604:%.*]] = icmp eq i32 [[CONV1602]], [[CONV1603]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1604]], label [[COND_TRUE1606:%.*]], label [[COND_FALSE1608:%.*]]
+// SIMD-ONLY0:       cond.true1606:
+// SIMD-ONLY0-NEXT:    [[TMP664:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1607:%.*]] = zext i16 [[TMP664]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1610:%.*]]
+// SIMD-ONLY0:       cond.false1608:
+// SIMD-ONLY0-NEXT:    [[TMP665:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1609:%.*]] = zext i16 [[TMP665]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1610]]
+// SIMD-ONLY0:       cond.end1610:
+// SIMD-ONLY0-NEXT:    [[COND1611:%.*]] = phi i32 [ [[CONV1607]], [[COND_TRUE1606]] ], [ [[CONV1609]], [[COND_FALSE1608]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1612:%.*]] = trunc i32 [[COND1611]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1612]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP666:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1613:%.*]] = zext i16 [[TMP666]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP667:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1614:%.*]] = zext i16 [[TMP667]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1615:%.*]] = icmp eq i32 [[CONV1613]], [[CONV1614]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1615]], label [[IF_THEN1617:%.*]], label [[IF_END1618:%.*]]
+// SIMD-ONLY0:       if.then1617:
+// SIMD-ONLY0-NEXT:    [[TMP668:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP668]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1618]]
+// SIMD-ONLY0:       if.end1618:
+// SIMD-ONLY0-NEXT:    [[TMP669:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1619:%.*]] = zext i16 [[TMP669]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP670:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1620:%.*]] = zext i16 [[TMP670]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1621:%.*]] = icmp eq i32 [[CONV1619]], [[CONV1620]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1621]], label [[IF_THEN1623:%.*]], label [[IF_END1624:%.*]]
+// SIMD-ONLY0:       if.then1623:
+// SIMD-ONLY0-NEXT:    [[TMP671:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP671]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1624]]
+// SIMD-ONLY0:       if.end1624:
+// SIMD-ONLY0-NEXT:    [[TMP672:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1625:%.*]] = sext i16 [[TMP672]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP673:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1626:%.*]] = sext i16 [[TMP673]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1627:%.*]] = icmp sgt i32 [[CONV1625]], [[CONV1626]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1627]], label [[COND_TRUE1629:%.*]], label [[COND_FALSE1631:%.*]]
+// SIMD-ONLY0:       cond.true1629:
+// SIMD-ONLY0-NEXT:    [[TMP674:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1630:%.*]] = sext i16 [[TMP674]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1633:%.*]]
+// SIMD-ONLY0:       cond.false1631:
+// SIMD-ONLY0-NEXT:    [[TMP675:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1632:%.*]] = sext i16 [[TMP675]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1633]]
+// SIMD-ONLY0:       cond.end1633:
+// SIMD-ONLY0-NEXT:    [[COND1634:%.*]] = phi i32 [ [[CONV1630]], [[COND_TRUE1629]] ], [ [[CONV1632]], [[COND_FALSE1631]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1635:%.*]] = trunc i32 [[COND1634]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1635]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP676:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1636:%.*]] = sext i16 [[TMP676]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP677:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1637:%.*]] = sext i16 [[TMP677]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1638:%.*]] = icmp slt i32 [[CONV1636]], [[CONV1637]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1638]], label [[COND_TRUE1640:%.*]], label [[COND_FALSE1642:%.*]]
+// SIMD-ONLY0:       cond.true1640:
+// SIMD-ONLY0-NEXT:    [[TMP678:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1641:%.*]] = sext i16 [[TMP678]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1644:%.*]]
+// SIMD-ONLY0:       cond.false1642:
+// SIMD-ONLY0-NEXT:    [[TMP679:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1643:%.*]] = sext i16 [[TMP679]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1644]]
+// SIMD-ONLY0:       cond.end1644:
+// SIMD-ONLY0-NEXT:    [[COND1645:%.*]] = phi i32 [ [[CONV1641]], [[COND_TRUE1640]] ], [ [[CONV1643]], [[COND_FALSE1642]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1646:%.*]] = trunc i32 [[COND1645]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1646]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP680:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1647:%.*]] = sext i16 [[TMP680]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP681:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1648:%.*]] = sext i16 [[TMP681]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1649:%.*]] = icmp sgt i32 [[CONV1647]], [[CONV1648]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1649]], label [[COND_TRUE1651:%.*]], label [[COND_FALSE1653:%.*]]
+// SIMD-ONLY0:       cond.true1651:
+// SIMD-ONLY0-NEXT:    [[TMP682:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1652:%.*]] = sext i16 [[TMP682]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1655:%.*]]
+// SIMD-ONLY0:       cond.false1653:
+// SIMD-ONLY0-NEXT:    [[TMP683:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1654:%.*]] = sext i16 [[TMP683]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1655]]
+// SIMD-ONLY0:       cond.end1655:
+// SIMD-ONLY0-NEXT:    [[COND1656:%.*]] = phi i32 [ [[CONV1652]], [[COND_TRUE1651]] ], [ [[CONV1654]], [[COND_FALSE1653]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1657:%.*]] = trunc i32 [[COND1656]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1657]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP684:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1658:%.*]] = sext i16 [[TMP684]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP685:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1659:%.*]] = sext i16 [[TMP685]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1660:%.*]] = icmp slt i32 [[CONV1658]], [[CONV1659]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1660]], label [[COND_TRUE1662:%.*]], label [[COND_FALSE1664:%.*]]
+// SIMD-ONLY0:       cond.true1662:
+// SIMD-ONLY0-NEXT:    [[TMP686:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1663:%.*]] = sext i16 [[TMP686]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1666:%.*]]
+// SIMD-ONLY0:       cond.false1664:
+// SIMD-ONLY0-NEXT:    [[TMP687:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1665:%.*]] = sext i16 [[TMP687]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1666]]
+// SIMD-ONLY0:       cond.end1666:
+// SIMD-ONLY0-NEXT:    [[COND1667:%.*]] = phi i32 [ [[CONV1663]], [[COND_TRUE1662]] ], [ [[CONV1665]], [[COND_FALSE1664]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1668:%.*]] = trunc i32 [[COND1667]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1668]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP688:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1669:%.*]] = sext i16 [[TMP688]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP689:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1670:%.*]] = sext i16 [[TMP689]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1671:%.*]] = icmp sgt i32 [[CONV1669]], [[CONV1670]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1671]], label [[IF_THEN1673:%.*]], label [[IF_END1674:%.*]]
+// SIMD-ONLY0:       if.then1673:
+// SIMD-ONLY0-NEXT:    [[TMP690:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP690]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1674]]
+// SIMD-ONLY0:       if.end1674:
+// SIMD-ONLY0-NEXT:    [[TMP691:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1675:%.*]] = sext i16 [[TMP691]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP692:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1676:%.*]] = sext i16 [[TMP692]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1677:%.*]] = icmp slt i32 [[CONV1675]], [[CONV1676]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1677]], label [[IF_THEN1679:%.*]], label [[IF_END1680:%.*]]
+// SIMD-ONLY0:       if.then1679:
+// SIMD-ONLY0-NEXT:    [[TMP693:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP693]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1680]]
+// SIMD-ONLY0:       if.end1680:
+// SIMD-ONLY0-NEXT:    [[TMP694:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1681:%.*]] = sext i16 [[TMP694]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP695:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1682:%.*]] = sext i16 [[TMP695]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1683:%.*]] = icmp sgt i32 [[CONV1681]], [[CONV1682]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1683]], label [[IF_THEN1685:%.*]], label [[IF_END1686:%.*]]
+// SIMD-ONLY0:       if.then1685:
+// SIMD-ONLY0-NEXT:    [[TMP696:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP696]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1686]]
+// SIMD-ONLY0:       if.end1686:
+// SIMD-ONLY0-NEXT:    [[TMP697:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1687:%.*]] = sext i16 [[TMP697]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP698:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1688:%.*]] = sext i16 [[TMP698]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1689:%.*]] = icmp slt i32 [[CONV1687]], [[CONV1688]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1689]], label [[IF_THEN1691:%.*]], label [[IF_END1692:%.*]]
+// SIMD-ONLY0:       if.then1691:
+// SIMD-ONLY0-NEXT:    [[TMP699:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP699]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1692]]
+// SIMD-ONLY0:       if.end1692:
+// SIMD-ONLY0-NEXT:    [[TMP700:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1693:%.*]] = sext i16 [[TMP700]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP701:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1694:%.*]] = sext i16 [[TMP701]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1695:%.*]] = icmp eq i32 [[CONV1693]], [[CONV1694]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1695]], label [[COND_TRUE1697:%.*]], label [[COND_FALSE1699:%.*]]
+// SIMD-ONLY0:       cond.true1697:
+// SIMD-ONLY0-NEXT:    [[TMP702:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1698:%.*]] = sext i16 [[TMP702]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1701:%.*]]
+// SIMD-ONLY0:       cond.false1699:
+// SIMD-ONLY0-NEXT:    [[TMP703:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1700:%.*]] = sext i16 [[TMP703]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1701]]
+// SIMD-ONLY0:       cond.end1701:
+// SIMD-ONLY0-NEXT:    [[COND1702:%.*]] = phi i32 [ [[CONV1698]], [[COND_TRUE1697]] ], [ [[CONV1700]], [[COND_FALSE1699]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1703:%.*]] = trunc i32 [[COND1702]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1703]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP704:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1704:%.*]] = sext i16 [[TMP704]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP705:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1705:%.*]] = sext i16 [[TMP705]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1706:%.*]] = icmp eq i32 [[CONV1704]], [[CONV1705]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1706]], label [[COND_TRUE1708:%.*]], label [[COND_FALSE1710:%.*]]
+// SIMD-ONLY0:       cond.true1708:
+// SIMD-ONLY0-NEXT:    [[TMP706:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1709:%.*]] = sext i16 [[TMP706]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1712:%.*]]
+// SIMD-ONLY0:       cond.false1710:
+// SIMD-ONLY0-NEXT:    [[TMP707:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1711:%.*]] = sext i16 [[TMP707]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1712]]
+// SIMD-ONLY0:       cond.end1712:
+// SIMD-ONLY0-NEXT:    [[COND1713:%.*]] = phi i32 [ [[CONV1709]], [[COND_TRUE1708]] ], [ [[CONV1711]], [[COND_FALSE1710]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1714:%.*]] = trunc i32 [[COND1713]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1714]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP708:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1715:%.*]] = sext i16 [[TMP708]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP709:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1716:%.*]] = sext i16 [[TMP709]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1717:%.*]] = icmp eq i32 [[CONV1715]], [[CONV1716]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1717]], label [[IF_THEN1719:%.*]], label [[IF_END1720:%.*]]
+// SIMD-ONLY0:       if.then1719:
+// SIMD-ONLY0-NEXT:    [[TMP710:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP710]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1720]]
+// SIMD-ONLY0:       if.end1720:
+// SIMD-ONLY0-NEXT:    [[TMP711:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1721:%.*]] = sext i16 [[TMP711]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP712:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1722:%.*]] = sext i16 [[TMP712]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1723:%.*]] = icmp eq i32 [[CONV1721]], [[CONV1722]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1723]], label [[IF_THEN1725:%.*]], label [[IF_END1726:%.*]]
+// SIMD-ONLY0:       if.then1725:
+// SIMD-ONLY0-NEXT:    [[TMP713:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP713]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1726]]
+// SIMD-ONLY0:       if.end1726:
+// SIMD-ONLY0-NEXT:    [[TMP714:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1727:%.*]] = zext i16 [[TMP714]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP715:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1728:%.*]] = zext i16 [[TMP715]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1729:%.*]] = icmp sgt i32 [[CONV1727]], [[CONV1728]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1729]], label [[COND_TRUE1731:%.*]], label [[COND_FALSE1733:%.*]]
+// SIMD-ONLY0:       cond.true1731:
+// SIMD-ONLY0-NEXT:    [[TMP716:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1732:%.*]] = zext i16 [[TMP716]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1735:%.*]]
+// SIMD-ONLY0:       cond.false1733:
+// SIMD-ONLY0-NEXT:    [[TMP717:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1734:%.*]] = zext i16 [[TMP717]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1735]]
+// SIMD-ONLY0:       cond.end1735:
+// SIMD-ONLY0-NEXT:    [[COND1736:%.*]] = phi i32 [ [[CONV1732]], [[COND_TRUE1731]] ], [ [[CONV1734]], [[COND_FALSE1733]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1737:%.*]] = trunc i32 [[COND1736]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1737]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP718:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1738:%.*]] = zext i16 [[TMP718]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP719:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1739:%.*]] = zext i16 [[TMP719]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1740:%.*]] = icmp slt i32 [[CONV1738]], [[CONV1739]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1740]], label [[COND_TRUE1742:%.*]], label [[COND_FALSE1744:%.*]]
+// SIMD-ONLY0:       cond.true1742:
+// SIMD-ONLY0-NEXT:    [[TMP720:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1743:%.*]] = zext i16 [[TMP720]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1746:%.*]]
+// SIMD-ONLY0:       cond.false1744:
+// SIMD-ONLY0-NEXT:    [[TMP721:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1745:%.*]] = zext i16 [[TMP721]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1746]]
+// SIMD-ONLY0:       cond.end1746:
+// SIMD-ONLY0-NEXT:    [[COND1747:%.*]] = phi i32 [ [[CONV1743]], [[COND_TRUE1742]] ], [ [[CONV1745]], [[COND_FALSE1744]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1748:%.*]] = trunc i32 [[COND1747]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1748]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP722:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1749:%.*]] = zext i16 [[TMP722]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP723:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1750:%.*]] = zext i16 [[TMP723]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1751:%.*]] = icmp sgt i32 [[CONV1749]], [[CONV1750]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1751]], label [[COND_TRUE1753:%.*]], label [[COND_FALSE1755:%.*]]
+// SIMD-ONLY0:       cond.true1753:
+// SIMD-ONLY0-NEXT:    [[TMP724:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1754:%.*]] = zext i16 [[TMP724]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1757:%.*]]
+// SIMD-ONLY0:       cond.false1755:
+// SIMD-ONLY0-NEXT:    [[TMP725:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1756:%.*]] = zext i16 [[TMP725]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1757]]
+// SIMD-ONLY0:       cond.end1757:
+// SIMD-ONLY0-NEXT:    [[COND1758:%.*]] = phi i32 [ [[CONV1754]], [[COND_TRUE1753]] ], [ [[CONV1756]], [[COND_FALSE1755]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1759:%.*]] = trunc i32 [[COND1758]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1759]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP726:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1760:%.*]] = zext i16 [[TMP726]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP727:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1761:%.*]] = zext i16 [[TMP727]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1762:%.*]] = icmp slt i32 [[CONV1760]], [[CONV1761]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1762]], label [[COND_TRUE1764:%.*]], label [[COND_FALSE1766:%.*]]
+// SIMD-ONLY0:       cond.true1764:
+// SIMD-ONLY0-NEXT:    [[TMP728:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1765:%.*]] = zext i16 [[TMP728]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1768:%.*]]
+// SIMD-ONLY0:       cond.false1766:
+// SIMD-ONLY0-NEXT:    [[TMP729:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1767:%.*]] = zext i16 [[TMP729]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1768]]
+// SIMD-ONLY0:       cond.end1768:
+// SIMD-ONLY0-NEXT:    [[COND1769:%.*]] = phi i32 [ [[CONV1765]], [[COND_TRUE1764]] ], [ [[CONV1767]], [[COND_FALSE1766]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1770:%.*]] = trunc i32 [[COND1769]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1770]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP730:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1771:%.*]] = zext i16 [[TMP730]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP731:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1772:%.*]] = zext i16 [[TMP731]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1773:%.*]] = icmp sgt i32 [[CONV1771]], [[CONV1772]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1773]], label [[IF_THEN1775:%.*]], label [[IF_END1776:%.*]]
+// SIMD-ONLY0:       if.then1775:
+// SIMD-ONLY0-NEXT:    [[TMP732:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP732]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1776]]
+// SIMD-ONLY0:       if.end1776:
+// SIMD-ONLY0-NEXT:    [[TMP733:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1777:%.*]] = zext i16 [[TMP733]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP734:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1778:%.*]] = zext i16 [[TMP734]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1779:%.*]] = icmp slt i32 [[CONV1777]], [[CONV1778]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1779]], label [[IF_THEN1781:%.*]], label [[IF_END1782:%.*]]
+// SIMD-ONLY0:       if.then1781:
+// SIMD-ONLY0-NEXT:    [[TMP735:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP735]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1782]]
+// SIMD-ONLY0:       if.end1782:
+// SIMD-ONLY0-NEXT:    [[TMP736:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1783:%.*]] = zext i16 [[TMP736]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP737:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1784:%.*]] = zext i16 [[TMP737]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1785:%.*]] = icmp sgt i32 [[CONV1783]], [[CONV1784]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1785]], label [[IF_THEN1787:%.*]], label [[IF_END1788:%.*]]
+// SIMD-ONLY0:       if.then1787:
+// SIMD-ONLY0-NEXT:    [[TMP738:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP738]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1788]]
+// SIMD-ONLY0:       if.end1788:
+// SIMD-ONLY0-NEXT:    [[TMP739:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1789:%.*]] = zext i16 [[TMP739]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP740:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1790:%.*]] = zext i16 [[TMP740]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1791:%.*]] = icmp slt i32 [[CONV1789]], [[CONV1790]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1791]], label [[IF_THEN1793:%.*]], label [[IF_END1794:%.*]]
+// SIMD-ONLY0:       if.then1793:
+// SIMD-ONLY0-NEXT:    [[TMP741:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP741]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1794]]
+// SIMD-ONLY0:       if.end1794:
+// SIMD-ONLY0-NEXT:    [[TMP742:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1795:%.*]] = zext i16 [[TMP742]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP743:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1796:%.*]] = zext i16 [[TMP743]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1797:%.*]] = icmp eq i32 [[CONV1795]], [[CONV1796]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1797]], label [[COND_TRUE1799:%.*]], label [[COND_FALSE1801:%.*]]
+// SIMD-ONLY0:       cond.true1799:
+// SIMD-ONLY0-NEXT:    [[TMP744:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1800:%.*]] = zext i16 [[TMP744]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1803:%.*]]
+// SIMD-ONLY0:       cond.false1801:
+// SIMD-ONLY0-NEXT:    [[TMP745:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1802:%.*]] = zext i16 [[TMP745]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1803]]
+// SIMD-ONLY0:       cond.end1803:
+// SIMD-ONLY0-NEXT:    [[COND1804:%.*]] = phi i32 [ [[CONV1800]], [[COND_TRUE1799]] ], [ [[CONV1802]], [[COND_FALSE1801]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1805:%.*]] = trunc i32 [[COND1804]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1805]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP746:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1806:%.*]] = zext i16 [[TMP746]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP747:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1807:%.*]] = zext i16 [[TMP747]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1808:%.*]] = icmp eq i32 [[CONV1806]], [[CONV1807]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1808]], label [[COND_TRUE1810:%.*]], label [[COND_FALSE1812:%.*]]
+// SIMD-ONLY0:       cond.true1810:
+// SIMD-ONLY0-NEXT:    [[TMP748:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1811:%.*]] = zext i16 [[TMP748]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1814:%.*]]
+// SIMD-ONLY0:       cond.false1812:
+// SIMD-ONLY0-NEXT:    [[TMP749:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1813:%.*]] = zext i16 [[TMP749]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1814]]
+// SIMD-ONLY0:       cond.end1814:
+// SIMD-ONLY0-NEXT:    [[COND1815:%.*]] = phi i32 [ [[CONV1811]], [[COND_TRUE1810]] ], [ [[CONV1813]], [[COND_FALSE1812]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1816:%.*]] = trunc i32 [[COND1815]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1816]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP750:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1817:%.*]] = zext i16 [[TMP750]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP751:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1818:%.*]] = zext i16 [[TMP751]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1819:%.*]] = icmp eq i32 [[CONV1817]], [[CONV1818]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1819]], label [[IF_THEN1821:%.*]], label [[IF_END1822:%.*]]
+// SIMD-ONLY0:       if.then1821:
+// SIMD-ONLY0-NEXT:    [[TMP752:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP752]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1822]]
+// SIMD-ONLY0:       if.end1822:
+// SIMD-ONLY0-NEXT:    [[TMP753:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1823:%.*]] = zext i16 [[TMP753]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP754:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1824:%.*]] = zext i16 [[TMP754]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1825:%.*]] = icmp eq i32 [[CONV1823]], [[CONV1824]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1825]], label [[IF_THEN1827:%.*]], label [[IF_END1828:%.*]]
+// SIMD-ONLY0:       if.then1827:
+// SIMD-ONLY0-NEXT:    [[TMP755:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP755]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1828]]
+// SIMD-ONLY0:       if.end1828:
+// SIMD-ONLY0-NEXT:    [[TMP756:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1829:%.*]] = sext i16 [[TMP756]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP757:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1830:%.*]] = sext i16 [[TMP757]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1831:%.*]] = icmp sgt i32 [[CONV1829]], [[CONV1830]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1831]], label [[COND_TRUE1833:%.*]], label [[COND_FALSE1835:%.*]]
+// SIMD-ONLY0:       cond.true1833:
+// SIMD-ONLY0-NEXT:    [[TMP758:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1834:%.*]] = sext i16 [[TMP758]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1837:%.*]]
+// SIMD-ONLY0:       cond.false1835:
+// SIMD-ONLY0-NEXT:    [[TMP759:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1836:%.*]] = sext i16 [[TMP759]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1837]]
+// SIMD-ONLY0:       cond.end1837:
+// SIMD-ONLY0-NEXT:    [[COND1838:%.*]] = phi i32 [ [[CONV1834]], [[COND_TRUE1833]] ], [ [[CONV1836]], [[COND_FALSE1835]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1839:%.*]] = trunc i32 [[COND1838]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1839]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP760:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1840:%.*]] = sext i16 [[TMP760]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP761:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1841:%.*]] = sext i16 [[TMP761]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1842:%.*]] = icmp slt i32 [[CONV1840]], [[CONV1841]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1842]], label [[COND_TRUE1844:%.*]], label [[COND_FALSE1846:%.*]]
+// SIMD-ONLY0:       cond.true1844:
+// SIMD-ONLY0-NEXT:    [[TMP762:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1845:%.*]] = sext i16 [[TMP762]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1848:%.*]]
+// SIMD-ONLY0:       cond.false1846:
+// SIMD-ONLY0-NEXT:    [[TMP763:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1847:%.*]] = sext i16 [[TMP763]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1848]]
+// SIMD-ONLY0:       cond.end1848:
+// SIMD-ONLY0-NEXT:    [[COND1849:%.*]] = phi i32 [ [[CONV1845]], [[COND_TRUE1844]] ], [ [[CONV1847]], [[COND_FALSE1846]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1850:%.*]] = trunc i32 [[COND1849]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1850]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP764:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1851:%.*]] = sext i16 [[TMP764]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP765:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1852:%.*]] = sext i16 [[TMP765]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1853:%.*]] = icmp sgt i32 [[CONV1851]], [[CONV1852]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1853]], label [[COND_TRUE1855:%.*]], label [[COND_FALSE1857:%.*]]
+// SIMD-ONLY0:       cond.true1855:
+// SIMD-ONLY0-NEXT:    [[TMP766:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1856:%.*]] = sext i16 [[TMP766]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1859:%.*]]
+// SIMD-ONLY0:       cond.false1857:
+// SIMD-ONLY0-NEXT:    [[TMP767:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1858:%.*]] = sext i16 [[TMP767]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1859]]
+// SIMD-ONLY0:       cond.end1859:
+// SIMD-ONLY0-NEXT:    [[COND1860:%.*]] = phi i32 [ [[CONV1856]], [[COND_TRUE1855]] ], [ [[CONV1858]], [[COND_FALSE1857]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1861:%.*]] = trunc i32 [[COND1860]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1861]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP768:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1862:%.*]] = sext i16 [[TMP768]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP769:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1863:%.*]] = sext i16 [[TMP769]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1864:%.*]] = icmp slt i32 [[CONV1862]], [[CONV1863]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1864]], label [[COND_TRUE1866:%.*]], label [[COND_FALSE1868:%.*]]
+// SIMD-ONLY0:       cond.true1866:
+// SIMD-ONLY0-NEXT:    [[TMP770:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1867:%.*]] = sext i16 [[TMP770]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1870:%.*]]
+// SIMD-ONLY0:       cond.false1868:
+// SIMD-ONLY0-NEXT:    [[TMP771:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1869:%.*]] = sext i16 [[TMP771]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1870]]
+// SIMD-ONLY0:       cond.end1870:
+// SIMD-ONLY0-NEXT:    [[COND1871:%.*]] = phi i32 [ [[CONV1867]], [[COND_TRUE1866]] ], [ [[CONV1869]], [[COND_FALSE1868]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1872:%.*]] = trunc i32 [[COND1871]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1872]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP772:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1873:%.*]] = sext i16 [[TMP772]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP773:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1874:%.*]] = sext i16 [[TMP773]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1875:%.*]] = icmp sgt i32 [[CONV1873]], [[CONV1874]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1875]], label [[IF_THEN1877:%.*]], label [[IF_END1878:%.*]]
+// SIMD-ONLY0:       if.then1877:
+// SIMD-ONLY0-NEXT:    [[TMP774:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP774]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1878]]
+// SIMD-ONLY0:       if.end1878:
+// SIMD-ONLY0-NEXT:    [[TMP775:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1879:%.*]] = sext i16 [[TMP775]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP776:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1880:%.*]] = sext i16 [[TMP776]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1881:%.*]] = icmp slt i32 [[CONV1879]], [[CONV1880]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1881]], label [[IF_THEN1883:%.*]], label [[IF_END1884:%.*]]
+// SIMD-ONLY0:       if.then1883:
+// SIMD-ONLY0-NEXT:    [[TMP777:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP777]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1884]]
+// SIMD-ONLY0:       if.end1884:
+// SIMD-ONLY0-NEXT:    [[TMP778:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1885:%.*]] = sext i16 [[TMP778]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP779:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1886:%.*]] = sext i16 [[TMP779]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1887:%.*]] = icmp sgt i32 [[CONV1885]], [[CONV1886]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1887]], label [[IF_THEN1889:%.*]], label [[IF_END1890:%.*]]
+// SIMD-ONLY0:       if.then1889:
+// SIMD-ONLY0-NEXT:    [[TMP780:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP780]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1890]]
+// SIMD-ONLY0:       if.end1890:
+// SIMD-ONLY0-NEXT:    [[TMP781:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1891:%.*]] = sext i16 [[TMP781]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP782:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1892:%.*]] = sext i16 [[TMP782]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1893:%.*]] = icmp slt i32 [[CONV1891]], [[CONV1892]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1893]], label [[IF_THEN1895:%.*]], label [[IF_END1896:%.*]]
+// SIMD-ONLY0:       if.then1895:
+// SIMD-ONLY0-NEXT:    [[TMP783:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP783]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1896]]
+// SIMD-ONLY0:       if.end1896:
+// SIMD-ONLY0-NEXT:    [[TMP784:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1897:%.*]] = sext i16 [[TMP784]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP785:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1898:%.*]] = sext i16 [[TMP785]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1899:%.*]] = icmp eq i32 [[CONV1897]], [[CONV1898]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1899]], label [[COND_TRUE1901:%.*]], label [[COND_FALSE1903:%.*]]
+// SIMD-ONLY0:       cond.true1901:
+// SIMD-ONLY0-NEXT:    [[TMP786:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1902:%.*]] = sext i16 [[TMP786]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1905:%.*]]
+// SIMD-ONLY0:       cond.false1903:
+// SIMD-ONLY0-NEXT:    [[TMP787:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1904:%.*]] = sext i16 [[TMP787]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1905]]
+// SIMD-ONLY0:       cond.end1905:
+// SIMD-ONLY0-NEXT:    [[COND1906:%.*]] = phi i32 [ [[CONV1902]], [[COND_TRUE1901]] ], [ [[CONV1904]], [[COND_FALSE1903]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1907:%.*]] = trunc i32 [[COND1906]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1907]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP788:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1908:%.*]] = sext i16 [[TMP788]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP789:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1909:%.*]] = sext i16 [[TMP789]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1910:%.*]] = icmp eq i32 [[CONV1908]], [[CONV1909]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1910]], label [[COND_TRUE1912:%.*]], label [[COND_FALSE1914:%.*]]
+// SIMD-ONLY0:       cond.true1912:
+// SIMD-ONLY0-NEXT:    [[TMP790:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1913:%.*]] = sext i16 [[TMP790]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1916:%.*]]
+// SIMD-ONLY0:       cond.false1914:
+// SIMD-ONLY0-NEXT:    [[TMP791:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1915:%.*]] = sext i16 [[TMP791]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1916]]
+// SIMD-ONLY0:       cond.end1916:
+// SIMD-ONLY0-NEXT:    [[COND1917:%.*]] = phi i32 [ [[CONV1913]], [[COND_TRUE1912]] ], [ [[CONV1915]], [[COND_FALSE1914]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1918:%.*]] = trunc i32 [[COND1917]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1918]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP792:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1919:%.*]] = sext i16 [[TMP792]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP793:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1920:%.*]] = sext i16 [[TMP793]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1921:%.*]] = icmp eq i32 [[CONV1919]], [[CONV1920]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1921]], label [[IF_THEN1923:%.*]], label [[IF_END1924:%.*]]
+// SIMD-ONLY0:       if.then1923:
+// SIMD-ONLY0-NEXT:    [[TMP794:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP794]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1924]]
+// SIMD-ONLY0:       if.end1924:
+// SIMD-ONLY0-NEXT:    [[TMP795:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1925:%.*]] = sext i16 [[TMP795]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP796:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1926:%.*]] = sext i16 [[TMP796]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1927:%.*]] = icmp eq i32 [[CONV1925]], [[CONV1926]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1927]], label [[IF_THEN1929:%.*]], label [[IF_END1930:%.*]]
+// SIMD-ONLY0:       if.then1929:
+// SIMD-ONLY0-NEXT:    [[TMP797:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP797]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1930]]
+// SIMD-ONLY0:       if.end1930:
+// SIMD-ONLY0-NEXT:    [[TMP798:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1931:%.*]] = zext i16 [[TMP798]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP799:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1932:%.*]] = zext i16 [[TMP799]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1933:%.*]] = icmp sgt i32 [[CONV1931]], [[CONV1932]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1933]], label [[COND_TRUE1935:%.*]], label [[COND_FALSE1937:%.*]]
+// SIMD-ONLY0:       cond.true1935:
+// SIMD-ONLY0-NEXT:    [[TMP800:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1936:%.*]] = zext i16 [[TMP800]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1939:%.*]]
+// SIMD-ONLY0:       cond.false1937:
+// SIMD-ONLY0-NEXT:    [[TMP801:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1938:%.*]] = zext i16 [[TMP801]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1939]]
+// SIMD-ONLY0:       cond.end1939:
+// SIMD-ONLY0-NEXT:    [[COND1940:%.*]] = phi i32 [ [[CONV1936]], [[COND_TRUE1935]] ], [ [[CONV1938]], [[COND_FALSE1937]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1941:%.*]] = trunc i32 [[COND1940]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1941]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP802:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1942:%.*]] = zext i16 [[TMP802]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP803:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1943:%.*]] = zext i16 [[TMP803]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1944:%.*]] = icmp slt i32 [[CONV1942]], [[CONV1943]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1944]], label [[COND_TRUE1946:%.*]], label [[COND_FALSE1948:%.*]]
+// SIMD-ONLY0:       cond.true1946:
+// SIMD-ONLY0-NEXT:    [[TMP804:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1947:%.*]] = zext i16 [[TMP804]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1950:%.*]]
+// SIMD-ONLY0:       cond.false1948:
+// SIMD-ONLY0-NEXT:    [[TMP805:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1949:%.*]] = zext i16 [[TMP805]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1950]]
+// SIMD-ONLY0:       cond.end1950:
+// SIMD-ONLY0-NEXT:    [[COND1951:%.*]] = phi i32 [ [[CONV1947]], [[COND_TRUE1946]] ], [ [[CONV1949]], [[COND_FALSE1948]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1952:%.*]] = trunc i32 [[COND1951]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1952]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP806:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1953:%.*]] = zext i16 [[TMP806]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP807:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1954:%.*]] = zext i16 [[TMP807]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1955:%.*]] = icmp sgt i32 [[CONV1953]], [[CONV1954]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1955]], label [[COND_TRUE1957:%.*]], label [[COND_FALSE1959:%.*]]
+// SIMD-ONLY0:       cond.true1957:
+// SIMD-ONLY0-NEXT:    [[TMP808:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1958:%.*]] = zext i16 [[TMP808]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1961:%.*]]
+// SIMD-ONLY0:       cond.false1959:
+// SIMD-ONLY0-NEXT:    [[TMP809:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1960:%.*]] = zext i16 [[TMP809]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1961]]
+// SIMD-ONLY0:       cond.end1961:
+// SIMD-ONLY0-NEXT:    [[COND1962:%.*]] = phi i32 [ [[CONV1958]], [[COND_TRUE1957]] ], [ [[CONV1960]], [[COND_FALSE1959]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1963:%.*]] = trunc i32 [[COND1962]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1963]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP810:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1964:%.*]] = zext i16 [[TMP810]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP811:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1965:%.*]] = zext i16 [[TMP811]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1966:%.*]] = icmp slt i32 [[CONV1964]], [[CONV1965]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1966]], label [[COND_TRUE1968:%.*]], label [[COND_FALSE1970:%.*]]
+// SIMD-ONLY0:       cond.true1968:
+// SIMD-ONLY0-NEXT:    [[TMP812:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1969:%.*]] = zext i16 [[TMP812]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1972:%.*]]
+// SIMD-ONLY0:       cond.false1970:
+// SIMD-ONLY0-NEXT:    [[TMP813:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1971:%.*]] = zext i16 [[TMP813]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END1972]]
+// SIMD-ONLY0:       cond.end1972:
+// SIMD-ONLY0-NEXT:    [[COND1973:%.*]] = phi i32 [ [[CONV1969]], [[COND_TRUE1968]] ], [ [[CONV1971]], [[COND_FALSE1970]] ]
+// SIMD-ONLY0-NEXT:    [[CONV1974:%.*]] = trunc i32 [[COND1973]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1974]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP814:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1975:%.*]] = zext i16 [[TMP814]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP815:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1976:%.*]] = zext i16 [[TMP815]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1977:%.*]] = icmp sgt i32 [[CONV1975]], [[CONV1976]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1977]], label [[IF_THEN1979:%.*]], label [[IF_END1980:%.*]]
+// SIMD-ONLY0:       if.then1979:
+// SIMD-ONLY0-NEXT:    [[TMP816:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP816]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1980]]
+// SIMD-ONLY0:       if.end1980:
+// SIMD-ONLY0-NEXT:    [[TMP817:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1981:%.*]] = zext i16 [[TMP817]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP818:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1982:%.*]] = zext i16 [[TMP818]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1983:%.*]] = icmp slt i32 [[CONV1981]], [[CONV1982]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1983]], label [[IF_THEN1985:%.*]], label [[IF_END1986:%.*]]
+// SIMD-ONLY0:       if.then1985:
+// SIMD-ONLY0-NEXT:    [[TMP819:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP819]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1986]]
+// SIMD-ONLY0:       if.end1986:
+// SIMD-ONLY0-NEXT:    [[TMP820:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1987:%.*]] = zext i16 [[TMP820]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP821:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1988:%.*]] = zext i16 [[TMP821]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1989:%.*]] = icmp sgt i32 [[CONV1987]], [[CONV1988]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1989]], label [[IF_THEN1991:%.*]], label [[IF_END1992:%.*]]
+// SIMD-ONLY0:       if.then1991:
+// SIMD-ONLY0-NEXT:    [[TMP822:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP822]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1992]]
+// SIMD-ONLY0:       if.end1992:
+// SIMD-ONLY0-NEXT:    [[TMP823:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1993:%.*]] = zext i16 [[TMP823]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP824:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1994:%.*]] = zext i16 [[TMP824]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1995:%.*]] = icmp slt i32 [[CONV1993]], [[CONV1994]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1995]], label [[IF_THEN1997:%.*]], label [[IF_END1998:%.*]]
+// SIMD-ONLY0:       if.then1997:
+// SIMD-ONLY0-NEXT:    [[TMP825:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP825]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1998]]
+// SIMD-ONLY0:       if.end1998:
+// SIMD-ONLY0-NEXT:    [[TMP826:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1999:%.*]] = zext i16 [[TMP826]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP827:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2000:%.*]] = zext i16 [[TMP827]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2001:%.*]] = icmp eq i32 [[CONV1999]], [[CONV2000]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2001]], label [[COND_TRUE2003:%.*]], label [[COND_FALSE2005:%.*]]
+// SIMD-ONLY0:       cond.true2003:
+// SIMD-ONLY0-NEXT:    [[TMP828:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2004:%.*]] = zext i16 [[TMP828]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2007:%.*]]
+// SIMD-ONLY0:       cond.false2005:
+// SIMD-ONLY0-NEXT:    [[TMP829:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2006:%.*]] = zext i16 [[TMP829]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2007]]
+// SIMD-ONLY0:       cond.end2007:
+// SIMD-ONLY0-NEXT:    [[COND2008:%.*]] = phi i32 [ [[CONV2004]], [[COND_TRUE2003]] ], [ [[CONV2006]], [[COND_FALSE2005]] ]
+// SIMD-ONLY0-NEXT:    [[CONV2009:%.*]] = trunc i32 [[COND2008]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2009]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP830:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2010:%.*]] = zext i16 [[TMP830]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP831:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2011:%.*]] = zext i16 [[TMP831]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2012:%.*]] = icmp eq i32 [[CONV2010]], [[CONV2011]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2012]], label [[COND_TRUE2014:%.*]], label [[COND_FALSE2016:%.*]]
+// SIMD-ONLY0:       cond.true2014:
+// SIMD-ONLY0-NEXT:    [[TMP832:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2015:%.*]] = zext i16 [[TMP832]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2018:%.*]]
+// SIMD-ONLY0:       cond.false2016:
+// SIMD-ONLY0-NEXT:    [[TMP833:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2017:%.*]] = zext i16 [[TMP833]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2018]]
+// SIMD-ONLY0:       cond.end2018:
+// SIMD-ONLY0-NEXT:    [[COND2019:%.*]] = phi i32 [ [[CONV2015]], [[COND_TRUE2014]] ], [ [[CONV2017]], [[COND_FALSE2016]] ]
+// SIMD-ONLY0-NEXT:    [[CONV2020:%.*]] = trunc i32 [[COND2019]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2020]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP834:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2021:%.*]] = zext i16 [[TMP834]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP835:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2022:%.*]] = zext i16 [[TMP835]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2023:%.*]] = icmp eq i32 [[CONV2021]], [[CONV2022]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2023]], label [[IF_THEN2025:%.*]], label [[IF_END2026:%.*]]
+// SIMD-ONLY0:       if.then2025:
+// SIMD-ONLY0-NEXT:    [[TMP836:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP836]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2026]]
+// SIMD-ONLY0:       if.end2026:
+// SIMD-ONLY0-NEXT:    [[TMP837:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2027:%.*]] = zext i16 [[TMP837]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP838:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2028:%.*]] = zext i16 [[TMP838]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2029:%.*]] = icmp eq i32 [[CONV2027]], [[CONV2028]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2029]], label [[IF_THEN2031:%.*]], label [[IF_END2032:%.*]]
+// SIMD-ONLY0:       if.then2031:
+// SIMD-ONLY0-NEXT:    [[TMP839:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP839]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2032]]
+// SIMD-ONLY0:       if.end2032:
+// SIMD-ONLY0-NEXT:    [[TMP840:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2033:%.*]] = sext i16 [[TMP840]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP841:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2034:%.*]] = sext i16 [[TMP841]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2035:%.*]] = icmp sgt i32 [[CONV2033]], [[CONV2034]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2035]], label [[COND_TRUE2037:%.*]], label [[COND_FALSE2039:%.*]]
+// SIMD-ONLY0:       cond.true2037:
+// SIMD-ONLY0-NEXT:    [[TMP842:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2038:%.*]] = sext i16 [[TMP842]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2041:%.*]]
+// SIMD-ONLY0:       cond.false2039:
+// SIMD-ONLY0-NEXT:    [[TMP843:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2040:%.*]] = sext i16 [[TMP843]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2041]]
+// SIMD-ONLY0:       cond.end2041:
+// SIMD-ONLY0-NEXT:    [[COND2042:%.*]] = phi i32 [ [[CONV2038]], [[COND_TRUE2037]] ], [ [[CONV2040]], [[COND_FALSE2039]] ]
+// SIMD-ONLY0-NEXT:    [[CONV2043:%.*]] = trunc i32 [[COND2042]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2043]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP844:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2044:%.*]] = sext i16 [[TMP844]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP845:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2045:%.*]] = sext i16 [[TMP845]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2046:%.*]] = icmp slt i32 [[CONV2044]], [[CONV2045]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2046]], label [[COND_TRUE2048:%.*]], label [[COND_FALSE2050:%.*]]
+// SIMD-ONLY0:       cond.true2048:
+// SIMD-ONLY0-NEXT:    [[TMP846:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2049:%.*]] = sext i16 [[TMP846]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2052:%.*]]
+// SIMD-ONLY0:       cond.false2050:
+// SIMD-ONLY0-NEXT:    [[TMP847:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2051:%.*]] = sext i16 [[TMP847]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2052]]
+// SIMD-ONLY0:       cond.end2052:
+// SIMD-ONLY0-NEXT:    [[COND2053:%.*]] = phi i32 [ [[CONV2049]], [[COND_TRUE2048]] ], [ [[CONV2051]], [[COND_FALSE2050]] ]
+// SIMD-ONLY0-NEXT:    [[CONV2054:%.*]] = trunc i32 [[COND2053]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2054]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP848:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2055:%.*]] = sext i16 [[TMP848]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP849:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2056:%.*]] = sext i16 [[TMP849]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2057:%.*]] = icmp sgt i32 [[CONV2055]], [[CONV2056]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2057]], label [[COND_TRUE2059:%.*]], label [[COND_FALSE2061:%.*]]
+// SIMD-ONLY0:       cond.true2059:
+// SIMD-ONLY0-NEXT:    [[TMP850:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2060:%.*]] = sext i16 [[TMP850]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2063:%.*]]
+// SIMD-ONLY0:       cond.false2061:
+// SIMD-ONLY0-NEXT:    [[TMP851:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2062:%.*]] = sext i16 [[TMP851]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2063]]
+// SIMD-ONLY0:       cond.end2063:
+// SIMD-ONLY0-NEXT:    [[COND2064:%.*]] = phi i32 [ [[CONV2060]], [[COND_TRUE2059]] ], [ [[CONV2062]], [[COND_FALSE2061]] ]
+// SIMD-ONLY0-NEXT:    [[CONV2065:%.*]] = trunc i32 [[COND2064]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2065]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP852:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2066:%.*]] = sext i16 [[TMP852]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP853:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2067:%.*]] = sext i16 [[TMP853]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2068:%.*]] = icmp slt i32 [[CONV2066]], [[CONV2067]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2068]], label [[COND_TRUE2070:%.*]], label [[COND_FALSE2072:%.*]]
+// SIMD-ONLY0:       cond.true2070:
+// SIMD-ONLY0-NEXT:    [[TMP854:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2071:%.*]] = sext i16 [[TMP854]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2074:%.*]]
+// SIMD-ONLY0:       cond.false2072:
+// SIMD-ONLY0-NEXT:    [[TMP855:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2073:%.*]] = sext i16 [[TMP855]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2074]]
+// SIMD-ONLY0:       cond.end2074:
+// SIMD-ONLY0-NEXT:    [[COND2075:%.*]] = phi i32 [ [[CONV2071]], [[COND_TRUE2070]] ], [ [[CONV2073]], [[COND_FALSE2072]] ]
+// SIMD-ONLY0-NEXT:    [[CONV2076:%.*]] = trunc i32 [[COND2075]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2076]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP856:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2077:%.*]] = sext i16 [[TMP856]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP857:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2078:%.*]] = sext i16 [[TMP857]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2079:%.*]] = icmp sgt i32 [[CONV2077]], [[CONV2078]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2079]], label [[IF_THEN2081:%.*]], label [[IF_END2082:%.*]]
+// SIMD-ONLY0:       if.then2081:
+// SIMD-ONLY0-NEXT:    [[TMP858:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP858]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2082]]
+// SIMD-ONLY0:       if.end2082:
+// SIMD-ONLY0-NEXT:    [[TMP859:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2083:%.*]] = sext i16 [[TMP859]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP860:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2084:%.*]] = sext i16 [[TMP860]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2085:%.*]] = icmp slt i32 [[CONV2083]], [[CONV2084]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2085]], label [[IF_THEN2087:%.*]], label [[IF_END2088:%.*]]
+// SIMD-ONLY0:       if.then2087:
+// SIMD-ONLY0-NEXT:    [[TMP861:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP861]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2088]]
+// SIMD-ONLY0:       if.end2088:
+// SIMD-ONLY0-NEXT:    [[TMP862:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2089:%.*]] = sext i16 [[TMP862]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP863:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2090:%.*]] = sext i16 [[TMP863]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2091:%.*]] = icmp sgt i32 [[CONV2089]], [[CONV2090]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2091]], label [[IF_THEN2093:%.*]], label [[IF_END2094:%.*]]
+// SIMD-ONLY0:       if.then2093:
+// SIMD-ONLY0-NEXT:    [[TMP864:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP864]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2094]]
+// SIMD-ONLY0:       if.end2094:
+// SIMD-ONLY0-NEXT:    [[TMP865:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2095:%.*]] = sext i16 [[TMP865]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP866:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2096:%.*]] = sext i16 [[TMP866]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2097:%.*]] = icmp slt i32 [[CONV2095]], [[CONV2096]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2097]], label [[IF_THEN2099:%.*]], label [[IF_END2100:%.*]]
+// SIMD-ONLY0:       if.then2099:
+// SIMD-ONLY0-NEXT:    [[TMP867:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP867]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2100]]
+// SIMD-ONLY0:       if.end2100:
+// SIMD-ONLY0-NEXT:    [[TMP868:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2101:%.*]] = sext i16 [[TMP868]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP869:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2102:%.*]] = sext i16 [[TMP869]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2103:%.*]] = icmp eq i32 [[CONV2101]], [[CONV2102]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2103]], label [[COND_TRUE2105:%.*]], label [[COND_FALSE2107:%.*]]
+// SIMD-ONLY0:       cond.true2105:
+// SIMD-ONLY0-NEXT:    [[TMP870:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2106:%.*]] = sext i16 [[TMP870]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2109:%.*]]
+// SIMD-ONLY0:       cond.false2107:
+// SIMD-ONLY0-NEXT:    [[TMP871:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2108:%.*]] = sext i16 [[TMP871]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2109]]
+// SIMD-ONLY0:       cond.end2109:
+// SIMD-ONLY0-NEXT:    [[COND2110:%.*]] = phi i32 [ [[CONV2106]], [[COND_TRUE2105]] ], [ [[CONV2108]], [[COND_FALSE2107]] ]
+// SIMD-ONLY0-NEXT:    [[CONV2111:%.*]] = trunc i32 [[COND2110]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2111]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP872:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2112:%.*]] = sext i16 [[TMP872]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP873:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2113:%.*]] = sext i16 [[TMP873]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2114:%.*]] = icmp eq i32 [[CONV2112]], [[CONV2113]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2114]], label [[COND_TRUE2116:%.*]], label [[COND_FALSE2118:%.*]]
+// SIMD-ONLY0:       cond.true2116:
+// SIMD-ONLY0-NEXT:    [[TMP874:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2117:%.*]] = sext i16 [[TMP874]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2120:%.*]]
+// SIMD-ONLY0:       cond.false2118:
+// SIMD-ONLY0-NEXT:    [[TMP875:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2119:%.*]] = sext i16 [[TMP875]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2120]]
+// SIMD-ONLY0:       cond.end2120:
+// SIMD-ONLY0-NEXT:    [[COND2121:%.*]] = phi i32 [ [[CONV2117]], [[COND_TRUE2116]] ], [ [[CONV2119]], [[COND_FALSE2118]] ]
+// SIMD-ONLY0-NEXT:    [[CONV2122:%.*]] = trunc i32 [[COND2121]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2122]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP876:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2123:%.*]] = sext i16 [[TMP876]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP877:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2124:%.*]] = sext i16 [[TMP877]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2125:%.*]] = icmp eq i32 [[CONV2123]], [[CONV2124]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2125]], label [[IF_THEN2127:%.*]], label [[IF_END2128:%.*]]
+// SIMD-ONLY0:       if.then2127:
+// SIMD-ONLY0-NEXT:    [[TMP878:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP878]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2128]]
+// SIMD-ONLY0:       if.end2128:
+// SIMD-ONLY0-NEXT:    [[TMP879:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2129:%.*]] = sext i16 [[TMP879]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP880:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2130:%.*]] = sext i16 [[TMP880]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2131:%.*]] = icmp eq i32 [[CONV2129]], [[CONV2130]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2131]], label [[IF_THEN2133:%.*]], label [[IF_END2134:%.*]]
+// SIMD-ONLY0:       if.then2133:
+// SIMD-ONLY0-NEXT:    [[TMP881:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP881]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2134]]
+// SIMD-ONLY0:       if.end2134:
+// SIMD-ONLY0-NEXT:    [[TMP882:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2135:%.*]] = zext i16 [[TMP882]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP883:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2136:%.*]] = zext i16 [[TMP883]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2137:%.*]] = icmp sgt i32 [[CONV2135]], [[CONV2136]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2137]], label [[COND_TRUE2139:%.*]], label [[COND_FALSE2141:%.*]]
+// SIMD-ONLY0:       cond.true2139:
+// SIMD-ONLY0-NEXT:    [[TMP884:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2140:%.*]] = zext i16 [[TMP884]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2143:%.*]]
+// SIMD-ONLY0:       cond.false2141:
+// SIMD-ONLY0-NEXT:    [[TMP885:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2142:%.*]] = zext i16 [[TMP885]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2143]]
+// SIMD-ONLY0:       cond.end2143:
+// SIMD-ONLY0-NEXT:    [[COND2144:%.*]] = phi i32 [ [[CONV2140]], [[COND_TRUE2139]] ], [ [[CONV2142]], [[COND_FALSE2141]] ]
+// SIMD-ONLY0-NEXT:    [[CONV2145:%.*]] = trunc i32 [[COND2144]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2145]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP886:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2146:%.*]] = zext i16 [[TMP886]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP887:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2147:%.*]] = zext i16 [[TMP887]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2148:%.*]] = icmp slt i32 [[CONV2146]], [[CONV2147]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2148]], label [[COND_TRUE2150:%.*]], label [[COND_FALSE2152:%.*]]
+// SIMD-ONLY0:       cond.true2150:
+// SIMD-ONLY0-NEXT:    [[TMP888:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2151:%.*]] = zext i16 [[TMP888]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2154:%.*]]
+// SIMD-ONLY0:       cond.false2152:
+// SIMD-ONLY0-NEXT:    [[TMP889:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2153:%.*]] = zext i16 [[TMP889]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2154]]
+// SIMD-ONLY0:       cond.end2154:
+// SIMD-ONLY0-NEXT:    [[COND2155:%.*]] = phi i32 [ [[CONV2151]], [[COND_TRUE2150]] ], [ [[CONV2153]], [[COND_FALSE2152]] ]
+// SIMD-ONLY0-NEXT:    [[CONV2156:%.*]] = trunc i32 [[COND2155]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2156]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP890:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2157:%.*]] = zext i16 [[TMP890]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP891:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2158:%.*]] = zext i16 [[TMP891]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2159:%.*]] = icmp sgt i32 [[CONV2157]], [[CONV2158]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2159]], label [[COND_TRUE2161:%.*]], label [[COND_FALSE2163:%.*]]
+// SIMD-ONLY0:       cond.true2161:
+// SIMD-ONLY0-NEXT:    [[TMP892:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2162:%.*]] = zext i16 [[TMP892]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2165:%.*]]
+// SIMD-ONLY0:       cond.false2163:
+// SIMD-ONLY0-NEXT:    [[TMP893:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2164:%.*]] = zext i16 [[TMP893]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2165]]
+// SIMD-ONLY0:       cond.end2165:
+// SIMD-ONLY0-NEXT:    [[COND2166:%.*]] = phi i32 [ [[CONV2162]], [[COND_TRUE2161]] ], [ [[CONV2164]], [[COND_FALSE2163]] ]
+// SIMD-ONLY0-NEXT:    [[CONV2167:%.*]] = trunc i32 [[COND2166]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2167]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP894:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2168:%.*]] = zext i16 [[TMP894]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP895:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2169:%.*]] = zext i16 [[TMP895]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2170:%.*]] = icmp slt i32 [[CONV2168]], [[CONV2169]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2170]], label [[COND_TRUE2172:%.*]], label [[COND_FALSE2174:%.*]]
+// SIMD-ONLY0:       cond.true2172:
+// SIMD-ONLY0-NEXT:    [[TMP896:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2173:%.*]] = zext i16 [[TMP896]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2176:%.*]]
+// SIMD-ONLY0:       cond.false2174:
+// SIMD-ONLY0-NEXT:    [[TMP897:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2175:%.*]] = zext i16 [[TMP897]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2176]]
+// SIMD-ONLY0:       cond.end2176:
+// SIMD-ONLY0-NEXT:    [[COND2177:%.*]] = phi i32 [ [[CONV2173]], [[COND_TRUE2172]] ], [ [[CONV2175]], [[COND_FALSE2174]] ]
+// SIMD-ONLY0-NEXT:    [[CONV2178:%.*]] = trunc i32 [[COND2177]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2178]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP898:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2179:%.*]] = zext i16 [[TMP898]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP899:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2180:%.*]] = zext i16 [[TMP899]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2181:%.*]] = icmp sgt i32 [[CONV2179]], [[CONV2180]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2181]], label [[IF_THEN2183:%.*]], label [[IF_END2184:%.*]]
+// SIMD-ONLY0:       if.then2183:
+// SIMD-ONLY0-NEXT:    [[TMP900:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP900]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2184]]
+// SIMD-ONLY0:       if.end2184:
+// SIMD-ONLY0-NEXT:    [[TMP901:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2185:%.*]] = zext i16 [[TMP901]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP902:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2186:%.*]] = zext i16 [[TMP902]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2187:%.*]] = icmp slt i32 [[CONV2185]], [[CONV2186]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2187]], label [[IF_THEN2189:%.*]], label [[IF_END2190:%.*]]
+// SIMD-ONLY0:       if.then2189:
+// SIMD-ONLY0-NEXT:    [[TMP903:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP903]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2190]]
+// SIMD-ONLY0:       if.end2190:
+// SIMD-ONLY0-NEXT:    [[TMP904:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2191:%.*]] = zext i16 [[TMP904]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP905:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2192:%.*]] = zext i16 [[TMP905]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2193:%.*]] = icmp sgt i32 [[CONV2191]], [[CONV2192]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2193]], label [[IF_THEN2195:%.*]], label [[IF_END2196:%.*]]
+// SIMD-ONLY0:       if.then2195:
+// SIMD-ONLY0-NEXT:    [[TMP906:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP906]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2196]]
+// SIMD-ONLY0:       if.end2196:
+// SIMD-ONLY0-NEXT:    [[TMP907:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2197:%.*]] = zext i16 [[TMP907]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP908:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2198:%.*]] = zext i16 [[TMP908]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2199:%.*]] = icmp slt i32 [[CONV2197]], [[CONV2198]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2199]], label [[IF_THEN2201:%.*]], label [[IF_END2202:%.*]]
+// SIMD-ONLY0:       if.then2201:
+// SIMD-ONLY0-NEXT:    [[TMP909:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP909]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2202]]
+// SIMD-ONLY0:       if.end2202:
+// SIMD-ONLY0-NEXT:    [[TMP910:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2203:%.*]] = zext i16 [[TMP910]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP911:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2204:%.*]] = zext i16 [[TMP911]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2205:%.*]] = icmp eq i32 [[CONV2203]], [[CONV2204]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2205]], label [[COND_TRUE2207:%.*]], label [[COND_FALSE2209:%.*]]
+// SIMD-ONLY0:       cond.true2207:
+// SIMD-ONLY0-NEXT:    [[TMP912:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2208:%.*]] = zext i16 [[TMP912]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2211:%.*]]
+// SIMD-ONLY0:       cond.false2209:
+// SIMD-ONLY0-NEXT:    [[TMP913:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2210:%.*]] = zext i16 [[TMP913]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2211]]
+// SIMD-ONLY0:       cond.end2211:
+// SIMD-ONLY0-NEXT:    [[COND2212:%.*]] = phi i32 [ [[CONV2208]], [[COND_TRUE2207]] ], [ [[CONV2210]], [[COND_FALSE2209]] ]
+// SIMD-ONLY0-NEXT:    [[CONV2213:%.*]] = trunc i32 [[COND2212]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2213]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP914:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2214:%.*]] = zext i16 [[TMP914]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP915:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2215:%.*]] = zext i16 [[TMP915]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2216:%.*]] = icmp eq i32 [[CONV2214]], [[CONV2215]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2216]], label [[COND_TRUE2218:%.*]], label [[COND_FALSE2220:%.*]]
+// SIMD-ONLY0:       cond.true2218:
+// SIMD-ONLY0-NEXT:    [[TMP916:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2219:%.*]] = zext i16 [[TMP916]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2222:%.*]]
+// SIMD-ONLY0:       cond.false2220:
+// SIMD-ONLY0-NEXT:    [[TMP917:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2221:%.*]] = zext i16 [[TMP917]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2222]]
+// SIMD-ONLY0:       cond.end2222:
+// SIMD-ONLY0-NEXT:    [[COND2223:%.*]] = phi i32 [ [[CONV2219]], [[COND_TRUE2218]] ], [ [[CONV2221]], [[COND_FALSE2220]] ]
+// SIMD-ONLY0-NEXT:    [[CONV2224:%.*]] = trunc i32 [[COND2223]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2224]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP918:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2225:%.*]] = zext i16 [[TMP918]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP919:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2226:%.*]] = zext i16 [[TMP919]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2227:%.*]] = icmp eq i32 [[CONV2225]], [[CONV2226]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2227]], label [[IF_THEN2229:%.*]], label [[IF_END2230:%.*]]
+// SIMD-ONLY0:       if.then2229:
+// SIMD-ONLY0-NEXT:    [[TMP920:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP920]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2230]]
+// SIMD-ONLY0:       if.end2230:
+// SIMD-ONLY0-NEXT:    [[TMP921:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2231:%.*]] = zext i16 [[TMP921]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP922:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2232:%.*]] = zext i16 [[TMP922]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2233:%.*]] = icmp eq i32 [[CONV2231]], [[CONV2232]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2233]], label [[IF_THEN2235:%.*]], label [[IF_END2236:%.*]]
+// SIMD-ONLY0:       if.then2235:
+// SIMD-ONLY0-NEXT:    [[TMP923:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP923]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2236]]
+// SIMD-ONLY0:       if.end2236:
+// SIMD-ONLY0-NEXT:    [[TMP924:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2237:%.*]] = sext i16 [[TMP924]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP925:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2238:%.*]] = sext i16 [[TMP925]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2239:%.*]] = icmp sgt i32 [[CONV2237]], [[CONV2238]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2239]], label [[COND_TRUE2241:%.*]], label [[COND_FALSE2243:%.*]]
+// SIMD-ONLY0:       cond.true2241:
+// SIMD-ONLY0-NEXT:    [[TMP926:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2242:%.*]] = sext i16 [[TMP926]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2245:%.*]]
+// SIMD-ONLY0:       cond.false2243:
+// SIMD-ONLY0-NEXT:    [[TMP927:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2244:%.*]] = sext i16 [[TMP927]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2245]]
+// SIMD-ONLY0:       cond.end2245:
+// SIMD-ONLY0-NEXT:    [[COND2246:%.*]] = phi i32 [ [[CONV2242]], [[COND_TRUE2241]] ], [ [[CONV2244]], [[COND_FALSE2243]] ]
+// SIMD-ONLY0-NEXT:    [[CONV2247:%.*]] = trunc i32 [[COND2246]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2247]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP928:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2248:%.*]] = sext i16 [[TMP928]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP929:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2249:%.*]] = sext i16 [[TMP929]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2250:%.*]] = icmp slt i32 [[CONV2248]], [[CONV2249]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2250]], label [[COND_TRUE2252:%.*]], label [[COND_FALSE2254:%.*]]
+// SIMD-ONLY0:       cond.true2252:
+// SIMD-ONLY0-NEXT:    [[TMP930:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2253:%.*]] = sext i16 [[TMP930]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2256:%.*]]
+// SIMD-ONLY0:       cond.false2254:
+// SIMD-ONLY0-NEXT:    [[TMP931:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2255:%.*]] = sext i16 [[TMP931]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2256]]
+// SIMD-ONLY0:       cond.end2256:
+// SIMD-ONLY0-NEXT:    [[COND2257:%.*]] = phi i32 [ [[CONV2253]], [[COND_TRUE2252]] ], [ [[CONV2255]], [[COND_FALSE2254]] ]
+// SIMD-ONLY0-NEXT:    [[CONV2258:%.*]] = trunc i32 [[COND2257]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2258]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP932:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2259:%.*]] = sext i16 [[TMP932]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP933:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2260:%.*]] = sext i16 [[TMP933]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2261:%.*]] = icmp sgt i32 [[CONV2259]], [[CONV2260]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2261]], label [[COND_TRUE2263:%.*]], label [[COND_FALSE2265:%.*]]
+// SIMD-ONLY0:       cond.true2263:
+// SIMD-ONLY0-NEXT:    [[TMP934:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2264:%.*]] = sext i16 [[TMP934]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2267:%.*]]
+// SIMD-ONLY0:       cond.false2265:
+// SIMD-ONLY0-NEXT:    [[TMP935:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2266:%.*]] = sext i16 [[TMP935]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2267]]
+// SIMD-ONLY0:       cond.end2267:
+// SIMD-ONLY0-NEXT:    [[COND2268:%.*]] = phi i32 [ [[CONV2264]], [[COND_TRUE2263]] ], [ [[CONV2266]], [[COND_FALSE2265]] ]
+// SIMD-ONLY0-NEXT:    [[CONV2269:%.*]] = trunc i32 [[COND2268]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2269]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP936:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2270:%.*]] = sext i16 [[TMP936]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP937:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2271:%.*]] = sext i16 [[TMP937]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2272:%.*]] = icmp slt i32 [[CONV2270]], [[CONV2271]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2272]], label [[COND_TRUE2274:%.*]], label [[COND_FALSE2276:%.*]]
+// SIMD-ONLY0:       cond.true2274:
+// SIMD-ONLY0-NEXT:    [[TMP938:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2275:%.*]] = sext i16 [[TMP938]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2278:%.*]]
+// SIMD-ONLY0:       cond.false2276:
+// SIMD-ONLY0-NEXT:    [[TMP939:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2277:%.*]] = sext i16 [[TMP939]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2278]]
+// SIMD-ONLY0:       cond.end2278:
+// SIMD-ONLY0-NEXT:    [[COND2279:%.*]] = phi i32 [ [[CONV2275]], [[COND_TRUE2274]] ], [ [[CONV2277]], [[COND_FALSE2276]] ]
+// SIMD-ONLY0-NEXT:    [[CONV2280:%.*]] = trunc i32 [[COND2279]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2280]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP940:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2281:%.*]] = sext i16 [[TMP940]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP941:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2282:%.*]] = sext i16 [[TMP941]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2283:%.*]] = icmp sgt i32 [[CONV2281]], [[CONV2282]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2283]], label [[IF_THEN2285:%.*]], label [[IF_END2286:%.*]]
+// SIMD-ONLY0:       if.then2285:
+// SIMD-ONLY0-NEXT:    [[TMP942:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP942]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2286]]
+// SIMD-ONLY0:       if.end2286:
+// SIMD-ONLY0-NEXT:    [[TMP943:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2287:%.*]] = sext i16 [[TMP943]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP944:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2288:%.*]] = sext i16 [[TMP944]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2289:%.*]] = icmp slt i32 [[CONV2287]], [[CONV2288]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2289]], label [[IF_THEN2291:%.*]], label [[IF_END2292:%.*]]
+// SIMD-ONLY0:       if.then2291:
+// SIMD-ONLY0-NEXT:    [[TMP945:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP945]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2292]]
+// SIMD-ONLY0:       if.end2292:
+// SIMD-ONLY0-NEXT:    [[TMP946:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2293:%.*]] = sext i16 [[TMP946]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP947:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2294:%.*]] = sext i16 [[TMP947]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2295:%.*]] = icmp sgt i32 [[CONV2293]], [[CONV2294]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2295]], label [[IF_THEN2297:%.*]], label [[IF_END2298:%.*]]
+// SIMD-ONLY0:       if.then2297:
+// SIMD-ONLY0-NEXT:    [[TMP948:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP948]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2298]]
+// SIMD-ONLY0:       if.end2298:
+// SIMD-ONLY0-NEXT:    [[TMP949:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2299:%.*]] = sext i16 [[TMP949]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP950:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2300:%.*]] = sext i16 [[TMP950]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2301:%.*]] = icmp slt i32 [[CONV2299]], [[CONV2300]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2301]], label [[IF_THEN2303:%.*]], label [[IF_END2304:%.*]]
+// SIMD-ONLY0:       if.then2303:
+// SIMD-ONLY0-NEXT:    [[TMP951:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP951]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2304]]
+// SIMD-ONLY0:       if.end2304:
+// SIMD-ONLY0-NEXT:    [[TMP952:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2305:%.*]] = sext i16 [[TMP952]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP953:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2306:%.*]] = sext i16 [[TMP953]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2307:%.*]] = icmp eq i32 [[CONV2305]], [[CONV2306]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2307]], label [[COND_TRUE2309:%.*]], label [[COND_FALSE2311:%.*]]
+// SIMD-ONLY0:       cond.true2309:
+// SIMD-ONLY0-NEXT:    [[TMP954:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2310:%.*]] = sext i16 [[TMP954]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2313:%.*]]
+// SIMD-ONLY0:       cond.false2311:
+// SIMD-ONLY0-NEXT:    [[TMP955:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2312:%.*]] = sext i16 [[TMP955]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2313]]
+// SIMD-ONLY0:       cond.end2313:
+// SIMD-ONLY0-NEXT:    [[COND2314:%.*]] = phi i32 [ [[CONV2310]], [[COND_TRUE2309]] ], [ [[CONV2312]], [[COND_FALSE2311]] ]
+// SIMD-ONLY0-NEXT:    [[CONV2315:%.*]] = trunc i32 [[COND2314]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2315]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP956:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2316:%.*]] = sext i16 [[TMP956]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP957:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2317:%.*]] = sext i16 [[TMP957]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2318:%.*]] = icmp eq i32 [[CONV2316]], [[CONV2317]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2318]], label [[COND_TRUE2320:%.*]], label [[COND_FALSE2322:%.*]]
+// SIMD-ONLY0:       cond.true2320:
+// SIMD-ONLY0-NEXT:    [[TMP958:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2321:%.*]] = sext i16 [[TMP958]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2324:%.*]]
+// SIMD-ONLY0:       cond.false2322:
+// SIMD-ONLY0-NEXT:    [[TMP959:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2323:%.*]] = sext i16 [[TMP959]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2324]]
+// SIMD-ONLY0:       cond.end2324:
+// SIMD-ONLY0-NEXT:    [[COND2325:%.*]] = phi i32 [ [[CONV2321]], [[COND_TRUE2320]] ], [ [[CONV2323]], [[COND_FALSE2322]] ]
+// SIMD-ONLY0-NEXT:    [[CONV2326:%.*]] = trunc i32 [[COND2325]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2326]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP960:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2327:%.*]] = sext i16 [[TMP960]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP961:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2328:%.*]] = sext i16 [[TMP961]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2329:%.*]] = icmp eq i32 [[CONV2327]], [[CONV2328]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2329]], label [[IF_THEN2331:%.*]], label [[IF_END2332:%.*]]
+// SIMD-ONLY0:       if.then2331:
+// SIMD-ONLY0-NEXT:    [[TMP962:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP962]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2332]]
+// SIMD-ONLY0:       if.end2332:
+// SIMD-ONLY0-NEXT:    [[TMP963:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2333:%.*]] = sext i16 [[TMP963]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP964:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2334:%.*]] = sext i16 [[TMP964]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2335:%.*]] = icmp eq i32 [[CONV2333]], [[CONV2334]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2335]], label [[IF_THEN2337:%.*]], label [[IF_END2338:%.*]]
+// SIMD-ONLY0:       if.then2337:
+// SIMD-ONLY0-NEXT:    [[TMP965:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP965]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2338]]
+// SIMD-ONLY0:       if.end2338:
+// SIMD-ONLY0-NEXT:    [[TMP966:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2339:%.*]] = zext i16 [[TMP966]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP967:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2340:%.*]] = zext i16 [[TMP967]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2341:%.*]] = icmp sgt i32 [[CONV2339]], [[CONV2340]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2341]], label [[COND_TRUE2343:%.*]], label [[COND_FALSE2345:%.*]]
+// SIMD-ONLY0:       cond.true2343:
+// SIMD-ONLY0-NEXT:    [[TMP968:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2344:%.*]] = zext i16 [[TMP968]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2347:%.*]]
+// SIMD-ONLY0:       cond.false2345:
+// SIMD-ONLY0-NEXT:    [[TMP969:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2346:%.*]] = zext i16 [[TMP969]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2347]]
+// SIMD-ONLY0:       cond.end2347:
+// SIMD-ONLY0-NEXT:    [[COND2348:%.*]] = phi i32 [ [[CONV2344]], [[COND_TRUE2343]] ], [ [[CONV2346]], [[COND_FALSE2345]] ]
+// SIMD-ONLY0-NEXT:    [[CONV2349:%.*]] = trunc i32 [[COND2348]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2349]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP970:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2350:%.*]] = zext i16 [[TMP970]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP971:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2351:%.*]] = zext i16 [[TMP971]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2352:%.*]] = icmp slt i32 [[CONV2350]], [[CONV2351]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2352]], label [[COND_TRUE2354:%.*]], label [[COND_FALSE2356:%.*]]
+// SIMD-ONLY0:       cond.true2354:
+// SIMD-ONLY0-NEXT:    [[TMP972:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2355:%.*]] = zext i16 [[TMP972]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2358:%.*]]
+// SIMD-ONLY0:       cond.false2356:
+// SIMD-ONLY0-NEXT:    [[TMP973:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2357:%.*]] = zext i16 [[TMP973]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2358]]
+// SIMD-ONLY0:       cond.end2358:
+// SIMD-ONLY0-NEXT:    [[COND2359:%.*]] = phi i32 [ [[CONV2355]], [[COND_TRUE2354]] ], [ [[CONV2357]], [[COND_FALSE2356]] ]
+// SIMD-ONLY0-NEXT:    [[CONV2360:%.*]] = trunc i32 [[COND2359]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2360]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP974:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2361:%.*]] = zext i16 [[TMP974]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP975:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2362:%.*]] = zext i16 [[TMP975]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2363:%.*]] = icmp sgt i32 [[CONV2361]], [[CONV2362]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2363]], label [[COND_TRUE2365:%.*]], label [[COND_FALSE2367:%.*]]
+// SIMD-ONLY0:       cond.true2365:
+// SIMD-ONLY0-NEXT:    [[TMP976:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2366:%.*]] = zext i16 [[TMP976]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2369:%.*]]
+// SIMD-ONLY0:       cond.false2367:
+// SIMD-ONLY0-NEXT:    [[TMP977:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2368:%.*]] = zext i16 [[TMP977]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2369]]
+// SIMD-ONLY0:       cond.end2369:
+// SIMD-ONLY0-NEXT:    [[COND2370:%.*]] = phi i32 [ [[CONV2366]], [[COND_TRUE2365]] ], [ [[CONV2368]], [[COND_FALSE2367]] ]
+// SIMD-ONLY0-NEXT:    [[CONV2371:%.*]] = trunc i32 [[COND2370]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2371]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP978:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2372:%.*]] = zext i16 [[TMP978]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP979:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2373:%.*]] = zext i16 [[TMP979]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2374:%.*]] = icmp slt i32 [[CONV2372]], [[CONV2373]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2374]], label [[COND_TRUE2376:%.*]], label [[COND_FALSE2378:%.*]]
+// SIMD-ONLY0:       cond.true2376:
+// SIMD-ONLY0-NEXT:    [[TMP980:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2377:%.*]] = zext i16 [[TMP980]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2380:%.*]]
+// SIMD-ONLY0:       cond.false2378:
+// SIMD-ONLY0-NEXT:    [[TMP981:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2379:%.*]] = zext i16 [[TMP981]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2380]]
+// SIMD-ONLY0:       cond.end2380:
+// SIMD-ONLY0-NEXT:    [[COND2381:%.*]] = phi i32 [ [[CONV2377]], [[COND_TRUE2376]] ], [ [[CONV2379]], [[COND_FALSE2378]] ]
+// SIMD-ONLY0-NEXT:    [[CONV2382:%.*]] = trunc i32 [[COND2381]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2382]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP982:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2383:%.*]] = zext i16 [[TMP982]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP983:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2384:%.*]] = zext i16 [[TMP983]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2385:%.*]] = icmp sgt i32 [[CONV2383]], [[CONV2384]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2385]], label [[IF_THEN2387:%.*]], label [[IF_END2388:%.*]]
+// SIMD-ONLY0:       if.then2387:
+// SIMD-ONLY0-NEXT:    [[TMP984:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP984]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2388]]
+// SIMD-ONLY0:       if.end2388:
+// SIMD-ONLY0-NEXT:    [[TMP985:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2389:%.*]] = zext i16 [[TMP985]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP986:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2390:%.*]] = zext i16 [[TMP986]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2391:%.*]] = icmp slt i32 [[CONV2389]], [[CONV2390]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2391]], label [[IF_THEN2393:%.*]], label [[IF_END2394:%.*]]
+// SIMD-ONLY0:       if.then2393:
+// SIMD-ONLY0-NEXT:    [[TMP987:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP987]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2394]]
+// SIMD-ONLY0:       if.end2394:
+// SIMD-ONLY0-NEXT:    [[TMP988:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2395:%.*]] = zext i16 [[TMP988]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP989:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2396:%.*]] = zext i16 [[TMP989]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2397:%.*]] = icmp sgt i32 [[CONV2395]], [[CONV2396]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2397]], label [[IF_THEN2399:%.*]], label [[IF_END2400:%.*]]
+// SIMD-ONLY0:       if.then2399:
+// SIMD-ONLY0-NEXT:    [[TMP990:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP990]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2400]]
+// SIMD-ONLY0:       if.end2400:
+// SIMD-ONLY0-NEXT:    [[TMP991:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2401:%.*]] = zext i16 [[TMP991]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP992:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2402:%.*]] = zext i16 [[TMP992]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2403:%.*]] = icmp slt i32 [[CONV2401]], [[CONV2402]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2403]], label [[IF_THEN2405:%.*]], label [[IF_END2406:%.*]]
+// SIMD-ONLY0:       if.then2405:
+// SIMD-ONLY0-NEXT:    [[TMP993:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP993]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2406]]
+// SIMD-ONLY0:       if.end2406:
+// SIMD-ONLY0-NEXT:    [[TMP994:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2407:%.*]] = zext i16 [[TMP994]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP995:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2408:%.*]] = zext i16 [[TMP995]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2409:%.*]] = icmp eq i32 [[CONV2407]], [[CONV2408]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2409]], label [[COND_TRUE2411:%.*]], label [[COND_FALSE2413:%.*]]
+// SIMD-ONLY0:       cond.true2411:
+// SIMD-ONLY0-NEXT:    [[TMP996:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2412:%.*]] = zext i16 [[TMP996]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2415:%.*]]
+// SIMD-ONLY0:       cond.false2413:
+// SIMD-ONLY0-NEXT:    [[TMP997:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2414:%.*]] = zext i16 [[TMP997]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2415]]
+// SIMD-ONLY0:       cond.end2415:
+// SIMD-ONLY0-NEXT:    [[COND2416:%.*]] = phi i32 [ [[CONV2412]], [[COND_TRUE2411]] ], [ [[CONV2414]], [[COND_FALSE2413]] ]
+// SIMD-ONLY0-NEXT:    [[CONV2417:%.*]] = trunc i32 [[COND2416]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2417]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP998:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2418:%.*]] = zext i16 [[TMP998]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP999:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2419:%.*]] = zext i16 [[TMP999]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2420:%.*]] = icmp eq i32 [[CONV2418]], [[CONV2419]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2420]], label [[COND_TRUE2422:%.*]], label [[COND_FALSE2424:%.*]]
+// SIMD-ONLY0:       cond.true2422:
+// SIMD-ONLY0-NEXT:    [[TMP1000:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2423:%.*]] = zext i16 [[TMP1000]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2426:%.*]]
+// SIMD-ONLY0:       cond.false2424:
+// SIMD-ONLY0-NEXT:    [[TMP1001:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2425:%.*]] = zext i16 [[TMP1001]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END2426]]
+// SIMD-ONLY0:       cond.end2426:
+// SIMD-ONLY0-NEXT:    [[COND2427:%.*]] = phi i32 [ [[CONV2423]], [[COND_TRUE2422]] ], [ [[CONV2425]], [[COND_FALSE2424]] ]
+// SIMD-ONLY0-NEXT:    [[CONV2428:%.*]] = trunc i32 [[COND2427]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2428]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1002:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2429:%.*]] = zext i16 [[TMP1002]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1003:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2430:%.*]] = zext i16 [[TMP1003]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2431:%.*]] = icmp eq i32 [[CONV2429]], [[CONV2430]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2431]], label [[IF_THEN2433:%.*]], label [[IF_END2434:%.*]]
+// SIMD-ONLY0:       if.then2433:
+// SIMD-ONLY0-NEXT:    [[TMP1004:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1004]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2434]]
+// SIMD-ONLY0:       if.end2434:
+// SIMD-ONLY0-NEXT:    [[TMP1005:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2435:%.*]] = zext i16 [[TMP1005]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1006:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2436:%.*]] = zext i16 [[TMP1006]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2437:%.*]] = icmp eq i32 [[CONV2435]], [[CONV2436]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2437]], label [[IF_THEN2439:%.*]], label [[IF_END2440:%.*]]
+// SIMD-ONLY0:       if.then2439:
+// SIMD-ONLY0-NEXT:    [[TMP1007:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1007]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2440]]
+// SIMD-ONLY0:       if.end2440:
+// SIMD-ONLY0-NEXT:    [[TMP1008:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1009:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2441:%.*]] = icmp sgt i32 [[TMP1008]], [[TMP1009]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2441]], label [[COND_TRUE2443:%.*]], label [[COND_FALSE2444:%.*]]
+// SIMD-ONLY0:       cond.true2443:
+// SIMD-ONLY0-NEXT:    [[TMP1010:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2445:%.*]]
+// SIMD-ONLY0:       cond.false2444:
+// SIMD-ONLY0-NEXT:    [[TMP1011:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2445]]
+// SIMD-ONLY0:       cond.end2445:
+// SIMD-ONLY0-NEXT:    [[COND2446:%.*]] = phi i32 [ [[TMP1010]], [[COND_TRUE2443]] ], [ [[TMP1011]], [[COND_FALSE2444]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2446]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1012:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1013:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2447:%.*]] = icmp slt i32 [[TMP1012]], [[TMP1013]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2447]], label [[COND_TRUE2449:%.*]], label [[COND_FALSE2450:%.*]]
+// SIMD-ONLY0:       cond.true2449:
+// SIMD-ONLY0-NEXT:    [[TMP1014:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2451:%.*]]
+// SIMD-ONLY0:       cond.false2450:
+// SIMD-ONLY0-NEXT:    [[TMP1015:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2451]]
+// SIMD-ONLY0:       cond.end2451:
+// SIMD-ONLY0-NEXT:    [[COND2452:%.*]] = phi i32 [ [[TMP1014]], [[COND_TRUE2449]] ], [ [[TMP1015]], [[COND_FALSE2450]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2452]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1016:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1017:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2453:%.*]] = icmp sgt i32 [[TMP1016]], [[TMP1017]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2453]], label [[COND_TRUE2455:%.*]], label [[COND_FALSE2456:%.*]]
+// SIMD-ONLY0:       cond.true2455:
+// SIMD-ONLY0-NEXT:    [[TMP1018:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2457:%.*]]
+// SIMD-ONLY0:       cond.false2456:
+// SIMD-ONLY0-NEXT:    [[TMP1019:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2457]]
+// SIMD-ONLY0:       cond.end2457:
+// SIMD-ONLY0-NEXT:    [[COND2458:%.*]] = phi i32 [ [[TMP1018]], [[COND_TRUE2455]] ], [ [[TMP1019]], [[COND_FALSE2456]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2458]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1020:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1021:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2459:%.*]] = icmp slt i32 [[TMP1020]], [[TMP1021]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2459]], label [[COND_TRUE2461:%.*]], label [[COND_FALSE2462:%.*]]
+// SIMD-ONLY0:       cond.true2461:
+// SIMD-ONLY0-NEXT:    [[TMP1022:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2463:%.*]]
+// SIMD-ONLY0:       cond.false2462:
+// SIMD-ONLY0-NEXT:    [[TMP1023:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2463]]
+// SIMD-ONLY0:       cond.end2463:
+// SIMD-ONLY0-NEXT:    [[COND2464:%.*]] = phi i32 [ [[TMP1022]], [[COND_TRUE2461]] ], [ [[TMP1023]], [[COND_FALSE2462]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2464]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1024:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1025:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2465:%.*]] = icmp sgt i32 [[TMP1024]], [[TMP1025]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2465]], label [[IF_THEN2467:%.*]], label [[IF_END2468:%.*]]
+// SIMD-ONLY0:       if.then2467:
+// SIMD-ONLY0-NEXT:    [[TMP1026:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1026]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2468]]
+// SIMD-ONLY0:       if.end2468:
+// SIMD-ONLY0-NEXT:    [[TMP1027:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1028:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2469:%.*]] = icmp slt i32 [[TMP1027]], [[TMP1028]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2469]], label [[IF_THEN2471:%.*]], label [[IF_END2472:%.*]]
+// SIMD-ONLY0:       if.then2471:
+// SIMD-ONLY0-NEXT:    [[TMP1029:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1029]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2472]]
+// SIMD-ONLY0:       if.end2472:
+// SIMD-ONLY0-NEXT:    [[TMP1030:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1031:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2473:%.*]] = icmp sgt i32 [[TMP1030]], [[TMP1031]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2473]], label [[IF_THEN2475:%.*]], label [[IF_END2476:%.*]]
+// SIMD-ONLY0:       if.then2475:
+// SIMD-ONLY0-NEXT:    [[TMP1032:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1032]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2476]]
+// SIMD-ONLY0:       if.end2476:
+// SIMD-ONLY0-NEXT:    [[TMP1033:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1034:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2477:%.*]] = icmp slt i32 [[TMP1033]], [[TMP1034]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2477]], label [[IF_THEN2479:%.*]], label [[IF_END2480:%.*]]
+// SIMD-ONLY0:       if.then2479:
+// SIMD-ONLY0-NEXT:    [[TMP1035:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1035]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2480]]
+// SIMD-ONLY0:       if.end2480:
+// SIMD-ONLY0-NEXT:    [[TMP1036:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1037:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2481:%.*]] = icmp eq i32 [[TMP1036]], [[TMP1037]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2481]], label [[COND_TRUE2483:%.*]], label [[COND_FALSE2484:%.*]]
+// SIMD-ONLY0:       cond.true2483:
+// SIMD-ONLY0-NEXT:    [[TMP1038:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2485:%.*]]
+// SIMD-ONLY0:       cond.false2484:
+// SIMD-ONLY0-NEXT:    [[TMP1039:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2485]]
+// SIMD-ONLY0:       cond.end2485:
+// SIMD-ONLY0-NEXT:    [[COND2486:%.*]] = phi i32 [ [[TMP1038]], [[COND_TRUE2483]] ], [ [[TMP1039]], [[COND_FALSE2484]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2486]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1040:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1041:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2487:%.*]] = icmp eq i32 [[TMP1040]], [[TMP1041]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2487]], label [[COND_TRUE2489:%.*]], label [[COND_FALSE2490:%.*]]
+// SIMD-ONLY0:       cond.true2489:
+// SIMD-ONLY0-NEXT:    [[TMP1042:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2491:%.*]]
+// SIMD-ONLY0:       cond.false2490:
+// SIMD-ONLY0-NEXT:    [[TMP1043:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2491]]
+// SIMD-ONLY0:       cond.end2491:
+// SIMD-ONLY0-NEXT:    [[COND2492:%.*]] = phi i32 [ [[TMP1042]], [[COND_TRUE2489]] ], [ [[TMP1043]], [[COND_FALSE2490]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2492]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1044:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1045:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2493:%.*]] = icmp eq i32 [[TMP1044]], [[TMP1045]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2493]], label [[IF_THEN2495:%.*]], label [[IF_END2496:%.*]]
+// SIMD-ONLY0:       if.then2495:
+// SIMD-ONLY0-NEXT:    [[TMP1046:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1046]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2496]]
+// SIMD-ONLY0:       if.end2496:
+// SIMD-ONLY0-NEXT:    [[TMP1047:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1048:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2497:%.*]] = icmp eq i32 [[TMP1047]], [[TMP1048]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2497]], label [[IF_THEN2499:%.*]], label [[IF_END2500:%.*]]
+// SIMD-ONLY0:       if.then2499:
+// SIMD-ONLY0-NEXT:    [[TMP1049:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1049]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2500]]
+// SIMD-ONLY0:       if.end2500:
+// SIMD-ONLY0-NEXT:    [[TMP1050:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1051:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2501:%.*]] = icmp ugt i32 [[TMP1050]], [[TMP1051]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2501]], label [[COND_TRUE2503:%.*]], label [[COND_FALSE2504:%.*]]
+// SIMD-ONLY0:       cond.true2503:
+// SIMD-ONLY0-NEXT:    [[TMP1052:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2505:%.*]]
+// SIMD-ONLY0:       cond.false2504:
+// SIMD-ONLY0-NEXT:    [[TMP1053:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2505]]
+// SIMD-ONLY0:       cond.end2505:
+// SIMD-ONLY0-NEXT:    [[COND2506:%.*]] = phi i32 [ [[TMP1052]], [[COND_TRUE2503]] ], [ [[TMP1053]], [[COND_FALSE2504]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2506]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1054:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1055:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2507:%.*]] = icmp ult i32 [[TMP1054]], [[TMP1055]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2507]], label [[COND_TRUE2509:%.*]], label [[COND_FALSE2510:%.*]]
+// SIMD-ONLY0:       cond.true2509:
+// SIMD-ONLY0-NEXT:    [[TMP1056:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2511:%.*]]
+// SIMD-ONLY0:       cond.false2510:
+// SIMD-ONLY0-NEXT:    [[TMP1057:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2511]]
+// SIMD-ONLY0:       cond.end2511:
+// SIMD-ONLY0-NEXT:    [[COND2512:%.*]] = phi i32 [ [[TMP1056]], [[COND_TRUE2509]] ], [ [[TMP1057]], [[COND_FALSE2510]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2512]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1058:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1059:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2513:%.*]] = icmp ugt i32 [[TMP1058]], [[TMP1059]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2513]], label [[COND_TRUE2515:%.*]], label [[COND_FALSE2516:%.*]]
+// SIMD-ONLY0:       cond.true2515:
+// SIMD-ONLY0-NEXT:    [[TMP1060:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2517:%.*]]
+// SIMD-ONLY0:       cond.false2516:
+// SIMD-ONLY0-NEXT:    [[TMP1061:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2517]]
+// SIMD-ONLY0:       cond.end2517:
+// SIMD-ONLY0-NEXT:    [[COND2518:%.*]] = phi i32 [ [[TMP1060]], [[COND_TRUE2515]] ], [ [[TMP1061]], [[COND_FALSE2516]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2518]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1062:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1063:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2519:%.*]] = icmp ult i32 [[TMP1062]], [[TMP1063]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2519]], label [[COND_TRUE2521:%.*]], label [[COND_FALSE2522:%.*]]
+// SIMD-ONLY0:       cond.true2521:
+// SIMD-ONLY0-NEXT:    [[TMP1064:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2523:%.*]]
+// SIMD-ONLY0:       cond.false2522:
+// SIMD-ONLY0-NEXT:    [[TMP1065:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2523]]
+// SIMD-ONLY0:       cond.end2523:
+// SIMD-ONLY0-NEXT:    [[COND2524:%.*]] = phi i32 [ [[TMP1064]], [[COND_TRUE2521]] ], [ [[TMP1065]], [[COND_FALSE2522]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2524]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1066:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1067:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2525:%.*]] = icmp ugt i32 [[TMP1066]], [[TMP1067]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2525]], label [[IF_THEN2527:%.*]], label [[IF_END2528:%.*]]
+// SIMD-ONLY0:       if.then2527:
+// SIMD-ONLY0-NEXT:    [[TMP1068:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1068]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2528]]
+// SIMD-ONLY0:       if.end2528:
+// SIMD-ONLY0-NEXT:    [[TMP1069:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1070:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2529:%.*]] = icmp ult i32 [[TMP1069]], [[TMP1070]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2529]], label [[IF_THEN2531:%.*]], label [[IF_END2532:%.*]]
+// SIMD-ONLY0:       if.then2531:
+// SIMD-ONLY0-NEXT:    [[TMP1071:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1071]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2532]]
+// SIMD-ONLY0:       if.end2532:
+// SIMD-ONLY0-NEXT:    [[TMP1072:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1073:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2533:%.*]] = icmp ugt i32 [[TMP1072]], [[TMP1073]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2533]], label [[IF_THEN2535:%.*]], label [[IF_END2536:%.*]]
+// SIMD-ONLY0:       if.then2535:
+// SIMD-ONLY0-NEXT:    [[TMP1074:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1074]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2536]]
+// SIMD-ONLY0:       if.end2536:
+// SIMD-ONLY0-NEXT:    [[TMP1075:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1076:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2537:%.*]] = icmp ult i32 [[TMP1075]], [[TMP1076]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2537]], label [[IF_THEN2539:%.*]], label [[IF_END2540:%.*]]
+// SIMD-ONLY0:       if.then2539:
+// SIMD-ONLY0-NEXT:    [[TMP1077:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1077]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2540]]
+// SIMD-ONLY0:       if.end2540:
+// SIMD-ONLY0-NEXT:    [[TMP1078:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1079:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2541:%.*]] = icmp eq i32 [[TMP1078]], [[TMP1079]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2541]], label [[COND_TRUE2543:%.*]], label [[COND_FALSE2544:%.*]]
+// SIMD-ONLY0:       cond.true2543:
+// SIMD-ONLY0-NEXT:    [[TMP1080:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2545:%.*]]
+// SIMD-ONLY0:       cond.false2544:
+// SIMD-ONLY0-NEXT:    [[TMP1081:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2545]]
+// SIMD-ONLY0:       cond.end2545:
+// SIMD-ONLY0-NEXT:    [[COND2546:%.*]] = phi i32 [ [[TMP1080]], [[COND_TRUE2543]] ], [ [[TMP1081]], [[COND_FALSE2544]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2546]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1082:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1083:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2547:%.*]] = icmp eq i32 [[TMP1082]], [[TMP1083]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2547]], label [[COND_TRUE2549:%.*]], label [[COND_FALSE2550:%.*]]
+// SIMD-ONLY0:       cond.true2549:
+// SIMD-ONLY0-NEXT:    [[TMP1084:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2551:%.*]]
+// SIMD-ONLY0:       cond.false2550:
+// SIMD-ONLY0-NEXT:    [[TMP1085:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2551]]
+// SIMD-ONLY0:       cond.end2551:
+// SIMD-ONLY0-NEXT:    [[COND2552:%.*]] = phi i32 [ [[TMP1084]], [[COND_TRUE2549]] ], [ [[TMP1085]], [[COND_FALSE2550]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2552]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1086:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1087:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2553:%.*]] = icmp eq i32 [[TMP1086]], [[TMP1087]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2553]], label [[IF_THEN2555:%.*]], label [[IF_END2556:%.*]]
+// SIMD-ONLY0:       if.then2555:
+// SIMD-ONLY0-NEXT:    [[TMP1088:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1088]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2556]]
+// SIMD-ONLY0:       if.end2556:
+// SIMD-ONLY0-NEXT:    [[TMP1089:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1090:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2557:%.*]] = icmp eq i32 [[TMP1089]], [[TMP1090]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2557]], label [[IF_THEN2559:%.*]], label [[IF_END2560:%.*]]
+// SIMD-ONLY0:       if.then2559:
+// SIMD-ONLY0-NEXT:    [[TMP1091:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1091]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2560]]
+// SIMD-ONLY0:       if.end2560:
+// SIMD-ONLY0-NEXT:    [[TMP1092:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1093:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2561:%.*]] = icmp sgt i32 [[TMP1092]], [[TMP1093]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2561]], label [[COND_TRUE2563:%.*]], label [[COND_FALSE2564:%.*]]
+// SIMD-ONLY0:       cond.true2563:
+// SIMD-ONLY0-NEXT:    [[TMP1094:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2565:%.*]]
+// SIMD-ONLY0:       cond.false2564:
+// SIMD-ONLY0-NEXT:    [[TMP1095:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2565]]
+// SIMD-ONLY0:       cond.end2565:
+// SIMD-ONLY0-NEXT:    [[COND2566:%.*]] = phi i32 [ [[TMP1094]], [[COND_TRUE2563]] ], [ [[TMP1095]], [[COND_FALSE2564]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2566]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1096:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1097:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2567:%.*]] = icmp slt i32 [[TMP1096]], [[TMP1097]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2567]], label [[COND_TRUE2569:%.*]], label [[COND_FALSE2570:%.*]]
+// SIMD-ONLY0:       cond.true2569:
+// SIMD-ONLY0-NEXT:    [[TMP1098:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2571:%.*]]
+// SIMD-ONLY0:       cond.false2570:
+// SIMD-ONLY0-NEXT:    [[TMP1099:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2571]]
+// SIMD-ONLY0:       cond.end2571:
+// SIMD-ONLY0-NEXT:    [[COND2572:%.*]] = phi i32 [ [[TMP1098]], [[COND_TRUE2569]] ], [ [[TMP1099]], [[COND_FALSE2570]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2572]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1100:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1101:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2573:%.*]] = icmp sgt i32 [[TMP1100]], [[TMP1101]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2573]], label [[COND_TRUE2575:%.*]], label [[COND_FALSE2576:%.*]]
+// SIMD-ONLY0:       cond.true2575:
+// SIMD-ONLY0-NEXT:    [[TMP1102:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2577:%.*]]
+// SIMD-ONLY0:       cond.false2576:
+// SIMD-ONLY0-NEXT:    [[TMP1103:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2577]]
+// SIMD-ONLY0:       cond.end2577:
+// SIMD-ONLY0-NEXT:    [[COND2578:%.*]] = phi i32 [ [[TMP1102]], [[COND_TRUE2575]] ], [ [[TMP1103]], [[COND_FALSE2576]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2578]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1104:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1105:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2579:%.*]] = icmp slt i32 [[TMP1104]], [[TMP1105]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2579]], label [[COND_TRUE2581:%.*]], label [[COND_FALSE2582:%.*]]
+// SIMD-ONLY0:       cond.true2581:
+// SIMD-ONLY0-NEXT:    [[TMP1106:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2583:%.*]]
+// SIMD-ONLY0:       cond.false2582:
+// SIMD-ONLY0-NEXT:    [[TMP1107:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2583]]
+// SIMD-ONLY0:       cond.end2583:
+// SIMD-ONLY0-NEXT:    [[COND2584:%.*]] = phi i32 [ [[TMP1106]], [[COND_TRUE2581]] ], [ [[TMP1107]], [[COND_FALSE2582]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2584]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1108:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1109:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2585:%.*]] = icmp sgt i32 [[TMP1108]], [[TMP1109]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2585]], label [[IF_THEN2587:%.*]], label [[IF_END2588:%.*]]
+// SIMD-ONLY0:       if.then2587:
+// SIMD-ONLY0-NEXT:    [[TMP1110:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1110]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2588]]
+// SIMD-ONLY0:       if.end2588:
+// SIMD-ONLY0-NEXT:    [[TMP1111:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1112:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2589:%.*]] = icmp slt i32 [[TMP1111]], [[TMP1112]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2589]], label [[IF_THEN2591:%.*]], label [[IF_END2592:%.*]]
+// SIMD-ONLY0:       if.then2591:
+// SIMD-ONLY0-NEXT:    [[TMP1113:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1113]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2592]]
+// SIMD-ONLY0:       if.end2592:
+// SIMD-ONLY0-NEXT:    [[TMP1114:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1115:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2593:%.*]] = icmp sgt i32 [[TMP1114]], [[TMP1115]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2593]], label [[IF_THEN2595:%.*]], label [[IF_END2596:%.*]]
+// SIMD-ONLY0:       if.then2595:
+// SIMD-ONLY0-NEXT:    [[TMP1116:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1116]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2596]]
+// SIMD-ONLY0:       if.end2596:
+// SIMD-ONLY0-NEXT:    [[TMP1117:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1118:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2597:%.*]] = icmp slt i32 [[TMP1117]], [[TMP1118]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2597]], label [[IF_THEN2599:%.*]], label [[IF_END2600:%.*]]
+// SIMD-ONLY0:       if.then2599:
+// SIMD-ONLY0-NEXT:    [[TMP1119:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1119]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2600]]
+// SIMD-ONLY0:       if.end2600:
+// SIMD-ONLY0-NEXT:    [[TMP1120:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1121:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2601:%.*]] = icmp eq i32 [[TMP1120]], [[TMP1121]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2601]], label [[COND_TRUE2603:%.*]], label [[COND_FALSE2604:%.*]]
+// SIMD-ONLY0:       cond.true2603:
+// SIMD-ONLY0-NEXT:    [[TMP1122:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2605:%.*]]
+// SIMD-ONLY0:       cond.false2604:
+// SIMD-ONLY0-NEXT:    [[TMP1123:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2605]]
+// SIMD-ONLY0:       cond.end2605:
+// SIMD-ONLY0-NEXT:    [[COND2606:%.*]] = phi i32 [ [[TMP1122]], [[COND_TRUE2603]] ], [ [[TMP1123]], [[COND_FALSE2604]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2606]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1124:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1125:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2607:%.*]] = icmp eq i32 [[TMP1124]], [[TMP1125]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2607]], label [[COND_TRUE2609:%.*]], label [[COND_FALSE2610:%.*]]
+// SIMD-ONLY0:       cond.true2609:
+// SIMD-ONLY0-NEXT:    [[TMP1126:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2611:%.*]]
+// SIMD-ONLY0:       cond.false2610:
+// SIMD-ONLY0-NEXT:    [[TMP1127:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2611]]
+// SIMD-ONLY0:       cond.end2611:
+// SIMD-ONLY0-NEXT:    [[COND2612:%.*]] = phi i32 [ [[TMP1126]], [[COND_TRUE2609]] ], [ [[TMP1127]], [[COND_FALSE2610]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2612]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1128:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1129:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2613:%.*]] = icmp eq i32 [[TMP1128]], [[TMP1129]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2613]], label [[IF_THEN2615:%.*]], label [[IF_END2616:%.*]]
+// SIMD-ONLY0:       if.then2615:
+// SIMD-ONLY0-NEXT:    [[TMP1130:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1130]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2616]]
+// SIMD-ONLY0:       if.end2616:
+// SIMD-ONLY0-NEXT:    [[TMP1131:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1132:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2617:%.*]] = icmp eq i32 [[TMP1131]], [[TMP1132]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2617]], label [[IF_THEN2619:%.*]], label [[IF_END2620:%.*]]
+// SIMD-ONLY0:       if.then2619:
+// SIMD-ONLY0-NEXT:    [[TMP1133:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1133]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2620]]
+// SIMD-ONLY0:       if.end2620:
+// SIMD-ONLY0-NEXT:    [[TMP1134:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1135:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2621:%.*]] = icmp ugt i32 [[TMP1134]], [[TMP1135]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2621]], label [[COND_TRUE2623:%.*]], label [[COND_FALSE2624:%.*]]
+// SIMD-ONLY0:       cond.true2623:
+// SIMD-ONLY0-NEXT:    [[TMP1136:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2625:%.*]]
+// SIMD-ONLY0:       cond.false2624:
+// SIMD-ONLY0-NEXT:    [[TMP1137:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2625]]
+// SIMD-ONLY0:       cond.end2625:
+// SIMD-ONLY0-NEXT:    [[COND2626:%.*]] = phi i32 [ [[TMP1136]], [[COND_TRUE2623]] ], [ [[TMP1137]], [[COND_FALSE2624]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2626]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1138:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1139:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2627:%.*]] = icmp ult i32 [[TMP1138]], [[TMP1139]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2627]], label [[COND_TRUE2629:%.*]], label [[COND_FALSE2630:%.*]]
+// SIMD-ONLY0:       cond.true2629:
+// SIMD-ONLY0-NEXT:    [[TMP1140:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2631:%.*]]
+// SIMD-ONLY0:       cond.false2630:
+// SIMD-ONLY0-NEXT:    [[TMP1141:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2631]]
+// SIMD-ONLY0:       cond.end2631:
+// SIMD-ONLY0-NEXT:    [[COND2632:%.*]] = phi i32 [ [[TMP1140]], [[COND_TRUE2629]] ], [ [[TMP1141]], [[COND_FALSE2630]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2632]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1142:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1143:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2633:%.*]] = icmp ugt i32 [[TMP1142]], [[TMP1143]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2633]], label [[COND_TRUE2635:%.*]], label [[COND_FALSE2636:%.*]]
+// SIMD-ONLY0:       cond.true2635:
+// SIMD-ONLY0-NEXT:    [[TMP1144:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2637:%.*]]
+// SIMD-ONLY0:       cond.false2636:
+// SIMD-ONLY0-NEXT:    [[TMP1145:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2637]]
+// SIMD-ONLY0:       cond.end2637:
+// SIMD-ONLY0-NEXT:    [[COND2638:%.*]] = phi i32 [ [[TMP1144]], [[COND_TRUE2635]] ], [ [[TMP1145]], [[COND_FALSE2636]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2638]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1146:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1147:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2639:%.*]] = icmp ult i32 [[TMP1146]], [[TMP1147]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2639]], label [[COND_TRUE2641:%.*]], label [[COND_FALSE2642:%.*]]
+// SIMD-ONLY0:       cond.true2641:
+// SIMD-ONLY0-NEXT:    [[TMP1148:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2643:%.*]]
+// SIMD-ONLY0:       cond.false2642:
+// SIMD-ONLY0-NEXT:    [[TMP1149:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2643]]
+// SIMD-ONLY0:       cond.end2643:
+// SIMD-ONLY0-NEXT:    [[COND2644:%.*]] = phi i32 [ [[TMP1148]], [[COND_TRUE2641]] ], [ [[TMP1149]], [[COND_FALSE2642]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2644]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1150:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1151:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2645:%.*]] = icmp ugt i32 [[TMP1150]], [[TMP1151]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2645]], label [[IF_THEN2647:%.*]], label [[IF_END2648:%.*]]
+// SIMD-ONLY0:       if.then2647:
+// SIMD-ONLY0-NEXT:    [[TMP1152:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1152]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2648]]
+// SIMD-ONLY0:       if.end2648:
+// SIMD-ONLY0-NEXT:    [[TMP1153:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1154:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2649:%.*]] = icmp ult i32 [[TMP1153]], [[TMP1154]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2649]], label [[IF_THEN2651:%.*]], label [[IF_END2652:%.*]]
+// SIMD-ONLY0:       if.then2651:
+// SIMD-ONLY0-NEXT:    [[TMP1155:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1155]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2652]]
+// SIMD-ONLY0:       if.end2652:
+// SIMD-ONLY0-NEXT:    [[TMP1156:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1157:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2653:%.*]] = icmp ugt i32 [[TMP1156]], [[TMP1157]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2653]], label [[IF_THEN2655:%.*]], label [[IF_END2656:%.*]]
+// SIMD-ONLY0:       if.then2655:
+// SIMD-ONLY0-NEXT:    [[TMP1158:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1158]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2656]]
+// SIMD-ONLY0:       if.end2656:
+// SIMD-ONLY0-NEXT:    [[TMP1159:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1160:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2657:%.*]] = icmp ult i32 [[TMP1159]], [[TMP1160]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2657]], label [[IF_THEN2659:%.*]], label [[IF_END2660:%.*]]
+// SIMD-ONLY0:       if.then2659:
+// SIMD-ONLY0-NEXT:    [[TMP1161:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1161]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2660]]
+// SIMD-ONLY0:       if.end2660:
+// SIMD-ONLY0-NEXT:    [[TMP1162:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1163:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2661:%.*]] = icmp eq i32 [[TMP1162]], [[TMP1163]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2661]], label [[COND_TRUE2663:%.*]], label [[COND_FALSE2664:%.*]]
+// SIMD-ONLY0:       cond.true2663:
+// SIMD-ONLY0-NEXT:    [[TMP1164:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2665:%.*]]
+// SIMD-ONLY0:       cond.false2664:
+// SIMD-ONLY0-NEXT:    [[TMP1165:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2665]]
+// SIMD-ONLY0:       cond.end2665:
+// SIMD-ONLY0-NEXT:    [[COND2666:%.*]] = phi i32 [ [[TMP1164]], [[COND_TRUE2663]] ], [ [[TMP1165]], [[COND_FALSE2664]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2666]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1166:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1167:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2667:%.*]] = icmp eq i32 [[TMP1166]], [[TMP1167]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2667]], label [[COND_TRUE2669:%.*]], label [[COND_FALSE2670:%.*]]
+// SIMD-ONLY0:       cond.true2669:
+// SIMD-ONLY0-NEXT:    [[TMP1168:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2671:%.*]]
+// SIMD-ONLY0:       cond.false2670:
+// SIMD-ONLY0-NEXT:    [[TMP1169:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2671]]
+// SIMD-ONLY0:       cond.end2671:
+// SIMD-ONLY0-NEXT:    [[COND2672:%.*]] = phi i32 [ [[TMP1168]], [[COND_TRUE2669]] ], [ [[TMP1169]], [[COND_FALSE2670]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2672]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1170:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1171:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2673:%.*]] = icmp eq i32 [[TMP1170]], [[TMP1171]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2673]], label [[IF_THEN2675:%.*]], label [[IF_END2676:%.*]]
+// SIMD-ONLY0:       if.then2675:
+// SIMD-ONLY0-NEXT:    [[TMP1172:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1172]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2676]]
+// SIMD-ONLY0:       if.end2676:
+// SIMD-ONLY0-NEXT:    [[TMP1173:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1174:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2677:%.*]] = icmp eq i32 [[TMP1173]], [[TMP1174]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2677]], label [[IF_THEN2679:%.*]], label [[IF_END2680:%.*]]
+// SIMD-ONLY0:       if.then2679:
+// SIMD-ONLY0-NEXT:    [[TMP1175:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1175]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2680]]
+// SIMD-ONLY0:       if.end2680:
+// SIMD-ONLY0-NEXT:    [[TMP1176:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1177:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2681:%.*]] = icmp sgt i32 [[TMP1176]], [[TMP1177]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2681]], label [[COND_TRUE2683:%.*]], label [[COND_FALSE2684:%.*]]
+// SIMD-ONLY0:       cond.true2683:
+// SIMD-ONLY0-NEXT:    [[TMP1178:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2685:%.*]]
+// SIMD-ONLY0:       cond.false2684:
+// SIMD-ONLY0-NEXT:    [[TMP1179:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2685]]
+// SIMD-ONLY0:       cond.end2685:
+// SIMD-ONLY0-NEXT:    [[COND2686:%.*]] = phi i32 [ [[TMP1178]], [[COND_TRUE2683]] ], [ [[TMP1179]], [[COND_FALSE2684]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2686]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1180:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1181:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2687:%.*]] = icmp slt i32 [[TMP1180]], [[TMP1181]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2687]], label [[COND_TRUE2689:%.*]], label [[COND_FALSE2690:%.*]]
+// SIMD-ONLY0:       cond.true2689:
+// SIMD-ONLY0-NEXT:    [[TMP1182:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2691:%.*]]
+// SIMD-ONLY0:       cond.false2690:
+// SIMD-ONLY0-NEXT:    [[TMP1183:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2691]]
+// SIMD-ONLY0:       cond.end2691:
+// SIMD-ONLY0-NEXT:    [[COND2692:%.*]] = phi i32 [ [[TMP1182]], [[COND_TRUE2689]] ], [ [[TMP1183]], [[COND_FALSE2690]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2692]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1184:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1185:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2693:%.*]] = icmp sgt i32 [[TMP1184]], [[TMP1185]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2693]], label [[COND_TRUE2695:%.*]], label [[COND_FALSE2696:%.*]]
+// SIMD-ONLY0:       cond.true2695:
+// SIMD-ONLY0-NEXT:    [[TMP1186:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2697:%.*]]
+// SIMD-ONLY0:       cond.false2696:
+// SIMD-ONLY0-NEXT:    [[TMP1187:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2697]]
+// SIMD-ONLY0:       cond.end2697:
+// SIMD-ONLY0-NEXT:    [[COND2698:%.*]] = phi i32 [ [[TMP1186]], [[COND_TRUE2695]] ], [ [[TMP1187]], [[COND_FALSE2696]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2698]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1188:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1189:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2699:%.*]] = icmp slt i32 [[TMP1188]], [[TMP1189]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2699]], label [[COND_TRUE2701:%.*]], label [[COND_FALSE2702:%.*]]
+// SIMD-ONLY0:       cond.true2701:
+// SIMD-ONLY0-NEXT:    [[TMP1190:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2703:%.*]]
+// SIMD-ONLY0:       cond.false2702:
+// SIMD-ONLY0-NEXT:    [[TMP1191:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2703]]
+// SIMD-ONLY0:       cond.end2703:
+// SIMD-ONLY0-NEXT:    [[COND2704:%.*]] = phi i32 [ [[TMP1190]], [[COND_TRUE2701]] ], [ [[TMP1191]], [[COND_FALSE2702]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2704]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1192:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1193:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2705:%.*]] = icmp sgt i32 [[TMP1192]], [[TMP1193]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2705]], label [[IF_THEN2707:%.*]], label [[IF_END2708:%.*]]
+// SIMD-ONLY0:       if.then2707:
+// SIMD-ONLY0-NEXT:    [[TMP1194:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1194]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2708]]
+// SIMD-ONLY0:       if.end2708:
+// SIMD-ONLY0-NEXT:    [[TMP1195:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1196:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2709:%.*]] = icmp slt i32 [[TMP1195]], [[TMP1196]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2709]], label [[IF_THEN2711:%.*]], label [[IF_END2712:%.*]]
+// SIMD-ONLY0:       if.then2711:
+// SIMD-ONLY0-NEXT:    [[TMP1197:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1197]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2712]]
+// SIMD-ONLY0:       if.end2712:
+// SIMD-ONLY0-NEXT:    [[TMP1198:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1199:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2713:%.*]] = icmp sgt i32 [[TMP1198]], [[TMP1199]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2713]], label [[IF_THEN2715:%.*]], label [[IF_END2716:%.*]]
+// SIMD-ONLY0:       if.then2715:
+// SIMD-ONLY0-NEXT:    [[TMP1200:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1200]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2716]]
+// SIMD-ONLY0:       if.end2716:
+// SIMD-ONLY0-NEXT:    [[TMP1201:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1202:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2717:%.*]] = icmp slt i32 [[TMP1201]], [[TMP1202]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2717]], label [[IF_THEN2719:%.*]], label [[IF_END2720:%.*]]
+// SIMD-ONLY0:       if.then2719:
+// SIMD-ONLY0-NEXT:    [[TMP1203:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1203]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2720]]
+// SIMD-ONLY0:       if.end2720:
+// SIMD-ONLY0-NEXT:    [[TMP1204:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1205:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2721:%.*]] = icmp eq i32 [[TMP1204]], [[TMP1205]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2721]], label [[COND_TRUE2723:%.*]], label [[COND_FALSE2724:%.*]]
+// SIMD-ONLY0:       cond.true2723:
+// SIMD-ONLY0-NEXT:    [[TMP1206:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2725:%.*]]
+// SIMD-ONLY0:       cond.false2724:
+// SIMD-ONLY0-NEXT:    [[TMP1207:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2725]]
+// SIMD-ONLY0:       cond.end2725:
+// SIMD-ONLY0-NEXT:    [[COND2726:%.*]] = phi i32 [ [[TMP1206]], [[COND_TRUE2723]] ], [ [[TMP1207]], [[COND_FALSE2724]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2726]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1208:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1209:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2727:%.*]] = icmp eq i32 [[TMP1208]], [[TMP1209]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2727]], label [[COND_TRUE2729:%.*]], label [[COND_FALSE2730:%.*]]
+// SIMD-ONLY0:       cond.true2729:
+// SIMD-ONLY0-NEXT:    [[TMP1210:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2731:%.*]]
+// SIMD-ONLY0:       cond.false2730:
+// SIMD-ONLY0-NEXT:    [[TMP1211:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2731]]
+// SIMD-ONLY0:       cond.end2731:
+// SIMD-ONLY0-NEXT:    [[COND2732:%.*]] = phi i32 [ [[TMP1210]], [[COND_TRUE2729]] ], [ [[TMP1211]], [[COND_FALSE2730]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2732]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1212:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1213:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2733:%.*]] = icmp eq i32 [[TMP1212]], [[TMP1213]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2733]], label [[IF_THEN2735:%.*]], label [[IF_END2736:%.*]]
+// SIMD-ONLY0:       if.then2735:
+// SIMD-ONLY0-NEXT:    [[TMP1214:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1214]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2736]]
+// SIMD-ONLY0:       if.end2736:
+// SIMD-ONLY0-NEXT:    [[TMP1215:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1216:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2737:%.*]] = icmp eq i32 [[TMP1215]], [[TMP1216]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2737]], label [[IF_THEN2739:%.*]], label [[IF_END2740:%.*]]
+// SIMD-ONLY0:       if.then2739:
+// SIMD-ONLY0-NEXT:    [[TMP1217:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1217]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2740]]
+// SIMD-ONLY0:       if.end2740:
+// SIMD-ONLY0-NEXT:    [[TMP1218:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1219:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2741:%.*]] = icmp ugt i32 [[TMP1218]], [[TMP1219]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2741]], label [[COND_TRUE2743:%.*]], label [[COND_FALSE2744:%.*]]
+// SIMD-ONLY0:       cond.true2743:
+// SIMD-ONLY0-NEXT:    [[TMP1220:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2745:%.*]]
+// SIMD-ONLY0:       cond.false2744:
+// SIMD-ONLY0-NEXT:    [[TMP1221:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2745]]
+// SIMD-ONLY0:       cond.end2745:
+// SIMD-ONLY0-NEXT:    [[COND2746:%.*]] = phi i32 [ [[TMP1220]], [[COND_TRUE2743]] ], [ [[TMP1221]], [[COND_FALSE2744]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2746]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1222:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1223:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2747:%.*]] = icmp ult i32 [[TMP1222]], [[TMP1223]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2747]], label [[COND_TRUE2749:%.*]], label [[COND_FALSE2750:%.*]]
+// SIMD-ONLY0:       cond.true2749:
+// SIMD-ONLY0-NEXT:    [[TMP1224:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2751:%.*]]
+// SIMD-ONLY0:       cond.false2750:
+// SIMD-ONLY0-NEXT:    [[TMP1225:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2751]]
+// SIMD-ONLY0:       cond.end2751:
+// SIMD-ONLY0-NEXT:    [[COND2752:%.*]] = phi i32 [ [[TMP1224]], [[COND_TRUE2749]] ], [ [[TMP1225]], [[COND_FALSE2750]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2752]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1226:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1227:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2753:%.*]] = icmp ugt i32 [[TMP1226]], [[TMP1227]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2753]], label [[COND_TRUE2755:%.*]], label [[COND_FALSE2756:%.*]]
+// SIMD-ONLY0:       cond.true2755:
+// SIMD-ONLY0-NEXT:    [[TMP1228:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2757:%.*]]
+// SIMD-ONLY0:       cond.false2756:
+// SIMD-ONLY0-NEXT:    [[TMP1229:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2757]]
+// SIMD-ONLY0:       cond.end2757:
+// SIMD-ONLY0-NEXT:    [[COND2758:%.*]] = phi i32 [ [[TMP1228]], [[COND_TRUE2755]] ], [ [[TMP1229]], [[COND_FALSE2756]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2758]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1230:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1231:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2759:%.*]] = icmp ult i32 [[TMP1230]], [[TMP1231]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2759]], label [[COND_TRUE2761:%.*]], label [[COND_FALSE2762:%.*]]
+// SIMD-ONLY0:       cond.true2761:
+// SIMD-ONLY0-NEXT:    [[TMP1232:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2763:%.*]]
+// SIMD-ONLY0:       cond.false2762:
+// SIMD-ONLY0-NEXT:    [[TMP1233:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2763]]
+// SIMD-ONLY0:       cond.end2763:
+// SIMD-ONLY0-NEXT:    [[COND2764:%.*]] = phi i32 [ [[TMP1232]], [[COND_TRUE2761]] ], [ [[TMP1233]], [[COND_FALSE2762]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2764]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1234:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1235:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2765:%.*]] = icmp ugt i32 [[TMP1234]], [[TMP1235]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2765]], label [[IF_THEN2767:%.*]], label [[IF_END2768:%.*]]
+// SIMD-ONLY0:       if.then2767:
+// SIMD-ONLY0-NEXT:    [[TMP1236:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1236]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2768]]
+// SIMD-ONLY0:       if.end2768:
+// SIMD-ONLY0-NEXT:    [[TMP1237:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1238:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2769:%.*]] = icmp ult i32 [[TMP1237]], [[TMP1238]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2769]], label [[IF_THEN2771:%.*]], label [[IF_END2772:%.*]]
+// SIMD-ONLY0:       if.then2771:
+// SIMD-ONLY0-NEXT:    [[TMP1239:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1239]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2772]]
+// SIMD-ONLY0:       if.end2772:
+// SIMD-ONLY0-NEXT:    [[TMP1240:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1241:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2773:%.*]] = icmp ugt i32 [[TMP1240]], [[TMP1241]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2773]], label [[IF_THEN2775:%.*]], label [[IF_END2776:%.*]]
+// SIMD-ONLY0:       if.then2775:
+// SIMD-ONLY0-NEXT:    [[TMP1242:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1242]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2776]]
+// SIMD-ONLY0:       if.end2776:
+// SIMD-ONLY0-NEXT:    [[TMP1243:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1244:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2777:%.*]] = icmp ult i32 [[TMP1243]], [[TMP1244]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2777]], label [[IF_THEN2779:%.*]], label [[IF_END2780:%.*]]
+// SIMD-ONLY0:       if.then2779:
+// SIMD-ONLY0-NEXT:    [[TMP1245:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1245]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2780]]
+// SIMD-ONLY0:       if.end2780:
+// SIMD-ONLY0-NEXT:    [[TMP1246:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1247:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2781:%.*]] = icmp eq i32 [[TMP1246]], [[TMP1247]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2781]], label [[COND_TRUE2783:%.*]], label [[COND_FALSE2784:%.*]]
+// SIMD-ONLY0:       cond.true2783:
+// SIMD-ONLY0-NEXT:    [[TMP1248:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2785:%.*]]
+// SIMD-ONLY0:       cond.false2784:
+// SIMD-ONLY0-NEXT:    [[TMP1249:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2785]]
+// SIMD-ONLY0:       cond.end2785:
+// SIMD-ONLY0-NEXT:    [[COND2786:%.*]] = phi i32 [ [[TMP1248]], [[COND_TRUE2783]] ], [ [[TMP1249]], [[COND_FALSE2784]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2786]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1250:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1251:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2787:%.*]] = icmp eq i32 [[TMP1250]], [[TMP1251]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2787]], label [[COND_TRUE2789:%.*]], label [[COND_FALSE2790:%.*]]
+// SIMD-ONLY0:       cond.true2789:
+// SIMD-ONLY0-NEXT:    [[TMP1252:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2791:%.*]]
+// SIMD-ONLY0:       cond.false2790:
+// SIMD-ONLY0-NEXT:    [[TMP1253:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2791]]
+// SIMD-ONLY0:       cond.end2791:
+// SIMD-ONLY0-NEXT:    [[COND2792:%.*]] = phi i32 [ [[TMP1252]], [[COND_TRUE2789]] ], [ [[TMP1253]], [[COND_FALSE2790]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2792]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1254:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1255:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2793:%.*]] = icmp eq i32 [[TMP1254]], [[TMP1255]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2793]], label [[IF_THEN2795:%.*]], label [[IF_END2796:%.*]]
+// SIMD-ONLY0:       if.then2795:
+// SIMD-ONLY0-NEXT:    [[TMP1256:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1256]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2796]]
+// SIMD-ONLY0:       if.end2796:
+// SIMD-ONLY0-NEXT:    [[TMP1257:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1258:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2797:%.*]] = icmp eq i32 [[TMP1257]], [[TMP1258]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2797]], label [[IF_THEN2799:%.*]], label [[IF_END2800:%.*]]
+// SIMD-ONLY0:       if.then2799:
+// SIMD-ONLY0-NEXT:    [[TMP1259:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1259]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2800]]
+// SIMD-ONLY0:       if.end2800:
+// SIMD-ONLY0-NEXT:    [[TMP1260:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1261:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2801:%.*]] = icmp sgt i32 [[TMP1260]], [[TMP1261]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2801]], label [[COND_TRUE2803:%.*]], label [[COND_FALSE2804:%.*]]
+// SIMD-ONLY0:       cond.true2803:
+// SIMD-ONLY0-NEXT:    [[TMP1262:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2805:%.*]]
+// SIMD-ONLY0:       cond.false2804:
+// SIMD-ONLY0-NEXT:    [[TMP1263:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2805]]
+// SIMD-ONLY0:       cond.end2805:
+// SIMD-ONLY0-NEXT:    [[COND2806:%.*]] = phi i32 [ [[TMP1262]], [[COND_TRUE2803]] ], [ [[TMP1263]], [[COND_FALSE2804]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2806]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1264:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1265:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2807:%.*]] = icmp slt i32 [[TMP1264]], [[TMP1265]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2807]], label [[COND_TRUE2809:%.*]], label [[COND_FALSE2810:%.*]]
+// SIMD-ONLY0:       cond.true2809:
+// SIMD-ONLY0-NEXT:    [[TMP1266:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2811:%.*]]
+// SIMD-ONLY0:       cond.false2810:
+// SIMD-ONLY0-NEXT:    [[TMP1267:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2811]]
+// SIMD-ONLY0:       cond.end2811:
+// SIMD-ONLY0-NEXT:    [[COND2812:%.*]] = phi i32 [ [[TMP1266]], [[COND_TRUE2809]] ], [ [[TMP1267]], [[COND_FALSE2810]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2812]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1268:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1269:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2813:%.*]] = icmp sgt i32 [[TMP1268]], [[TMP1269]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2813]], label [[COND_TRUE2815:%.*]], label [[COND_FALSE2816:%.*]]
+// SIMD-ONLY0:       cond.true2815:
+// SIMD-ONLY0-NEXT:    [[TMP1270:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2817:%.*]]
+// SIMD-ONLY0:       cond.false2816:
+// SIMD-ONLY0-NEXT:    [[TMP1271:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2817]]
+// SIMD-ONLY0:       cond.end2817:
+// SIMD-ONLY0-NEXT:    [[COND2818:%.*]] = phi i32 [ [[TMP1270]], [[COND_TRUE2815]] ], [ [[TMP1271]], [[COND_FALSE2816]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2818]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1272:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1273:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2819:%.*]] = icmp slt i32 [[TMP1272]], [[TMP1273]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2819]], label [[COND_TRUE2821:%.*]], label [[COND_FALSE2822:%.*]]
+// SIMD-ONLY0:       cond.true2821:
+// SIMD-ONLY0-NEXT:    [[TMP1274:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2823:%.*]]
+// SIMD-ONLY0:       cond.false2822:
+// SIMD-ONLY0-NEXT:    [[TMP1275:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2823]]
+// SIMD-ONLY0:       cond.end2823:
+// SIMD-ONLY0-NEXT:    [[COND2824:%.*]] = phi i32 [ [[TMP1274]], [[COND_TRUE2821]] ], [ [[TMP1275]], [[COND_FALSE2822]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2824]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1276:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1277:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2825:%.*]] = icmp sgt i32 [[TMP1276]], [[TMP1277]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2825]], label [[IF_THEN2827:%.*]], label [[IF_END2828:%.*]]
+// SIMD-ONLY0:       if.then2827:
+// SIMD-ONLY0-NEXT:    [[TMP1278:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1278]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2828]]
+// SIMD-ONLY0:       if.end2828:
+// SIMD-ONLY0-NEXT:    [[TMP1279:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1280:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2829:%.*]] = icmp slt i32 [[TMP1279]], [[TMP1280]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2829]], label [[IF_THEN2831:%.*]], label [[IF_END2832:%.*]]
+// SIMD-ONLY0:       if.then2831:
+// SIMD-ONLY0-NEXT:    [[TMP1281:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1281]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2832]]
+// SIMD-ONLY0:       if.end2832:
+// SIMD-ONLY0-NEXT:    [[TMP1282:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1283:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2833:%.*]] = icmp sgt i32 [[TMP1282]], [[TMP1283]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2833]], label [[IF_THEN2835:%.*]], label [[IF_END2836:%.*]]
+// SIMD-ONLY0:       if.then2835:
+// SIMD-ONLY0-NEXT:    [[TMP1284:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1284]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2836]]
+// SIMD-ONLY0:       if.end2836:
+// SIMD-ONLY0-NEXT:    [[TMP1285:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1286:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2837:%.*]] = icmp slt i32 [[TMP1285]], [[TMP1286]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2837]], label [[IF_THEN2839:%.*]], label [[IF_END2840:%.*]]
+// SIMD-ONLY0:       if.then2839:
+// SIMD-ONLY0-NEXT:    [[TMP1287:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1287]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2840]]
+// SIMD-ONLY0:       if.end2840:
+// SIMD-ONLY0-NEXT:    [[TMP1288:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1289:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2841:%.*]] = icmp eq i32 [[TMP1288]], [[TMP1289]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2841]], label [[COND_TRUE2843:%.*]], label [[COND_FALSE2844:%.*]]
+// SIMD-ONLY0:       cond.true2843:
+// SIMD-ONLY0-NEXT:    [[TMP1290:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2845:%.*]]
+// SIMD-ONLY0:       cond.false2844:
+// SIMD-ONLY0-NEXT:    [[TMP1291:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2845]]
+// SIMD-ONLY0:       cond.end2845:
+// SIMD-ONLY0-NEXT:    [[COND2846:%.*]] = phi i32 [ [[TMP1290]], [[COND_TRUE2843]] ], [ [[TMP1291]], [[COND_FALSE2844]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2846]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1292:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1293:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2847:%.*]] = icmp eq i32 [[TMP1292]], [[TMP1293]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2847]], label [[COND_TRUE2849:%.*]], label [[COND_FALSE2850:%.*]]
+// SIMD-ONLY0:       cond.true2849:
+// SIMD-ONLY0-NEXT:    [[TMP1294:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2851:%.*]]
+// SIMD-ONLY0:       cond.false2850:
+// SIMD-ONLY0-NEXT:    [[TMP1295:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2851]]
+// SIMD-ONLY0:       cond.end2851:
+// SIMD-ONLY0-NEXT:    [[COND2852:%.*]] = phi i32 [ [[TMP1294]], [[COND_TRUE2849]] ], [ [[TMP1295]], [[COND_FALSE2850]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2852]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1296:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1297:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2853:%.*]] = icmp eq i32 [[TMP1296]], [[TMP1297]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2853]], label [[IF_THEN2855:%.*]], label [[IF_END2856:%.*]]
+// SIMD-ONLY0:       if.then2855:
+// SIMD-ONLY0-NEXT:    [[TMP1298:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1298]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2856]]
+// SIMD-ONLY0:       if.end2856:
+// SIMD-ONLY0-NEXT:    [[TMP1299:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1300:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2857:%.*]] = icmp eq i32 [[TMP1299]], [[TMP1300]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2857]], label [[IF_THEN2859:%.*]], label [[IF_END2860:%.*]]
+// SIMD-ONLY0:       if.then2859:
+// SIMD-ONLY0-NEXT:    [[TMP1301:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1301]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2860]]
+// SIMD-ONLY0:       if.end2860:
+// SIMD-ONLY0-NEXT:    [[TMP1302:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1303:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2861:%.*]] = icmp ugt i32 [[TMP1302]], [[TMP1303]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2861]], label [[COND_TRUE2863:%.*]], label [[COND_FALSE2864:%.*]]
+// SIMD-ONLY0:       cond.true2863:
+// SIMD-ONLY0-NEXT:    [[TMP1304:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2865:%.*]]
+// SIMD-ONLY0:       cond.false2864:
+// SIMD-ONLY0-NEXT:    [[TMP1305:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2865]]
+// SIMD-ONLY0:       cond.end2865:
+// SIMD-ONLY0-NEXT:    [[COND2866:%.*]] = phi i32 [ [[TMP1304]], [[COND_TRUE2863]] ], [ [[TMP1305]], [[COND_FALSE2864]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2866]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1306:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1307:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2867:%.*]] = icmp ult i32 [[TMP1306]], [[TMP1307]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2867]], label [[COND_TRUE2869:%.*]], label [[COND_FALSE2870:%.*]]
+// SIMD-ONLY0:       cond.true2869:
+// SIMD-ONLY0-NEXT:    [[TMP1308:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2871:%.*]]
+// SIMD-ONLY0:       cond.false2870:
+// SIMD-ONLY0-NEXT:    [[TMP1309:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2871]]
+// SIMD-ONLY0:       cond.end2871:
+// SIMD-ONLY0-NEXT:    [[COND2872:%.*]] = phi i32 [ [[TMP1308]], [[COND_TRUE2869]] ], [ [[TMP1309]], [[COND_FALSE2870]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2872]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1310:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1311:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2873:%.*]] = icmp ugt i32 [[TMP1310]], [[TMP1311]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2873]], label [[COND_TRUE2875:%.*]], label [[COND_FALSE2876:%.*]]
+// SIMD-ONLY0:       cond.true2875:
+// SIMD-ONLY0-NEXT:    [[TMP1312:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2877:%.*]]
+// SIMD-ONLY0:       cond.false2876:
+// SIMD-ONLY0-NEXT:    [[TMP1313:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2877]]
+// SIMD-ONLY0:       cond.end2877:
+// SIMD-ONLY0-NEXT:    [[COND2878:%.*]] = phi i32 [ [[TMP1312]], [[COND_TRUE2875]] ], [ [[TMP1313]], [[COND_FALSE2876]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2878]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1314:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1315:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2879:%.*]] = icmp ult i32 [[TMP1314]], [[TMP1315]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2879]], label [[COND_TRUE2881:%.*]], label [[COND_FALSE2882:%.*]]
+// SIMD-ONLY0:       cond.true2881:
+// SIMD-ONLY0-NEXT:    [[TMP1316:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2883:%.*]]
+// SIMD-ONLY0:       cond.false2882:
+// SIMD-ONLY0-NEXT:    [[TMP1317:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2883]]
+// SIMD-ONLY0:       cond.end2883:
+// SIMD-ONLY0-NEXT:    [[COND2884:%.*]] = phi i32 [ [[TMP1316]], [[COND_TRUE2881]] ], [ [[TMP1317]], [[COND_FALSE2882]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2884]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1318:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1319:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2885:%.*]] = icmp ugt i32 [[TMP1318]], [[TMP1319]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2885]], label [[IF_THEN2887:%.*]], label [[IF_END2888:%.*]]
+// SIMD-ONLY0:       if.then2887:
+// SIMD-ONLY0-NEXT:    [[TMP1320:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1320]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2888]]
+// SIMD-ONLY0:       if.end2888:
+// SIMD-ONLY0-NEXT:    [[TMP1321:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1322:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2889:%.*]] = icmp ult i32 [[TMP1321]], [[TMP1322]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2889]], label [[IF_THEN2891:%.*]], label [[IF_END2892:%.*]]
+// SIMD-ONLY0:       if.then2891:
+// SIMD-ONLY0-NEXT:    [[TMP1323:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1323]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2892]]
+// SIMD-ONLY0:       if.end2892:
+// SIMD-ONLY0-NEXT:    [[TMP1324:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1325:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2893:%.*]] = icmp ugt i32 [[TMP1324]], [[TMP1325]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2893]], label [[IF_THEN2895:%.*]], label [[IF_END2896:%.*]]
+// SIMD-ONLY0:       if.then2895:
+// SIMD-ONLY0-NEXT:    [[TMP1326:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1326]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2896]]
+// SIMD-ONLY0:       if.end2896:
+// SIMD-ONLY0-NEXT:    [[TMP1327:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1328:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2897:%.*]] = icmp ult i32 [[TMP1327]], [[TMP1328]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2897]], label [[IF_THEN2899:%.*]], label [[IF_END2900:%.*]]
+// SIMD-ONLY0:       if.then2899:
+// SIMD-ONLY0-NEXT:    [[TMP1329:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1329]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2900]]
+// SIMD-ONLY0:       if.end2900:
+// SIMD-ONLY0-NEXT:    [[TMP1330:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1331:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2901:%.*]] = icmp eq i32 [[TMP1330]], [[TMP1331]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2901]], label [[COND_TRUE2903:%.*]], label [[COND_FALSE2904:%.*]]
+// SIMD-ONLY0:       cond.true2903:
+// SIMD-ONLY0-NEXT:    [[TMP1332:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2905:%.*]]
+// SIMD-ONLY0:       cond.false2904:
+// SIMD-ONLY0-NEXT:    [[TMP1333:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2905]]
+// SIMD-ONLY0:       cond.end2905:
+// SIMD-ONLY0-NEXT:    [[COND2906:%.*]] = phi i32 [ [[TMP1332]], [[COND_TRUE2903]] ], [ [[TMP1333]], [[COND_FALSE2904]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2906]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1334:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1335:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2907:%.*]] = icmp eq i32 [[TMP1334]], [[TMP1335]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2907]], label [[COND_TRUE2909:%.*]], label [[COND_FALSE2910:%.*]]
+// SIMD-ONLY0:       cond.true2909:
+// SIMD-ONLY0-NEXT:    [[TMP1336:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2911:%.*]]
+// SIMD-ONLY0:       cond.false2910:
+// SIMD-ONLY0-NEXT:    [[TMP1337:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2911]]
+// SIMD-ONLY0:       cond.end2911:
+// SIMD-ONLY0-NEXT:    [[COND2912:%.*]] = phi i32 [ [[TMP1336]], [[COND_TRUE2909]] ], [ [[TMP1337]], [[COND_FALSE2910]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2912]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1338:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1339:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2913:%.*]] = icmp eq i32 [[TMP1338]], [[TMP1339]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2913]], label [[IF_THEN2915:%.*]], label [[IF_END2916:%.*]]
+// SIMD-ONLY0:       if.then2915:
+// SIMD-ONLY0-NEXT:    [[TMP1340:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1340]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2916]]
+// SIMD-ONLY0:       if.end2916:
+// SIMD-ONLY0-NEXT:    [[TMP1341:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1342:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2917:%.*]] = icmp eq i32 [[TMP1341]], [[TMP1342]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2917]], label [[IF_THEN2919:%.*]], label [[IF_END2920:%.*]]
+// SIMD-ONLY0:       if.then2919:
+// SIMD-ONLY0-NEXT:    [[TMP1343:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1343]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2920]]
+// SIMD-ONLY0:       if.end2920:
+// SIMD-ONLY0-NEXT:    [[TMP1344:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1345:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2921:%.*]] = icmp sgt i32 [[TMP1344]], [[TMP1345]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2921]], label [[COND_TRUE2923:%.*]], label [[COND_FALSE2924:%.*]]
+// SIMD-ONLY0:       cond.true2923:
+// SIMD-ONLY0-NEXT:    [[TMP1346:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2925:%.*]]
+// SIMD-ONLY0:       cond.false2924:
+// SIMD-ONLY0-NEXT:    [[TMP1347:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2925]]
+// SIMD-ONLY0:       cond.end2925:
+// SIMD-ONLY0-NEXT:    [[COND2926:%.*]] = phi i32 [ [[TMP1346]], [[COND_TRUE2923]] ], [ [[TMP1347]], [[COND_FALSE2924]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2926]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1348:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1349:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2927:%.*]] = icmp slt i32 [[TMP1348]], [[TMP1349]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2927]], label [[COND_TRUE2929:%.*]], label [[COND_FALSE2930:%.*]]
+// SIMD-ONLY0:       cond.true2929:
+// SIMD-ONLY0-NEXT:    [[TMP1350:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2931:%.*]]
+// SIMD-ONLY0:       cond.false2930:
+// SIMD-ONLY0-NEXT:    [[TMP1351:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2931]]
+// SIMD-ONLY0:       cond.end2931:
+// SIMD-ONLY0-NEXT:    [[COND2932:%.*]] = phi i32 [ [[TMP1350]], [[COND_TRUE2929]] ], [ [[TMP1351]], [[COND_FALSE2930]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2932]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1352:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1353:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2933:%.*]] = icmp sgt i32 [[TMP1352]], [[TMP1353]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2933]], label [[COND_TRUE2935:%.*]], label [[COND_FALSE2936:%.*]]
+// SIMD-ONLY0:       cond.true2935:
+// SIMD-ONLY0-NEXT:    [[TMP1354:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2937:%.*]]
+// SIMD-ONLY0:       cond.false2936:
+// SIMD-ONLY0-NEXT:    [[TMP1355:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2937]]
+// SIMD-ONLY0:       cond.end2937:
+// SIMD-ONLY0-NEXT:    [[COND2938:%.*]] = phi i32 [ [[TMP1354]], [[COND_TRUE2935]] ], [ [[TMP1355]], [[COND_FALSE2936]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2938]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1356:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1357:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2939:%.*]] = icmp slt i32 [[TMP1356]], [[TMP1357]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2939]], label [[COND_TRUE2941:%.*]], label [[COND_FALSE2942:%.*]]
+// SIMD-ONLY0:       cond.true2941:
+// SIMD-ONLY0-NEXT:    [[TMP1358:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2943:%.*]]
+// SIMD-ONLY0:       cond.false2942:
+// SIMD-ONLY0-NEXT:    [[TMP1359:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2943]]
+// SIMD-ONLY0:       cond.end2943:
+// SIMD-ONLY0-NEXT:    [[COND2944:%.*]] = phi i32 [ [[TMP1358]], [[COND_TRUE2941]] ], [ [[TMP1359]], [[COND_FALSE2942]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2944]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1360:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1361:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2945:%.*]] = icmp sgt i32 [[TMP1360]], [[TMP1361]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2945]], label [[IF_THEN2947:%.*]], label [[IF_END2948:%.*]]
+// SIMD-ONLY0:       if.then2947:
+// SIMD-ONLY0-NEXT:    [[TMP1362:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1362]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2948]]
+// SIMD-ONLY0:       if.end2948:
+// SIMD-ONLY0-NEXT:    [[TMP1363:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1364:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2949:%.*]] = icmp slt i32 [[TMP1363]], [[TMP1364]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2949]], label [[IF_THEN2951:%.*]], label [[IF_END2952:%.*]]
+// SIMD-ONLY0:       if.then2951:
+// SIMD-ONLY0-NEXT:    [[TMP1365:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1365]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2952]]
+// SIMD-ONLY0:       if.end2952:
+// SIMD-ONLY0-NEXT:    [[TMP1366:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1367:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2953:%.*]] = icmp sgt i32 [[TMP1366]], [[TMP1367]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2953]], label [[IF_THEN2955:%.*]], label [[IF_END2956:%.*]]
+// SIMD-ONLY0:       if.then2955:
+// SIMD-ONLY0-NEXT:    [[TMP1368:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1368]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2956]]
+// SIMD-ONLY0:       if.end2956:
+// SIMD-ONLY0-NEXT:    [[TMP1369:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1370:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2957:%.*]] = icmp slt i32 [[TMP1369]], [[TMP1370]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2957]], label [[IF_THEN2959:%.*]], label [[IF_END2960:%.*]]
+// SIMD-ONLY0:       if.then2959:
+// SIMD-ONLY0-NEXT:    [[TMP1371:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1371]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2960]]
+// SIMD-ONLY0:       if.end2960:
+// SIMD-ONLY0-NEXT:    [[TMP1372:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1373:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2961:%.*]] = icmp eq i32 [[TMP1372]], [[TMP1373]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2961]], label [[COND_TRUE2963:%.*]], label [[COND_FALSE2964:%.*]]
+// SIMD-ONLY0:       cond.true2963:
+// SIMD-ONLY0-NEXT:    [[TMP1374:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2965:%.*]]
+// SIMD-ONLY0:       cond.false2964:
+// SIMD-ONLY0-NEXT:    [[TMP1375:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2965]]
+// SIMD-ONLY0:       cond.end2965:
+// SIMD-ONLY0-NEXT:    [[COND2966:%.*]] = phi i32 [ [[TMP1374]], [[COND_TRUE2963]] ], [ [[TMP1375]], [[COND_FALSE2964]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2966]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1376:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1377:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2967:%.*]] = icmp eq i32 [[TMP1376]], [[TMP1377]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2967]], label [[COND_TRUE2969:%.*]], label [[COND_FALSE2970:%.*]]
+// SIMD-ONLY0:       cond.true2969:
+// SIMD-ONLY0-NEXT:    [[TMP1378:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2971:%.*]]
+// SIMD-ONLY0:       cond.false2970:
+// SIMD-ONLY0-NEXT:    [[TMP1379:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2971]]
+// SIMD-ONLY0:       cond.end2971:
+// SIMD-ONLY0-NEXT:    [[COND2972:%.*]] = phi i32 [ [[TMP1378]], [[COND_TRUE2969]] ], [ [[TMP1379]], [[COND_FALSE2970]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2972]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1380:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1381:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2973:%.*]] = icmp eq i32 [[TMP1380]], [[TMP1381]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2973]], label [[IF_THEN2975:%.*]], label [[IF_END2976:%.*]]
+// SIMD-ONLY0:       if.then2975:
+// SIMD-ONLY0-NEXT:    [[TMP1382:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1382]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2976]]
+// SIMD-ONLY0:       if.end2976:
+// SIMD-ONLY0-NEXT:    [[TMP1383:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1384:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2977:%.*]] = icmp eq i32 [[TMP1383]], [[TMP1384]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2977]], label [[IF_THEN2979:%.*]], label [[IF_END2980:%.*]]
+// SIMD-ONLY0:       if.then2979:
+// SIMD-ONLY0-NEXT:    [[TMP1385:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1385]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2980]]
+// SIMD-ONLY0:       if.end2980:
+// SIMD-ONLY0-NEXT:    [[TMP1386:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1387:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2981:%.*]] = icmp ugt i32 [[TMP1386]], [[TMP1387]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2981]], label [[COND_TRUE2983:%.*]], label [[COND_FALSE2984:%.*]]
+// SIMD-ONLY0:       cond.true2983:
+// SIMD-ONLY0-NEXT:    [[TMP1388:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2985:%.*]]
+// SIMD-ONLY0:       cond.false2984:
+// SIMD-ONLY0-NEXT:    [[TMP1389:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2985]]
+// SIMD-ONLY0:       cond.end2985:
+// SIMD-ONLY0-NEXT:    [[COND2986:%.*]] = phi i32 [ [[TMP1388]], [[COND_TRUE2983]] ], [ [[TMP1389]], [[COND_FALSE2984]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2986]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1390:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1391:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2987:%.*]] = icmp ult i32 [[TMP1390]], [[TMP1391]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2987]], label [[COND_TRUE2989:%.*]], label [[COND_FALSE2990:%.*]]
+// SIMD-ONLY0:       cond.true2989:
+// SIMD-ONLY0-NEXT:    [[TMP1392:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2991:%.*]]
+// SIMD-ONLY0:       cond.false2990:
+// SIMD-ONLY0-NEXT:    [[TMP1393:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2991]]
+// SIMD-ONLY0:       cond.end2991:
+// SIMD-ONLY0-NEXT:    [[COND2992:%.*]] = phi i32 [ [[TMP1392]], [[COND_TRUE2989]] ], [ [[TMP1393]], [[COND_FALSE2990]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2992]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1394:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1395:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2993:%.*]] = icmp ugt i32 [[TMP1394]], [[TMP1395]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2993]], label [[COND_TRUE2995:%.*]], label [[COND_FALSE2996:%.*]]
+// SIMD-ONLY0:       cond.true2995:
+// SIMD-ONLY0-NEXT:    [[TMP1396:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2997:%.*]]
+// SIMD-ONLY0:       cond.false2996:
+// SIMD-ONLY0-NEXT:    [[TMP1397:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END2997]]
+// SIMD-ONLY0:       cond.end2997:
+// SIMD-ONLY0-NEXT:    [[COND2998:%.*]] = phi i32 [ [[TMP1396]], [[COND_TRUE2995]] ], [ [[TMP1397]], [[COND_FALSE2996]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND2998]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1398:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1399:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2999:%.*]] = icmp ult i32 [[TMP1398]], [[TMP1399]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2999]], label [[COND_TRUE3001:%.*]], label [[COND_FALSE3002:%.*]]
+// SIMD-ONLY0:       cond.true3001:
+// SIMD-ONLY0-NEXT:    [[TMP1400:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END3003:%.*]]
+// SIMD-ONLY0:       cond.false3002:
+// SIMD-ONLY0-NEXT:    [[TMP1401:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END3003]]
+// SIMD-ONLY0:       cond.end3003:
+// SIMD-ONLY0-NEXT:    [[COND3004:%.*]] = phi i32 [ [[TMP1400]], [[COND_TRUE3001]] ], [ [[TMP1401]], [[COND_FALSE3002]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND3004]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1402:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1403:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3005:%.*]] = icmp ugt i32 [[TMP1402]], [[TMP1403]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3005]], label [[IF_THEN3007:%.*]], label [[IF_END3008:%.*]]
+// SIMD-ONLY0:       if.then3007:
+// SIMD-ONLY0-NEXT:    [[TMP1404:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1404]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3008]]
+// SIMD-ONLY0:       if.end3008:
+// SIMD-ONLY0-NEXT:    [[TMP1405:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1406:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3009:%.*]] = icmp ult i32 [[TMP1405]], [[TMP1406]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3009]], label [[IF_THEN3011:%.*]], label [[IF_END3012:%.*]]
+// SIMD-ONLY0:       if.then3011:
+// SIMD-ONLY0-NEXT:    [[TMP1407:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1407]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3012]]
+// SIMD-ONLY0:       if.end3012:
+// SIMD-ONLY0-NEXT:    [[TMP1408:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1409:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3013:%.*]] = icmp ugt i32 [[TMP1408]], [[TMP1409]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3013]], label [[IF_THEN3015:%.*]], label [[IF_END3016:%.*]]
+// SIMD-ONLY0:       if.then3015:
+// SIMD-ONLY0-NEXT:    [[TMP1410:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1410]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3016]]
+// SIMD-ONLY0:       if.end3016:
+// SIMD-ONLY0-NEXT:    [[TMP1411:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1412:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3017:%.*]] = icmp ult i32 [[TMP1411]], [[TMP1412]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3017]], label [[IF_THEN3019:%.*]], label [[IF_END3020:%.*]]
+// SIMD-ONLY0:       if.then3019:
+// SIMD-ONLY0-NEXT:    [[TMP1413:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1413]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3020]]
+// SIMD-ONLY0:       if.end3020:
+// SIMD-ONLY0-NEXT:    [[TMP1414:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1415:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3021:%.*]] = icmp eq i32 [[TMP1414]], [[TMP1415]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3021]], label [[COND_TRUE3023:%.*]], label [[COND_FALSE3024:%.*]]
+// SIMD-ONLY0:       cond.true3023:
+// SIMD-ONLY0-NEXT:    [[TMP1416:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END3025:%.*]]
+// SIMD-ONLY0:       cond.false3024:
+// SIMD-ONLY0-NEXT:    [[TMP1417:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END3025]]
+// SIMD-ONLY0:       cond.end3025:
+// SIMD-ONLY0-NEXT:    [[COND3026:%.*]] = phi i32 [ [[TMP1416]], [[COND_TRUE3023]] ], [ [[TMP1417]], [[COND_FALSE3024]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND3026]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1418:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1419:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3027:%.*]] = icmp eq i32 [[TMP1418]], [[TMP1419]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3027]], label [[COND_TRUE3029:%.*]], label [[COND_FALSE3030:%.*]]
+// SIMD-ONLY0:       cond.true3029:
+// SIMD-ONLY0-NEXT:    [[TMP1420:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END3031:%.*]]
+// SIMD-ONLY0:       cond.false3030:
+// SIMD-ONLY0-NEXT:    [[TMP1421:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END3031]]
+// SIMD-ONLY0:       cond.end3031:
+// SIMD-ONLY0-NEXT:    [[COND3032:%.*]] = phi i32 [ [[TMP1420]], [[COND_TRUE3029]] ], [ [[TMP1421]], [[COND_FALSE3030]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND3032]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1422:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1423:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3033:%.*]] = icmp eq i32 [[TMP1422]], [[TMP1423]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3033]], label [[IF_THEN3035:%.*]], label [[IF_END3036:%.*]]
+// SIMD-ONLY0:       if.then3035:
+// SIMD-ONLY0-NEXT:    [[TMP1424:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1424]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3036]]
+// SIMD-ONLY0:       if.end3036:
+// SIMD-ONLY0-NEXT:    [[TMP1425:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1426:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3037:%.*]] = icmp eq i32 [[TMP1425]], [[TMP1426]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3037]], label [[IF_THEN3039:%.*]], label [[IF_END3040:%.*]]
+// SIMD-ONLY0:       if.then3039:
+// SIMD-ONLY0-NEXT:    [[TMP1427:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1427]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3040]]
+// SIMD-ONLY0:       if.end3040:
+// SIMD-ONLY0-NEXT:    [[TMP1428:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1429:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3041:%.*]] = icmp sgt i32 [[TMP1428]], [[TMP1429]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3041]], label [[COND_TRUE3043:%.*]], label [[COND_FALSE3044:%.*]]
+// SIMD-ONLY0:       cond.true3043:
+// SIMD-ONLY0-NEXT:    [[TMP1430:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END3045:%.*]]
+// SIMD-ONLY0:       cond.false3044:
+// SIMD-ONLY0-NEXT:    [[TMP1431:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END3045]]
+// SIMD-ONLY0:       cond.end3045:
+// SIMD-ONLY0-NEXT:    [[COND3046:%.*]] = phi i32 [ [[TMP1430]], [[COND_TRUE3043]] ], [ [[TMP1431]], [[COND_FALSE3044]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND3046]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1432:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1433:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3047:%.*]] = icmp slt i32 [[TMP1432]], [[TMP1433]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3047]], label [[COND_TRUE3049:%.*]], label [[COND_FALSE3050:%.*]]
+// SIMD-ONLY0:       cond.true3049:
+// SIMD-ONLY0-NEXT:    [[TMP1434:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END3051:%.*]]
+// SIMD-ONLY0:       cond.false3050:
+// SIMD-ONLY0-NEXT:    [[TMP1435:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END3051]]
+// SIMD-ONLY0:       cond.end3051:
+// SIMD-ONLY0-NEXT:    [[COND3052:%.*]] = phi i32 [ [[TMP1434]], [[COND_TRUE3049]] ], [ [[TMP1435]], [[COND_FALSE3050]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND3052]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1436:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1437:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3053:%.*]] = icmp sgt i32 [[TMP1436]], [[TMP1437]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3053]], label [[COND_TRUE3055:%.*]], label [[COND_FALSE3056:%.*]]
+// SIMD-ONLY0:       cond.true3055:
+// SIMD-ONLY0-NEXT:    [[TMP1438:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END3057:%.*]]
+// SIMD-ONLY0:       cond.false3056:
+// SIMD-ONLY0-NEXT:    [[TMP1439:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END3057]]
+// SIMD-ONLY0:       cond.end3057:
+// SIMD-ONLY0-NEXT:    [[COND3058:%.*]] = phi i32 [ [[TMP1438]], [[COND_TRUE3055]] ], [ [[TMP1439]], [[COND_FALSE3056]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND3058]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1440:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1441:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3059:%.*]] = icmp slt i32 [[TMP1440]], [[TMP1441]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3059]], label [[COND_TRUE3061:%.*]], label [[COND_FALSE3062:%.*]]
+// SIMD-ONLY0:       cond.true3061:
+// SIMD-ONLY0-NEXT:    [[TMP1442:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END3063:%.*]]
+// SIMD-ONLY0:       cond.false3062:
+// SIMD-ONLY0-NEXT:    [[TMP1443:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END3063]]
+// SIMD-ONLY0:       cond.end3063:
+// SIMD-ONLY0-NEXT:    [[COND3064:%.*]] = phi i32 [ [[TMP1442]], [[COND_TRUE3061]] ], [ [[TMP1443]], [[COND_FALSE3062]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND3064]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1444:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1445:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3065:%.*]] = icmp sgt i32 [[TMP1444]], [[TMP1445]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3065]], label [[IF_THEN3067:%.*]], label [[IF_END3068:%.*]]
+// SIMD-ONLY0:       if.then3067:
+// SIMD-ONLY0-NEXT:    [[TMP1446:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1446]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3068]]
+// SIMD-ONLY0:       if.end3068:
+// SIMD-ONLY0-NEXT:    [[TMP1447:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1448:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3069:%.*]] = icmp slt i32 [[TMP1447]], [[TMP1448]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3069]], label [[IF_THEN3071:%.*]], label [[IF_END3072:%.*]]
+// SIMD-ONLY0:       if.then3071:
+// SIMD-ONLY0-NEXT:    [[TMP1449:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1449]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3072]]
+// SIMD-ONLY0:       if.end3072:
+// SIMD-ONLY0-NEXT:    [[TMP1450:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1451:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3073:%.*]] = icmp sgt i32 [[TMP1450]], [[TMP1451]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3073]], label [[IF_THEN3075:%.*]], label [[IF_END3076:%.*]]
+// SIMD-ONLY0:       if.then3075:
+// SIMD-ONLY0-NEXT:    [[TMP1452:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1452]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3076]]
+// SIMD-ONLY0:       if.end3076:
+// SIMD-ONLY0-NEXT:    [[TMP1453:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1454:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3077:%.*]] = icmp slt i32 [[TMP1453]], [[TMP1454]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3077]], label [[IF_THEN3079:%.*]], label [[IF_END3080:%.*]]
+// SIMD-ONLY0:       if.then3079:
+// SIMD-ONLY0-NEXT:    [[TMP1455:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1455]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3080]]
+// SIMD-ONLY0:       if.end3080:
+// SIMD-ONLY0-NEXT:    [[TMP1456:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1457:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3081:%.*]] = icmp eq i32 [[TMP1456]], [[TMP1457]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3081]], label [[COND_TRUE3083:%.*]], label [[COND_FALSE3084:%.*]]
+// SIMD-ONLY0:       cond.true3083:
+// SIMD-ONLY0-NEXT:    [[TMP1458:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END3085:%.*]]
+// SIMD-ONLY0:       cond.false3084:
+// SIMD-ONLY0-NEXT:    [[TMP1459:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END3085]]
+// SIMD-ONLY0:       cond.end3085:
+// SIMD-ONLY0-NEXT:    [[COND3086:%.*]] = phi i32 [ [[TMP1458]], [[COND_TRUE3083]] ], [ [[TMP1459]], [[COND_FALSE3084]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND3086]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1460:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1461:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3087:%.*]] = icmp eq i32 [[TMP1460]], [[TMP1461]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3087]], label [[COND_TRUE3089:%.*]], label [[COND_FALSE3090:%.*]]
+// SIMD-ONLY0:       cond.true3089:
+// SIMD-ONLY0-NEXT:    [[TMP1462:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END3091:%.*]]
+// SIMD-ONLY0:       cond.false3090:
+// SIMD-ONLY0-NEXT:    [[TMP1463:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END3091]]
+// SIMD-ONLY0:       cond.end3091:
+// SIMD-ONLY0-NEXT:    [[COND3092:%.*]] = phi i32 [ [[TMP1462]], [[COND_TRUE3089]] ], [ [[TMP1463]], [[COND_FALSE3090]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND3092]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1464:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1465:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3093:%.*]] = icmp eq i32 [[TMP1464]], [[TMP1465]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3093]], label [[IF_THEN3095:%.*]], label [[IF_END3096:%.*]]
+// SIMD-ONLY0:       if.then3095:
+// SIMD-ONLY0-NEXT:    [[TMP1466:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1466]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3096]]
+// SIMD-ONLY0:       if.end3096:
+// SIMD-ONLY0-NEXT:    [[TMP1467:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1468:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3097:%.*]] = icmp eq i32 [[TMP1467]], [[TMP1468]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3097]], label [[IF_THEN3099:%.*]], label [[IF_END3100:%.*]]
+// SIMD-ONLY0:       if.then3099:
+// SIMD-ONLY0-NEXT:    [[TMP1469:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1469]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3100]]
+// SIMD-ONLY0:       if.end3100:
+// SIMD-ONLY0-NEXT:    [[TMP1470:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1471:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3101:%.*]] = icmp ugt i32 [[TMP1470]], [[TMP1471]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3101]], label [[COND_TRUE3103:%.*]], label [[COND_FALSE3104:%.*]]
+// SIMD-ONLY0:       cond.true3103:
+// SIMD-ONLY0-NEXT:    [[TMP1472:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END3105:%.*]]
+// SIMD-ONLY0:       cond.false3104:
+// SIMD-ONLY0-NEXT:    [[TMP1473:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END3105]]
+// SIMD-ONLY0:       cond.end3105:
+// SIMD-ONLY0-NEXT:    [[COND3106:%.*]] = phi i32 [ [[TMP1472]], [[COND_TRUE3103]] ], [ [[TMP1473]], [[COND_FALSE3104]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND3106]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1474:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1475:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3107:%.*]] = icmp ult i32 [[TMP1474]], [[TMP1475]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3107]], label [[COND_TRUE3109:%.*]], label [[COND_FALSE3110:%.*]]
+// SIMD-ONLY0:       cond.true3109:
+// SIMD-ONLY0-NEXT:    [[TMP1476:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END3111:%.*]]
+// SIMD-ONLY0:       cond.false3110:
+// SIMD-ONLY0-NEXT:    [[TMP1477:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END3111]]
+// SIMD-ONLY0:       cond.end3111:
+// SIMD-ONLY0-NEXT:    [[COND3112:%.*]] = phi i32 [ [[TMP1476]], [[COND_TRUE3109]] ], [ [[TMP1477]], [[COND_FALSE3110]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND3112]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1478:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1479:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3113:%.*]] = icmp ugt i32 [[TMP1478]], [[TMP1479]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3113]], label [[COND_TRUE3115:%.*]], label [[COND_FALSE3116:%.*]]
+// SIMD-ONLY0:       cond.true3115:
+// SIMD-ONLY0-NEXT:    [[TMP1480:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END3117:%.*]]
+// SIMD-ONLY0:       cond.false3116:
+// SIMD-ONLY0-NEXT:    [[TMP1481:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END3117]]
+// SIMD-ONLY0:       cond.end3117:
+// SIMD-ONLY0-NEXT:    [[COND3118:%.*]] = phi i32 [ [[TMP1480]], [[COND_TRUE3115]] ], [ [[TMP1481]], [[COND_FALSE3116]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND3118]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1482:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1483:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3119:%.*]] = icmp ult i32 [[TMP1482]], [[TMP1483]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3119]], label [[COND_TRUE3121:%.*]], label [[COND_FALSE3122:%.*]]
+// SIMD-ONLY0:       cond.true3121:
+// SIMD-ONLY0-NEXT:    [[TMP1484:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END3123:%.*]]
+// SIMD-ONLY0:       cond.false3122:
+// SIMD-ONLY0-NEXT:    [[TMP1485:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END3123]]
+// SIMD-ONLY0:       cond.end3123:
+// SIMD-ONLY0-NEXT:    [[COND3124:%.*]] = phi i32 [ [[TMP1484]], [[COND_TRUE3121]] ], [ [[TMP1485]], [[COND_FALSE3122]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND3124]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1486:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1487:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3125:%.*]] = icmp ugt i32 [[TMP1486]], [[TMP1487]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3125]], label [[IF_THEN3127:%.*]], label [[IF_END3128:%.*]]
+// SIMD-ONLY0:       if.then3127:
+// SIMD-ONLY0-NEXT:    [[TMP1488:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1488]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3128]]
+// SIMD-ONLY0:       if.end3128:
+// SIMD-ONLY0-NEXT:    [[TMP1489:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1490:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3129:%.*]] = icmp ult i32 [[TMP1489]], [[TMP1490]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3129]], label [[IF_THEN3131:%.*]], label [[IF_END3132:%.*]]
+// SIMD-ONLY0:       if.then3131:
+// SIMD-ONLY0-NEXT:    [[TMP1491:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1491]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3132]]
+// SIMD-ONLY0:       if.end3132:
+// SIMD-ONLY0-NEXT:    [[TMP1492:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1493:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3133:%.*]] = icmp ugt i32 [[TMP1492]], [[TMP1493]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3133]], label [[IF_THEN3135:%.*]], label [[IF_END3136:%.*]]
+// SIMD-ONLY0:       if.then3135:
+// SIMD-ONLY0-NEXT:    [[TMP1494:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1494]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3136]]
+// SIMD-ONLY0:       if.end3136:
+// SIMD-ONLY0-NEXT:    [[TMP1495:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1496:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3137:%.*]] = icmp ult i32 [[TMP1495]], [[TMP1496]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3137]], label [[IF_THEN3139:%.*]], label [[IF_END3140:%.*]]
+// SIMD-ONLY0:       if.then3139:
+// SIMD-ONLY0-NEXT:    [[TMP1497:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1497]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3140]]
+// SIMD-ONLY0:       if.end3140:
+// SIMD-ONLY0-NEXT:    [[TMP1498:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1499:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3141:%.*]] = icmp eq i32 [[TMP1498]], [[TMP1499]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3141]], label [[COND_TRUE3143:%.*]], label [[COND_FALSE3144:%.*]]
+// SIMD-ONLY0:       cond.true3143:
+// SIMD-ONLY0-NEXT:    [[TMP1500:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END3145:%.*]]
+// SIMD-ONLY0:       cond.false3144:
+// SIMD-ONLY0-NEXT:    [[TMP1501:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END3145]]
+// SIMD-ONLY0:       cond.end3145:
+// SIMD-ONLY0-NEXT:    [[COND3146:%.*]] = phi i32 [ [[TMP1500]], [[COND_TRUE3143]] ], [ [[TMP1501]], [[COND_FALSE3144]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND3146]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1502:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1503:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3147:%.*]] = icmp eq i32 [[TMP1502]], [[TMP1503]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3147]], label [[COND_TRUE3149:%.*]], label [[COND_FALSE3150:%.*]]
+// SIMD-ONLY0:       cond.true3149:
+// SIMD-ONLY0-NEXT:    [[TMP1504:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END3151:%.*]]
+// SIMD-ONLY0:       cond.false3150:
+// SIMD-ONLY0-NEXT:    [[TMP1505:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END3151]]
+// SIMD-ONLY0:       cond.end3151:
+// SIMD-ONLY0-NEXT:    [[COND3152:%.*]] = phi i32 [ [[TMP1504]], [[COND_TRUE3149]] ], [ [[TMP1505]], [[COND_FALSE3150]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND3152]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1506:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1507:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3153:%.*]] = icmp eq i32 [[TMP1506]], [[TMP1507]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3153]], label [[IF_THEN3155:%.*]], label [[IF_END3156:%.*]]
+// SIMD-ONLY0:       if.then3155:
+// SIMD-ONLY0-NEXT:    [[TMP1508:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1508]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3156]]
+// SIMD-ONLY0:       if.end3156:
+// SIMD-ONLY0-NEXT:    [[TMP1509:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1510:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3157:%.*]] = icmp eq i32 [[TMP1509]], [[TMP1510]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3157]], label [[IF_THEN3159:%.*]], label [[IF_END3160:%.*]]
+// SIMD-ONLY0:       if.then3159:
+// SIMD-ONLY0-NEXT:    [[TMP1511:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1511]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3160]]
+// SIMD-ONLY0:       if.end3160:
+// SIMD-ONLY0-NEXT:    [[TMP1512:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1513:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3161:%.*]] = icmp sgt i64 [[TMP1512]], [[TMP1513]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3161]], label [[COND_TRUE3163:%.*]], label [[COND_FALSE3164:%.*]]
+// SIMD-ONLY0:       cond.true3163:
+// SIMD-ONLY0-NEXT:    [[TMP1514:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3165:%.*]]
+// SIMD-ONLY0:       cond.false3164:
+// SIMD-ONLY0-NEXT:    [[TMP1515:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3165]]
+// SIMD-ONLY0:       cond.end3165:
+// SIMD-ONLY0-NEXT:    [[COND3166:%.*]] = phi i64 [ [[TMP1514]], [[COND_TRUE3163]] ], [ [[TMP1515]], [[COND_FALSE3164]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3166]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1516:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1517:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3167:%.*]] = icmp slt i64 [[TMP1516]], [[TMP1517]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3167]], label [[COND_TRUE3169:%.*]], label [[COND_FALSE3170:%.*]]
+// SIMD-ONLY0:       cond.true3169:
+// SIMD-ONLY0-NEXT:    [[TMP1518:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3171:%.*]]
+// SIMD-ONLY0:       cond.false3170:
+// SIMD-ONLY0-NEXT:    [[TMP1519:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3171]]
+// SIMD-ONLY0:       cond.end3171:
+// SIMD-ONLY0-NEXT:    [[COND3172:%.*]] = phi i64 [ [[TMP1518]], [[COND_TRUE3169]] ], [ [[TMP1519]], [[COND_FALSE3170]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3172]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1520:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1521:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3173:%.*]] = icmp sgt i64 [[TMP1520]], [[TMP1521]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3173]], label [[COND_TRUE3175:%.*]], label [[COND_FALSE3176:%.*]]
+// SIMD-ONLY0:       cond.true3175:
+// SIMD-ONLY0-NEXT:    [[TMP1522:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3177:%.*]]
+// SIMD-ONLY0:       cond.false3176:
+// SIMD-ONLY0-NEXT:    [[TMP1523:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3177]]
+// SIMD-ONLY0:       cond.end3177:
+// SIMD-ONLY0-NEXT:    [[COND3178:%.*]] = phi i64 [ [[TMP1522]], [[COND_TRUE3175]] ], [ [[TMP1523]], [[COND_FALSE3176]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3178]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1524:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1525:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3179:%.*]] = icmp slt i64 [[TMP1524]], [[TMP1525]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3179]], label [[COND_TRUE3181:%.*]], label [[COND_FALSE3182:%.*]]
+// SIMD-ONLY0:       cond.true3181:
+// SIMD-ONLY0-NEXT:    [[TMP1526:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3183:%.*]]
+// SIMD-ONLY0:       cond.false3182:
+// SIMD-ONLY0-NEXT:    [[TMP1527:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3183]]
+// SIMD-ONLY0:       cond.end3183:
+// SIMD-ONLY0-NEXT:    [[COND3184:%.*]] = phi i64 [ [[TMP1526]], [[COND_TRUE3181]] ], [ [[TMP1527]], [[COND_FALSE3182]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3184]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1528:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1529:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3185:%.*]] = icmp sgt i64 [[TMP1528]], [[TMP1529]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3185]], label [[IF_THEN3187:%.*]], label [[IF_END3188:%.*]]
+// SIMD-ONLY0:       if.then3187:
+// SIMD-ONLY0-NEXT:    [[TMP1530:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1530]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3188]]
+// SIMD-ONLY0:       if.end3188:
+// SIMD-ONLY0-NEXT:    [[TMP1531:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1532:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3189:%.*]] = icmp slt i64 [[TMP1531]], [[TMP1532]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3189]], label [[IF_THEN3191:%.*]], label [[IF_END3192:%.*]]
+// SIMD-ONLY0:       if.then3191:
+// SIMD-ONLY0-NEXT:    [[TMP1533:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1533]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3192]]
+// SIMD-ONLY0:       if.end3192:
+// SIMD-ONLY0-NEXT:    [[TMP1534:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1535:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3193:%.*]] = icmp sgt i64 [[TMP1534]], [[TMP1535]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3193]], label [[IF_THEN3195:%.*]], label [[IF_END3196:%.*]]
+// SIMD-ONLY0:       if.then3195:
+// SIMD-ONLY0-NEXT:    [[TMP1536:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1536]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3196]]
+// SIMD-ONLY0:       if.end3196:
+// SIMD-ONLY0-NEXT:    [[TMP1537:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1538:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3197:%.*]] = icmp slt i64 [[TMP1537]], [[TMP1538]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3197]], label [[IF_THEN3199:%.*]], label [[IF_END3200:%.*]]
+// SIMD-ONLY0:       if.then3199:
+// SIMD-ONLY0-NEXT:    [[TMP1539:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1539]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3200]]
+// SIMD-ONLY0:       if.end3200:
+// SIMD-ONLY0-NEXT:    [[TMP1540:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1541:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3201:%.*]] = icmp eq i64 [[TMP1540]], [[TMP1541]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3201]], label [[COND_TRUE3203:%.*]], label [[COND_FALSE3204:%.*]]
+// SIMD-ONLY0:       cond.true3203:
+// SIMD-ONLY0-NEXT:    [[TMP1542:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3205:%.*]]
+// SIMD-ONLY0:       cond.false3204:
+// SIMD-ONLY0-NEXT:    [[TMP1543:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3205]]
+// SIMD-ONLY0:       cond.end3205:
+// SIMD-ONLY0-NEXT:    [[COND3206:%.*]] = phi i64 [ [[TMP1542]], [[COND_TRUE3203]] ], [ [[TMP1543]], [[COND_FALSE3204]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3206]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1544:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1545:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3207:%.*]] = icmp eq i64 [[TMP1544]], [[TMP1545]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3207]], label [[COND_TRUE3209:%.*]], label [[COND_FALSE3210:%.*]]
+// SIMD-ONLY0:       cond.true3209:
+// SIMD-ONLY0-NEXT:    [[TMP1546:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3211:%.*]]
+// SIMD-ONLY0:       cond.false3210:
+// SIMD-ONLY0-NEXT:    [[TMP1547:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3211]]
+// SIMD-ONLY0:       cond.end3211:
+// SIMD-ONLY0-NEXT:    [[COND3212:%.*]] = phi i64 [ [[TMP1546]], [[COND_TRUE3209]] ], [ [[TMP1547]], [[COND_FALSE3210]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3212]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1548:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1549:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3213:%.*]] = icmp eq i64 [[TMP1548]], [[TMP1549]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3213]], label [[IF_THEN3215:%.*]], label [[IF_END3216:%.*]]
+// SIMD-ONLY0:       if.then3215:
+// SIMD-ONLY0-NEXT:    [[TMP1550:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1550]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3216]]
+// SIMD-ONLY0:       if.end3216:
+// SIMD-ONLY0-NEXT:    [[TMP1551:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1552:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3217:%.*]] = icmp eq i64 [[TMP1551]], [[TMP1552]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3217]], label [[IF_THEN3219:%.*]], label [[IF_END3220:%.*]]
+// SIMD-ONLY0:       if.then3219:
+// SIMD-ONLY0-NEXT:    [[TMP1553:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1553]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3220]]
+// SIMD-ONLY0:       if.end3220:
+// SIMD-ONLY0-NEXT:    [[TMP1554:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1555:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3221:%.*]] = icmp ugt i64 [[TMP1554]], [[TMP1555]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3221]], label [[COND_TRUE3223:%.*]], label [[COND_FALSE3224:%.*]]
+// SIMD-ONLY0:       cond.true3223:
+// SIMD-ONLY0-NEXT:    [[TMP1556:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3225:%.*]]
+// SIMD-ONLY0:       cond.false3224:
+// SIMD-ONLY0-NEXT:    [[TMP1557:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3225]]
+// SIMD-ONLY0:       cond.end3225:
+// SIMD-ONLY0-NEXT:    [[COND3226:%.*]] = phi i64 [ [[TMP1556]], [[COND_TRUE3223]] ], [ [[TMP1557]], [[COND_FALSE3224]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3226]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1558:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1559:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3227:%.*]] = icmp ult i64 [[TMP1558]], [[TMP1559]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3227]], label [[COND_TRUE3229:%.*]], label [[COND_FALSE3230:%.*]]
+// SIMD-ONLY0:       cond.true3229:
+// SIMD-ONLY0-NEXT:    [[TMP1560:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3231:%.*]]
+// SIMD-ONLY0:       cond.false3230:
+// SIMD-ONLY0-NEXT:    [[TMP1561:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3231]]
+// SIMD-ONLY0:       cond.end3231:
+// SIMD-ONLY0-NEXT:    [[COND3232:%.*]] = phi i64 [ [[TMP1560]], [[COND_TRUE3229]] ], [ [[TMP1561]], [[COND_FALSE3230]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3232]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1562:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1563:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3233:%.*]] = icmp ugt i64 [[TMP1562]], [[TMP1563]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3233]], label [[COND_TRUE3235:%.*]], label [[COND_FALSE3236:%.*]]
+// SIMD-ONLY0:       cond.true3235:
+// SIMD-ONLY0-NEXT:    [[TMP1564:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3237:%.*]]
+// SIMD-ONLY0:       cond.false3236:
+// SIMD-ONLY0-NEXT:    [[TMP1565:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3237]]
+// SIMD-ONLY0:       cond.end3237:
+// SIMD-ONLY0-NEXT:    [[COND3238:%.*]] = phi i64 [ [[TMP1564]], [[COND_TRUE3235]] ], [ [[TMP1565]], [[COND_FALSE3236]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3238]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1566:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1567:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3239:%.*]] = icmp ult i64 [[TMP1566]], [[TMP1567]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3239]], label [[COND_TRUE3241:%.*]], label [[COND_FALSE3242:%.*]]
+// SIMD-ONLY0:       cond.true3241:
+// SIMD-ONLY0-NEXT:    [[TMP1568:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3243:%.*]]
+// SIMD-ONLY0:       cond.false3242:
+// SIMD-ONLY0-NEXT:    [[TMP1569:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3243]]
+// SIMD-ONLY0:       cond.end3243:
+// SIMD-ONLY0-NEXT:    [[COND3244:%.*]] = phi i64 [ [[TMP1568]], [[COND_TRUE3241]] ], [ [[TMP1569]], [[COND_FALSE3242]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3244]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1570:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1571:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3245:%.*]] = icmp ugt i64 [[TMP1570]], [[TMP1571]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3245]], label [[IF_THEN3247:%.*]], label [[IF_END3248:%.*]]
+// SIMD-ONLY0:       if.then3247:
+// SIMD-ONLY0-NEXT:    [[TMP1572:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1572]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3248]]
+// SIMD-ONLY0:       if.end3248:
+// SIMD-ONLY0-NEXT:    [[TMP1573:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1574:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3249:%.*]] = icmp ult i64 [[TMP1573]], [[TMP1574]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3249]], label [[IF_THEN3251:%.*]], label [[IF_END3252:%.*]]
+// SIMD-ONLY0:       if.then3251:
+// SIMD-ONLY0-NEXT:    [[TMP1575:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1575]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3252]]
+// SIMD-ONLY0:       if.end3252:
+// SIMD-ONLY0-NEXT:    [[TMP1576:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1577:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3253:%.*]] = icmp ugt i64 [[TMP1576]], [[TMP1577]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3253]], label [[IF_THEN3255:%.*]], label [[IF_END3256:%.*]]
+// SIMD-ONLY0:       if.then3255:
+// SIMD-ONLY0-NEXT:    [[TMP1578:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1578]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3256]]
+// SIMD-ONLY0:       if.end3256:
+// SIMD-ONLY0-NEXT:    [[TMP1579:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1580:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3257:%.*]] = icmp ult i64 [[TMP1579]], [[TMP1580]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3257]], label [[IF_THEN3259:%.*]], label [[IF_END3260:%.*]]
+// SIMD-ONLY0:       if.then3259:
+// SIMD-ONLY0-NEXT:    [[TMP1581:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1581]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3260]]
+// SIMD-ONLY0:       if.end3260:
+// SIMD-ONLY0-NEXT:    [[TMP1582:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1583:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3261:%.*]] = icmp eq i64 [[TMP1582]], [[TMP1583]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3261]], label [[COND_TRUE3263:%.*]], label [[COND_FALSE3264:%.*]]
+// SIMD-ONLY0:       cond.true3263:
+// SIMD-ONLY0-NEXT:    [[TMP1584:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3265:%.*]]
+// SIMD-ONLY0:       cond.false3264:
+// SIMD-ONLY0-NEXT:    [[TMP1585:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3265]]
+// SIMD-ONLY0:       cond.end3265:
+// SIMD-ONLY0-NEXT:    [[COND3266:%.*]] = phi i64 [ [[TMP1584]], [[COND_TRUE3263]] ], [ [[TMP1585]], [[COND_FALSE3264]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3266]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1586:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1587:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3267:%.*]] = icmp eq i64 [[TMP1586]], [[TMP1587]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3267]], label [[COND_TRUE3269:%.*]], label [[COND_FALSE3270:%.*]]
+// SIMD-ONLY0:       cond.true3269:
+// SIMD-ONLY0-NEXT:    [[TMP1588:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3271:%.*]]
+// SIMD-ONLY0:       cond.false3270:
+// SIMD-ONLY0-NEXT:    [[TMP1589:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3271]]
+// SIMD-ONLY0:       cond.end3271:
+// SIMD-ONLY0-NEXT:    [[COND3272:%.*]] = phi i64 [ [[TMP1588]], [[COND_TRUE3269]] ], [ [[TMP1589]], [[COND_FALSE3270]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3272]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1590:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1591:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3273:%.*]] = icmp eq i64 [[TMP1590]], [[TMP1591]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3273]], label [[IF_THEN3275:%.*]], label [[IF_END3276:%.*]]
+// SIMD-ONLY0:       if.then3275:
+// SIMD-ONLY0-NEXT:    [[TMP1592:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1592]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3276]]
+// SIMD-ONLY0:       if.end3276:
+// SIMD-ONLY0-NEXT:    [[TMP1593:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1594:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3277:%.*]] = icmp eq i64 [[TMP1593]], [[TMP1594]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3277]], label [[IF_THEN3279:%.*]], label [[IF_END3280:%.*]]
+// SIMD-ONLY0:       if.then3279:
+// SIMD-ONLY0-NEXT:    [[TMP1595:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1595]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3280]]
+// SIMD-ONLY0:       if.end3280:
+// SIMD-ONLY0-NEXT:    [[TMP1596:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1597:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3281:%.*]] = icmp sgt i64 [[TMP1596]], [[TMP1597]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3281]], label [[COND_TRUE3283:%.*]], label [[COND_FALSE3284:%.*]]
+// SIMD-ONLY0:       cond.true3283:
+// SIMD-ONLY0-NEXT:    [[TMP1598:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3285:%.*]]
+// SIMD-ONLY0:       cond.false3284:
+// SIMD-ONLY0-NEXT:    [[TMP1599:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3285]]
+// SIMD-ONLY0:       cond.end3285:
+// SIMD-ONLY0-NEXT:    [[COND3286:%.*]] = phi i64 [ [[TMP1598]], [[COND_TRUE3283]] ], [ [[TMP1599]], [[COND_FALSE3284]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3286]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1600:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1601:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3287:%.*]] = icmp slt i64 [[TMP1600]], [[TMP1601]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3287]], label [[COND_TRUE3289:%.*]], label [[COND_FALSE3290:%.*]]
+// SIMD-ONLY0:       cond.true3289:
+// SIMD-ONLY0-NEXT:    [[TMP1602:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3291:%.*]]
+// SIMD-ONLY0:       cond.false3290:
+// SIMD-ONLY0-NEXT:    [[TMP1603:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3291]]
+// SIMD-ONLY0:       cond.end3291:
+// SIMD-ONLY0-NEXT:    [[COND3292:%.*]] = phi i64 [ [[TMP1602]], [[COND_TRUE3289]] ], [ [[TMP1603]], [[COND_FALSE3290]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3292]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1604:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1605:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3293:%.*]] = icmp sgt i64 [[TMP1604]], [[TMP1605]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3293]], label [[COND_TRUE3295:%.*]], label [[COND_FALSE3296:%.*]]
+// SIMD-ONLY0:       cond.true3295:
+// SIMD-ONLY0-NEXT:    [[TMP1606:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3297:%.*]]
+// SIMD-ONLY0:       cond.false3296:
+// SIMD-ONLY0-NEXT:    [[TMP1607:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3297]]
+// SIMD-ONLY0:       cond.end3297:
+// SIMD-ONLY0-NEXT:    [[COND3298:%.*]] = phi i64 [ [[TMP1606]], [[COND_TRUE3295]] ], [ [[TMP1607]], [[COND_FALSE3296]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3298]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1608:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1609:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3299:%.*]] = icmp slt i64 [[TMP1608]], [[TMP1609]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3299]], label [[COND_TRUE3301:%.*]], label [[COND_FALSE3302:%.*]]
+// SIMD-ONLY0:       cond.true3301:
+// SIMD-ONLY0-NEXT:    [[TMP1610:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3303:%.*]]
+// SIMD-ONLY0:       cond.false3302:
+// SIMD-ONLY0-NEXT:    [[TMP1611:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3303]]
+// SIMD-ONLY0:       cond.end3303:
+// SIMD-ONLY0-NEXT:    [[COND3304:%.*]] = phi i64 [ [[TMP1610]], [[COND_TRUE3301]] ], [ [[TMP1611]], [[COND_FALSE3302]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3304]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1612:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1613:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3305:%.*]] = icmp sgt i64 [[TMP1612]], [[TMP1613]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3305]], label [[IF_THEN3307:%.*]], label [[IF_END3308:%.*]]
+// SIMD-ONLY0:       if.then3307:
+// SIMD-ONLY0-NEXT:    [[TMP1614:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1614]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3308]]
+// SIMD-ONLY0:       if.end3308:
+// SIMD-ONLY0-NEXT:    [[TMP1615:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1616:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3309:%.*]] = icmp slt i64 [[TMP1615]], [[TMP1616]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3309]], label [[IF_THEN3311:%.*]], label [[IF_END3312:%.*]]
+// SIMD-ONLY0:       if.then3311:
+// SIMD-ONLY0-NEXT:    [[TMP1617:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1617]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3312]]
+// SIMD-ONLY0:       if.end3312:
+// SIMD-ONLY0-NEXT:    [[TMP1618:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1619:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3313:%.*]] = icmp sgt i64 [[TMP1618]], [[TMP1619]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3313]], label [[IF_THEN3315:%.*]], label [[IF_END3316:%.*]]
+// SIMD-ONLY0:       if.then3315:
+// SIMD-ONLY0-NEXT:    [[TMP1620:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1620]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3316]]
+// SIMD-ONLY0:       if.end3316:
+// SIMD-ONLY0-NEXT:    [[TMP1621:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1622:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3317:%.*]] = icmp slt i64 [[TMP1621]], [[TMP1622]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3317]], label [[IF_THEN3319:%.*]], label [[IF_END3320:%.*]]
+// SIMD-ONLY0:       if.then3319:
+// SIMD-ONLY0-NEXT:    [[TMP1623:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1623]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3320]]
+// SIMD-ONLY0:       if.end3320:
+// SIMD-ONLY0-NEXT:    [[TMP1624:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1625:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3321:%.*]] = icmp eq i64 [[TMP1624]], [[TMP1625]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3321]], label [[COND_TRUE3323:%.*]], label [[COND_FALSE3324:%.*]]
+// SIMD-ONLY0:       cond.true3323:
+// SIMD-ONLY0-NEXT:    [[TMP1626:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3325:%.*]]
+// SIMD-ONLY0:       cond.false3324:
+// SIMD-ONLY0-NEXT:    [[TMP1627:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3325]]
+// SIMD-ONLY0:       cond.end3325:
+// SIMD-ONLY0-NEXT:    [[COND3326:%.*]] = phi i64 [ [[TMP1626]], [[COND_TRUE3323]] ], [ [[TMP1627]], [[COND_FALSE3324]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3326]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1628:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1629:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3327:%.*]] = icmp eq i64 [[TMP1628]], [[TMP1629]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3327]], label [[COND_TRUE3329:%.*]], label [[COND_FALSE3330:%.*]]
+// SIMD-ONLY0:       cond.true3329:
+// SIMD-ONLY0-NEXT:    [[TMP1630:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3331:%.*]]
+// SIMD-ONLY0:       cond.false3330:
+// SIMD-ONLY0-NEXT:    [[TMP1631:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3331]]
+// SIMD-ONLY0:       cond.end3331:
+// SIMD-ONLY0-NEXT:    [[COND3332:%.*]] = phi i64 [ [[TMP1630]], [[COND_TRUE3329]] ], [ [[TMP1631]], [[COND_FALSE3330]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3332]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1632:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1633:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3333:%.*]] = icmp eq i64 [[TMP1632]], [[TMP1633]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3333]], label [[IF_THEN3335:%.*]], label [[IF_END3336:%.*]]
+// SIMD-ONLY0:       if.then3335:
+// SIMD-ONLY0-NEXT:    [[TMP1634:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1634]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3336]]
+// SIMD-ONLY0:       if.end3336:
+// SIMD-ONLY0-NEXT:    [[TMP1635:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1636:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3337:%.*]] = icmp eq i64 [[TMP1635]], [[TMP1636]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3337]], label [[IF_THEN3339:%.*]], label [[IF_END3340:%.*]]
+// SIMD-ONLY0:       if.then3339:
+// SIMD-ONLY0-NEXT:    [[TMP1637:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1637]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3340]]
+// SIMD-ONLY0:       if.end3340:
+// SIMD-ONLY0-NEXT:    [[TMP1638:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1639:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3341:%.*]] = icmp ugt i64 [[TMP1638]], [[TMP1639]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3341]], label [[COND_TRUE3343:%.*]], label [[COND_FALSE3344:%.*]]
+// SIMD-ONLY0:       cond.true3343:
+// SIMD-ONLY0-NEXT:    [[TMP1640:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3345:%.*]]
+// SIMD-ONLY0:       cond.false3344:
+// SIMD-ONLY0-NEXT:    [[TMP1641:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3345]]
+// SIMD-ONLY0:       cond.end3345:
+// SIMD-ONLY0-NEXT:    [[COND3346:%.*]] = phi i64 [ [[TMP1640]], [[COND_TRUE3343]] ], [ [[TMP1641]], [[COND_FALSE3344]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3346]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1642:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1643:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3347:%.*]] = icmp ult i64 [[TMP1642]], [[TMP1643]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3347]], label [[COND_TRUE3349:%.*]], label [[COND_FALSE3350:%.*]]
+// SIMD-ONLY0:       cond.true3349:
+// SIMD-ONLY0-NEXT:    [[TMP1644:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3351:%.*]]
+// SIMD-ONLY0:       cond.false3350:
+// SIMD-ONLY0-NEXT:    [[TMP1645:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3351]]
+// SIMD-ONLY0:       cond.end3351:
+// SIMD-ONLY0-NEXT:    [[COND3352:%.*]] = phi i64 [ [[TMP1644]], [[COND_TRUE3349]] ], [ [[TMP1645]], [[COND_FALSE3350]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3352]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1646:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1647:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3353:%.*]] = icmp ugt i64 [[TMP1646]], [[TMP1647]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3353]], label [[COND_TRUE3355:%.*]], label [[COND_FALSE3356:%.*]]
+// SIMD-ONLY0:       cond.true3355:
+// SIMD-ONLY0-NEXT:    [[TMP1648:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3357:%.*]]
+// SIMD-ONLY0:       cond.false3356:
+// SIMD-ONLY0-NEXT:    [[TMP1649:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3357]]
+// SIMD-ONLY0:       cond.end3357:
+// SIMD-ONLY0-NEXT:    [[COND3358:%.*]] = phi i64 [ [[TMP1648]], [[COND_TRUE3355]] ], [ [[TMP1649]], [[COND_FALSE3356]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3358]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1650:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1651:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3359:%.*]] = icmp ult i64 [[TMP1650]], [[TMP1651]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3359]], label [[COND_TRUE3361:%.*]], label [[COND_FALSE3362:%.*]]
+// SIMD-ONLY0:       cond.true3361:
+// SIMD-ONLY0-NEXT:    [[TMP1652:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3363:%.*]]
+// SIMD-ONLY0:       cond.false3362:
+// SIMD-ONLY0-NEXT:    [[TMP1653:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3363]]
+// SIMD-ONLY0:       cond.end3363:
+// SIMD-ONLY0-NEXT:    [[COND3364:%.*]] = phi i64 [ [[TMP1652]], [[COND_TRUE3361]] ], [ [[TMP1653]], [[COND_FALSE3362]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3364]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1654:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1655:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3365:%.*]] = icmp ugt i64 [[TMP1654]], [[TMP1655]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3365]], label [[IF_THEN3367:%.*]], label [[IF_END3368:%.*]]
+// SIMD-ONLY0:       if.then3367:
+// SIMD-ONLY0-NEXT:    [[TMP1656:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1656]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3368]]
+// SIMD-ONLY0:       if.end3368:
+// SIMD-ONLY0-NEXT:    [[TMP1657:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1658:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3369:%.*]] = icmp ult i64 [[TMP1657]], [[TMP1658]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3369]], label [[IF_THEN3371:%.*]], label [[IF_END3372:%.*]]
+// SIMD-ONLY0:       if.then3371:
+// SIMD-ONLY0-NEXT:    [[TMP1659:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1659]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3372]]
+// SIMD-ONLY0:       if.end3372:
+// SIMD-ONLY0-NEXT:    [[TMP1660:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1661:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3373:%.*]] = icmp ugt i64 [[TMP1660]], [[TMP1661]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3373]], label [[IF_THEN3375:%.*]], label [[IF_END3376:%.*]]
+// SIMD-ONLY0:       if.then3375:
+// SIMD-ONLY0-NEXT:    [[TMP1662:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1662]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3376]]
+// SIMD-ONLY0:       if.end3376:
+// SIMD-ONLY0-NEXT:    [[TMP1663:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1664:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3377:%.*]] = icmp ult i64 [[TMP1663]], [[TMP1664]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3377]], label [[IF_THEN3379:%.*]], label [[IF_END3380:%.*]]
+// SIMD-ONLY0:       if.then3379:
+// SIMD-ONLY0-NEXT:    [[TMP1665:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1665]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3380]]
+// SIMD-ONLY0:       if.end3380:
+// SIMD-ONLY0-NEXT:    [[TMP1666:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1667:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3381:%.*]] = icmp eq i64 [[TMP1666]], [[TMP1667]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3381]], label [[COND_TRUE3383:%.*]], label [[COND_FALSE3384:%.*]]
+// SIMD-ONLY0:       cond.true3383:
+// SIMD-ONLY0-NEXT:    [[TMP1668:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3385:%.*]]
+// SIMD-ONLY0:       cond.false3384:
+// SIMD-ONLY0-NEXT:    [[TMP1669:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3385]]
+// SIMD-ONLY0:       cond.end3385:
+// SIMD-ONLY0-NEXT:    [[COND3386:%.*]] = phi i64 [ [[TMP1668]], [[COND_TRUE3383]] ], [ [[TMP1669]], [[COND_FALSE3384]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3386]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1670:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1671:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3387:%.*]] = icmp eq i64 [[TMP1670]], [[TMP1671]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3387]], label [[COND_TRUE3389:%.*]], label [[COND_FALSE3390:%.*]]
+// SIMD-ONLY0:       cond.true3389:
+// SIMD-ONLY0-NEXT:    [[TMP1672:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3391:%.*]]
+// SIMD-ONLY0:       cond.false3390:
+// SIMD-ONLY0-NEXT:    [[TMP1673:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3391]]
+// SIMD-ONLY0:       cond.end3391:
+// SIMD-ONLY0-NEXT:    [[COND3392:%.*]] = phi i64 [ [[TMP1672]], [[COND_TRUE3389]] ], [ [[TMP1673]], [[COND_FALSE3390]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3392]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1674:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1675:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3393:%.*]] = icmp eq i64 [[TMP1674]], [[TMP1675]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3393]], label [[IF_THEN3395:%.*]], label [[IF_END3396:%.*]]
+// SIMD-ONLY0:       if.then3395:
+// SIMD-ONLY0-NEXT:    [[TMP1676:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1676]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3396]]
+// SIMD-ONLY0:       if.end3396:
+// SIMD-ONLY0-NEXT:    [[TMP1677:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1678:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3397:%.*]] = icmp eq i64 [[TMP1677]], [[TMP1678]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3397]], label [[IF_THEN3399:%.*]], label [[IF_END3400:%.*]]
+// SIMD-ONLY0:       if.then3399:
+// SIMD-ONLY0-NEXT:    [[TMP1679:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1679]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3400]]
+// SIMD-ONLY0:       if.end3400:
+// SIMD-ONLY0-NEXT:    [[TMP1680:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1681:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3401:%.*]] = icmp sgt i64 [[TMP1680]], [[TMP1681]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3401]], label [[COND_TRUE3403:%.*]], label [[COND_FALSE3404:%.*]]
+// SIMD-ONLY0:       cond.true3403:
+// SIMD-ONLY0-NEXT:    [[TMP1682:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3405:%.*]]
+// SIMD-ONLY0:       cond.false3404:
+// SIMD-ONLY0-NEXT:    [[TMP1683:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3405]]
+// SIMD-ONLY0:       cond.end3405:
+// SIMD-ONLY0-NEXT:    [[COND3406:%.*]] = phi i64 [ [[TMP1682]], [[COND_TRUE3403]] ], [ [[TMP1683]], [[COND_FALSE3404]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3406]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1684:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1685:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3407:%.*]] = icmp slt i64 [[TMP1684]], [[TMP1685]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3407]], label [[COND_TRUE3409:%.*]], label [[COND_FALSE3410:%.*]]
+// SIMD-ONLY0:       cond.true3409:
+// SIMD-ONLY0-NEXT:    [[TMP1686:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3411:%.*]]
+// SIMD-ONLY0:       cond.false3410:
+// SIMD-ONLY0-NEXT:    [[TMP1687:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3411]]
+// SIMD-ONLY0:       cond.end3411:
+// SIMD-ONLY0-NEXT:    [[COND3412:%.*]] = phi i64 [ [[TMP1686]], [[COND_TRUE3409]] ], [ [[TMP1687]], [[COND_FALSE3410]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3412]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1688:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1689:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3413:%.*]] = icmp sgt i64 [[TMP1688]], [[TMP1689]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3413]], label [[COND_TRUE3415:%.*]], label [[COND_FALSE3416:%.*]]
+// SIMD-ONLY0:       cond.true3415:
+// SIMD-ONLY0-NEXT:    [[TMP1690:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3417:%.*]]
+// SIMD-ONLY0:       cond.false3416:
+// SIMD-ONLY0-NEXT:    [[TMP1691:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3417]]
+// SIMD-ONLY0:       cond.end3417:
+// SIMD-ONLY0-NEXT:    [[COND3418:%.*]] = phi i64 [ [[TMP1690]], [[COND_TRUE3415]] ], [ [[TMP1691]], [[COND_FALSE3416]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3418]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1692:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1693:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3419:%.*]] = icmp slt i64 [[TMP1692]], [[TMP1693]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3419]], label [[COND_TRUE3421:%.*]], label [[COND_FALSE3422:%.*]]
+// SIMD-ONLY0:       cond.true3421:
+// SIMD-ONLY0-NEXT:    [[TMP1694:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3423:%.*]]
+// SIMD-ONLY0:       cond.false3422:
+// SIMD-ONLY0-NEXT:    [[TMP1695:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3423]]
+// SIMD-ONLY0:       cond.end3423:
+// SIMD-ONLY0-NEXT:    [[COND3424:%.*]] = phi i64 [ [[TMP1694]], [[COND_TRUE3421]] ], [ [[TMP1695]], [[COND_FALSE3422]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3424]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1696:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1697:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3425:%.*]] = icmp sgt i64 [[TMP1696]], [[TMP1697]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3425]], label [[IF_THEN3427:%.*]], label [[IF_END3428:%.*]]
+// SIMD-ONLY0:       if.then3427:
+// SIMD-ONLY0-NEXT:    [[TMP1698:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1698]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3428]]
+// SIMD-ONLY0:       if.end3428:
+// SIMD-ONLY0-NEXT:    [[TMP1699:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1700:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3429:%.*]] = icmp slt i64 [[TMP1699]], [[TMP1700]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3429]], label [[IF_THEN3431:%.*]], label [[IF_END3432:%.*]]
+// SIMD-ONLY0:       if.then3431:
+// SIMD-ONLY0-NEXT:    [[TMP1701:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1701]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3432]]
+// SIMD-ONLY0:       if.end3432:
+// SIMD-ONLY0-NEXT:    [[TMP1702:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1703:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3433:%.*]] = icmp sgt i64 [[TMP1702]], [[TMP1703]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3433]], label [[IF_THEN3435:%.*]], label [[IF_END3436:%.*]]
+// SIMD-ONLY0:       if.then3435:
+// SIMD-ONLY0-NEXT:    [[TMP1704:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1704]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3436]]
+// SIMD-ONLY0:       if.end3436:
+// SIMD-ONLY0-NEXT:    [[TMP1705:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1706:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3437:%.*]] = icmp slt i64 [[TMP1705]], [[TMP1706]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3437]], label [[IF_THEN3439:%.*]], label [[IF_END3440:%.*]]
+// SIMD-ONLY0:       if.then3439:
+// SIMD-ONLY0-NEXT:    [[TMP1707:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1707]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3440]]
+// SIMD-ONLY0:       if.end3440:
+// SIMD-ONLY0-NEXT:    [[TMP1708:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1709:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3441:%.*]] = icmp eq i64 [[TMP1708]], [[TMP1709]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3441]], label [[COND_TRUE3443:%.*]], label [[COND_FALSE3444:%.*]]
+// SIMD-ONLY0:       cond.true3443:
+// SIMD-ONLY0-NEXT:    [[TMP1710:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3445:%.*]]
+// SIMD-ONLY0:       cond.false3444:
+// SIMD-ONLY0-NEXT:    [[TMP1711:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3445]]
+// SIMD-ONLY0:       cond.end3445:
+// SIMD-ONLY0-NEXT:    [[COND3446:%.*]] = phi i64 [ [[TMP1710]], [[COND_TRUE3443]] ], [ [[TMP1711]], [[COND_FALSE3444]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3446]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1712:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1713:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3447:%.*]] = icmp eq i64 [[TMP1712]], [[TMP1713]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3447]], label [[COND_TRUE3449:%.*]], label [[COND_FALSE3450:%.*]]
+// SIMD-ONLY0:       cond.true3449:
+// SIMD-ONLY0-NEXT:    [[TMP1714:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3451:%.*]]
+// SIMD-ONLY0:       cond.false3450:
+// SIMD-ONLY0-NEXT:    [[TMP1715:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3451]]
+// SIMD-ONLY0:       cond.end3451:
+// SIMD-ONLY0-NEXT:    [[COND3452:%.*]] = phi i64 [ [[TMP1714]], [[COND_TRUE3449]] ], [ [[TMP1715]], [[COND_FALSE3450]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3452]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1716:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1717:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3453:%.*]] = icmp eq i64 [[TMP1716]], [[TMP1717]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3453]], label [[IF_THEN3455:%.*]], label [[IF_END3456:%.*]]
+// SIMD-ONLY0:       if.then3455:
+// SIMD-ONLY0-NEXT:    [[TMP1718:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1718]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3456]]
+// SIMD-ONLY0:       if.end3456:
+// SIMD-ONLY0-NEXT:    [[TMP1719:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1720:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3457:%.*]] = icmp eq i64 [[TMP1719]], [[TMP1720]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3457]], label [[IF_THEN3459:%.*]], label [[IF_END3460:%.*]]
+// SIMD-ONLY0:       if.then3459:
+// SIMD-ONLY0-NEXT:    [[TMP1721:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1721]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3460]]
+// SIMD-ONLY0:       if.end3460:
+// SIMD-ONLY0-NEXT:    [[TMP1722:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1723:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3461:%.*]] = icmp ugt i64 [[TMP1722]], [[TMP1723]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3461]], label [[COND_TRUE3463:%.*]], label [[COND_FALSE3464:%.*]]
+// SIMD-ONLY0:       cond.true3463:
+// SIMD-ONLY0-NEXT:    [[TMP1724:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3465:%.*]]
+// SIMD-ONLY0:       cond.false3464:
+// SIMD-ONLY0-NEXT:    [[TMP1725:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3465]]
+// SIMD-ONLY0:       cond.end3465:
+// SIMD-ONLY0-NEXT:    [[COND3466:%.*]] = phi i64 [ [[TMP1724]], [[COND_TRUE3463]] ], [ [[TMP1725]], [[COND_FALSE3464]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3466]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1726:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1727:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3467:%.*]] = icmp ult i64 [[TMP1726]], [[TMP1727]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3467]], label [[COND_TRUE3469:%.*]], label [[COND_FALSE3470:%.*]]
+// SIMD-ONLY0:       cond.true3469:
+// SIMD-ONLY0-NEXT:    [[TMP1728:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3471:%.*]]
+// SIMD-ONLY0:       cond.false3470:
+// SIMD-ONLY0-NEXT:    [[TMP1729:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3471]]
+// SIMD-ONLY0:       cond.end3471:
+// SIMD-ONLY0-NEXT:    [[COND3472:%.*]] = phi i64 [ [[TMP1728]], [[COND_TRUE3469]] ], [ [[TMP1729]], [[COND_FALSE3470]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3472]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1730:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1731:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3473:%.*]] = icmp ugt i64 [[TMP1730]], [[TMP1731]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3473]], label [[COND_TRUE3475:%.*]], label [[COND_FALSE3476:%.*]]
+// SIMD-ONLY0:       cond.true3475:
+// SIMD-ONLY0-NEXT:    [[TMP1732:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3477:%.*]]
+// SIMD-ONLY0:       cond.false3476:
+// SIMD-ONLY0-NEXT:    [[TMP1733:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3477]]
+// SIMD-ONLY0:       cond.end3477:
+// SIMD-ONLY0-NEXT:    [[COND3478:%.*]] = phi i64 [ [[TMP1732]], [[COND_TRUE3475]] ], [ [[TMP1733]], [[COND_FALSE3476]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3478]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1734:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1735:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3479:%.*]] = icmp ult i64 [[TMP1734]], [[TMP1735]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3479]], label [[COND_TRUE3481:%.*]], label [[COND_FALSE3482:%.*]]
+// SIMD-ONLY0:       cond.true3481:
+// SIMD-ONLY0-NEXT:    [[TMP1736:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3483:%.*]]
+// SIMD-ONLY0:       cond.false3482:
+// SIMD-ONLY0-NEXT:    [[TMP1737:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3483]]
+// SIMD-ONLY0:       cond.end3483:
+// SIMD-ONLY0-NEXT:    [[COND3484:%.*]] = phi i64 [ [[TMP1736]], [[COND_TRUE3481]] ], [ [[TMP1737]], [[COND_FALSE3482]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3484]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1738:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1739:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3485:%.*]] = icmp ugt i64 [[TMP1738]], [[TMP1739]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3485]], label [[IF_THEN3487:%.*]], label [[IF_END3488:%.*]]
+// SIMD-ONLY0:       if.then3487:
+// SIMD-ONLY0-NEXT:    [[TMP1740:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1740]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3488]]
+// SIMD-ONLY0:       if.end3488:
+// SIMD-ONLY0-NEXT:    [[TMP1741:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1742:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3489:%.*]] = icmp ult i64 [[TMP1741]], [[TMP1742]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3489]], label [[IF_THEN3491:%.*]], label [[IF_END3492:%.*]]
+// SIMD-ONLY0:       if.then3491:
+// SIMD-ONLY0-NEXT:    [[TMP1743:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1743]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3492]]
+// SIMD-ONLY0:       if.end3492:
+// SIMD-ONLY0-NEXT:    [[TMP1744:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1745:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3493:%.*]] = icmp ugt i64 [[TMP1744]], [[TMP1745]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3493]], label [[IF_THEN3495:%.*]], label [[IF_END3496:%.*]]
+// SIMD-ONLY0:       if.then3495:
+// SIMD-ONLY0-NEXT:    [[TMP1746:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1746]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3496]]
+// SIMD-ONLY0:       if.end3496:
+// SIMD-ONLY0-NEXT:    [[TMP1747:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1748:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3497:%.*]] = icmp ult i64 [[TMP1747]], [[TMP1748]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3497]], label [[IF_THEN3499:%.*]], label [[IF_END3500:%.*]]
+// SIMD-ONLY0:       if.then3499:
+// SIMD-ONLY0-NEXT:    [[TMP1749:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1749]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3500]]
+// SIMD-ONLY0:       if.end3500:
+// SIMD-ONLY0-NEXT:    [[TMP1750:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1751:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3501:%.*]] = icmp eq i64 [[TMP1750]], [[TMP1751]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3501]], label [[COND_TRUE3503:%.*]], label [[COND_FALSE3504:%.*]]
+// SIMD-ONLY0:       cond.true3503:
+// SIMD-ONLY0-NEXT:    [[TMP1752:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3505:%.*]]
+// SIMD-ONLY0:       cond.false3504:
+// SIMD-ONLY0-NEXT:    [[TMP1753:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3505]]
+// SIMD-ONLY0:       cond.end3505:
+// SIMD-ONLY0-NEXT:    [[COND3506:%.*]] = phi i64 [ [[TMP1752]], [[COND_TRUE3503]] ], [ [[TMP1753]], [[COND_FALSE3504]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3506]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1754:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1755:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3507:%.*]] = icmp eq i64 [[TMP1754]], [[TMP1755]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3507]], label [[COND_TRUE3509:%.*]], label [[COND_FALSE3510:%.*]]
+// SIMD-ONLY0:       cond.true3509:
+// SIMD-ONLY0-NEXT:    [[TMP1756:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3511:%.*]]
+// SIMD-ONLY0:       cond.false3510:
+// SIMD-ONLY0-NEXT:    [[TMP1757:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3511]]
+// SIMD-ONLY0:       cond.end3511:
+// SIMD-ONLY0-NEXT:    [[COND3512:%.*]] = phi i64 [ [[TMP1756]], [[COND_TRUE3509]] ], [ [[TMP1757]], [[COND_FALSE3510]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3512]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1758:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1759:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3513:%.*]] = icmp eq i64 [[TMP1758]], [[TMP1759]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3513]], label [[IF_THEN3515:%.*]], label [[IF_END3516:%.*]]
+// SIMD-ONLY0:       if.then3515:
+// SIMD-ONLY0-NEXT:    [[TMP1760:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1760]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3516]]
+// SIMD-ONLY0:       if.end3516:
+// SIMD-ONLY0-NEXT:    [[TMP1761:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1762:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3517:%.*]] = icmp eq i64 [[TMP1761]], [[TMP1762]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3517]], label [[IF_THEN3519:%.*]], label [[IF_END3520:%.*]]
+// SIMD-ONLY0:       if.then3519:
+// SIMD-ONLY0-NEXT:    [[TMP1763:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1763]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3520]]
+// SIMD-ONLY0:       if.end3520:
+// SIMD-ONLY0-NEXT:    [[TMP1764:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1765:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3521:%.*]] = icmp sgt i64 [[TMP1764]], [[TMP1765]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3521]], label [[COND_TRUE3523:%.*]], label [[COND_FALSE3524:%.*]]
+// SIMD-ONLY0:       cond.true3523:
+// SIMD-ONLY0-NEXT:    [[TMP1766:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3525:%.*]]
+// SIMD-ONLY0:       cond.false3524:
+// SIMD-ONLY0-NEXT:    [[TMP1767:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3525]]
+// SIMD-ONLY0:       cond.end3525:
+// SIMD-ONLY0-NEXT:    [[COND3526:%.*]] = phi i64 [ [[TMP1766]], [[COND_TRUE3523]] ], [ [[TMP1767]], [[COND_FALSE3524]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3526]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1768:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1769:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3527:%.*]] = icmp slt i64 [[TMP1768]], [[TMP1769]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3527]], label [[COND_TRUE3529:%.*]], label [[COND_FALSE3530:%.*]]
+// SIMD-ONLY0:       cond.true3529:
+// SIMD-ONLY0-NEXT:    [[TMP1770:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3531:%.*]]
+// SIMD-ONLY0:       cond.false3530:
+// SIMD-ONLY0-NEXT:    [[TMP1771:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3531]]
+// SIMD-ONLY0:       cond.end3531:
+// SIMD-ONLY0-NEXT:    [[COND3532:%.*]] = phi i64 [ [[TMP1770]], [[COND_TRUE3529]] ], [ [[TMP1771]], [[COND_FALSE3530]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3532]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1772:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1773:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3533:%.*]] = icmp sgt i64 [[TMP1772]], [[TMP1773]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3533]], label [[COND_TRUE3535:%.*]], label [[COND_FALSE3536:%.*]]
+// SIMD-ONLY0:       cond.true3535:
+// SIMD-ONLY0-NEXT:    [[TMP1774:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3537:%.*]]
+// SIMD-ONLY0:       cond.false3536:
+// SIMD-ONLY0-NEXT:    [[TMP1775:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3537]]
+// SIMD-ONLY0:       cond.end3537:
+// SIMD-ONLY0-NEXT:    [[COND3538:%.*]] = phi i64 [ [[TMP1774]], [[COND_TRUE3535]] ], [ [[TMP1775]], [[COND_FALSE3536]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3538]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1776:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1777:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3539:%.*]] = icmp slt i64 [[TMP1776]], [[TMP1777]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3539]], label [[COND_TRUE3541:%.*]], label [[COND_FALSE3542:%.*]]
+// SIMD-ONLY0:       cond.true3541:
+// SIMD-ONLY0-NEXT:    [[TMP1778:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3543:%.*]]
+// SIMD-ONLY0:       cond.false3542:
+// SIMD-ONLY0-NEXT:    [[TMP1779:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3543]]
+// SIMD-ONLY0:       cond.end3543:
+// SIMD-ONLY0-NEXT:    [[COND3544:%.*]] = phi i64 [ [[TMP1778]], [[COND_TRUE3541]] ], [ [[TMP1779]], [[COND_FALSE3542]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3544]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1780:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1781:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3545:%.*]] = icmp sgt i64 [[TMP1780]], [[TMP1781]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3545]], label [[IF_THEN3547:%.*]], label [[IF_END3548:%.*]]
+// SIMD-ONLY0:       if.then3547:
+// SIMD-ONLY0-NEXT:    [[TMP1782:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1782]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3548]]
+// SIMD-ONLY0:       if.end3548:
+// SIMD-ONLY0-NEXT:    [[TMP1783:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1784:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3549:%.*]] = icmp slt i64 [[TMP1783]], [[TMP1784]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3549]], label [[IF_THEN3551:%.*]], label [[IF_END3552:%.*]]
+// SIMD-ONLY0:       if.then3551:
+// SIMD-ONLY0-NEXT:    [[TMP1785:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1785]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3552]]
+// SIMD-ONLY0:       if.end3552:
+// SIMD-ONLY0-NEXT:    [[TMP1786:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1787:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3553:%.*]] = icmp sgt i64 [[TMP1786]], [[TMP1787]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3553]], label [[IF_THEN3555:%.*]], label [[IF_END3556:%.*]]
+// SIMD-ONLY0:       if.then3555:
+// SIMD-ONLY0-NEXT:    [[TMP1788:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1788]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3556]]
+// SIMD-ONLY0:       if.end3556:
+// SIMD-ONLY0-NEXT:    [[TMP1789:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1790:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3557:%.*]] = icmp slt i64 [[TMP1789]], [[TMP1790]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3557]], label [[IF_THEN3559:%.*]], label [[IF_END3560:%.*]]
+// SIMD-ONLY0:       if.then3559:
+// SIMD-ONLY0-NEXT:    [[TMP1791:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1791]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3560]]
+// SIMD-ONLY0:       if.end3560:
+// SIMD-ONLY0-NEXT:    [[TMP1792:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1793:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3561:%.*]] = icmp eq i64 [[TMP1792]], [[TMP1793]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3561]], label [[COND_TRUE3563:%.*]], label [[COND_FALSE3564:%.*]]
+// SIMD-ONLY0:       cond.true3563:
+// SIMD-ONLY0-NEXT:    [[TMP1794:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3565:%.*]]
+// SIMD-ONLY0:       cond.false3564:
+// SIMD-ONLY0-NEXT:    [[TMP1795:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3565]]
+// SIMD-ONLY0:       cond.end3565:
+// SIMD-ONLY0-NEXT:    [[COND3566:%.*]] = phi i64 [ [[TMP1794]], [[COND_TRUE3563]] ], [ [[TMP1795]], [[COND_FALSE3564]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3566]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1796:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1797:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3567:%.*]] = icmp eq i64 [[TMP1796]], [[TMP1797]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3567]], label [[COND_TRUE3569:%.*]], label [[COND_FALSE3570:%.*]]
+// SIMD-ONLY0:       cond.true3569:
+// SIMD-ONLY0-NEXT:    [[TMP1798:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3571:%.*]]
+// SIMD-ONLY0:       cond.false3570:
+// SIMD-ONLY0-NEXT:    [[TMP1799:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3571]]
+// SIMD-ONLY0:       cond.end3571:
+// SIMD-ONLY0-NEXT:    [[COND3572:%.*]] = phi i64 [ [[TMP1798]], [[COND_TRUE3569]] ], [ [[TMP1799]], [[COND_FALSE3570]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3572]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1800:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1801:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3573:%.*]] = icmp eq i64 [[TMP1800]], [[TMP1801]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3573]], label [[IF_THEN3575:%.*]], label [[IF_END3576:%.*]]
+// SIMD-ONLY0:       if.then3575:
+// SIMD-ONLY0-NEXT:    [[TMP1802:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1802]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3576]]
+// SIMD-ONLY0:       if.end3576:
+// SIMD-ONLY0-NEXT:    [[TMP1803:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1804:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3577:%.*]] = icmp eq i64 [[TMP1803]], [[TMP1804]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3577]], label [[IF_THEN3579:%.*]], label [[IF_END3580:%.*]]
+// SIMD-ONLY0:       if.then3579:
+// SIMD-ONLY0-NEXT:    [[TMP1805:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1805]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3580]]
+// SIMD-ONLY0:       if.end3580:
+// SIMD-ONLY0-NEXT:    [[TMP1806:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1807:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3581:%.*]] = icmp ugt i64 [[TMP1806]], [[TMP1807]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3581]], label [[COND_TRUE3583:%.*]], label [[COND_FALSE3584:%.*]]
+// SIMD-ONLY0:       cond.true3583:
+// SIMD-ONLY0-NEXT:    [[TMP1808:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3585:%.*]]
+// SIMD-ONLY0:       cond.false3584:
+// SIMD-ONLY0-NEXT:    [[TMP1809:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3585]]
+// SIMD-ONLY0:       cond.end3585:
+// SIMD-ONLY0-NEXT:    [[COND3586:%.*]] = phi i64 [ [[TMP1808]], [[COND_TRUE3583]] ], [ [[TMP1809]], [[COND_FALSE3584]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3586]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1810:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1811:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3587:%.*]] = icmp ult i64 [[TMP1810]], [[TMP1811]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3587]], label [[COND_TRUE3589:%.*]], label [[COND_FALSE3590:%.*]]
+// SIMD-ONLY0:       cond.true3589:
+// SIMD-ONLY0-NEXT:    [[TMP1812:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3591:%.*]]
+// SIMD-ONLY0:       cond.false3590:
+// SIMD-ONLY0-NEXT:    [[TMP1813:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3591]]
+// SIMD-ONLY0:       cond.end3591:
+// SIMD-ONLY0-NEXT:    [[COND3592:%.*]] = phi i64 [ [[TMP1812]], [[COND_TRUE3589]] ], [ [[TMP1813]], [[COND_FALSE3590]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3592]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1814:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1815:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3593:%.*]] = icmp ugt i64 [[TMP1814]], [[TMP1815]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3593]], label [[COND_TRUE3595:%.*]], label [[COND_FALSE3596:%.*]]
+// SIMD-ONLY0:       cond.true3595:
+// SIMD-ONLY0-NEXT:    [[TMP1816:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3597:%.*]]
+// SIMD-ONLY0:       cond.false3596:
+// SIMD-ONLY0-NEXT:    [[TMP1817:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3597]]
+// SIMD-ONLY0:       cond.end3597:
+// SIMD-ONLY0-NEXT:    [[COND3598:%.*]] = phi i64 [ [[TMP1816]], [[COND_TRUE3595]] ], [ [[TMP1817]], [[COND_FALSE3596]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3598]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1818:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1819:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3599:%.*]] = icmp ult i64 [[TMP1818]], [[TMP1819]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3599]], label [[COND_TRUE3601:%.*]], label [[COND_FALSE3602:%.*]]
+// SIMD-ONLY0:       cond.true3601:
+// SIMD-ONLY0-NEXT:    [[TMP1820:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3603:%.*]]
+// SIMD-ONLY0:       cond.false3602:
+// SIMD-ONLY0-NEXT:    [[TMP1821:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3603]]
+// SIMD-ONLY0:       cond.end3603:
+// SIMD-ONLY0-NEXT:    [[COND3604:%.*]] = phi i64 [ [[TMP1820]], [[COND_TRUE3601]] ], [ [[TMP1821]], [[COND_FALSE3602]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3604]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1822:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1823:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3605:%.*]] = icmp ugt i64 [[TMP1822]], [[TMP1823]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3605]], label [[IF_THEN3607:%.*]], label [[IF_END3608:%.*]]
+// SIMD-ONLY0:       if.then3607:
+// SIMD-ONLY0-NEXT:    [[TMP1824:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1824]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3608]]
+// SIMD-ONLY0:       if.end3608:
+// SIMD-ONLY0-NEXT:    [[TMP1825:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1826:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3609:%.*]] = icmp ult i64 [[TMP1825]], [[TMP1826]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3609]], label [[IF_THEN3611:%.*]], label [[IF_END3612:%.*]]
+// SIMD-ONLY0:       if.then3611:
+// SIMD-ONLY0-NEXT:    [[TMP1827:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1827]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3612]]
+// SIMD-ONLY0:       if.end3612:
+// SIMD-ONLY0-NEXT:    [[TMP1828:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1829:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3613:%.*]] = icmp ugt i64 [[TMP1828]], [[TMP1829]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3613]], label [[IF_THEN3615:%.*]], label [[IF_END3616:%.*]]
+// SIMD-ONLY0:       if.then3615:
+// SIMD-ONLY0-NEXT:    [[TMP1830:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1830]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3616]]
+// SIMD-ONLY0:       if.end3616:
+// SIMD-ONLY0-NEXT:    [[TMP1831:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1832:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3617:%.*]] = icmp ult i64 [[TMP1831]], [[TMP1832]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3617]], label [[IF_THEN3619:%.*]], label [[IF_END3620:%.*]]
+// SIMD-ONLY0:       if.then3619:
+// SIMD-ONLY0-NEXT:    [[TMP1833:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1833]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3620]]
+// SIMD-ONLY0:       if.end3620:
+// SIMD-ONLY0-NEXT:    [[TMP1834:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1835:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3621:%.*]] = icmp eq i64 [[TMP1834]], [[TMP1835]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3621]], label [[COND_TRUE3623:%.*]], label [[COND_FALSE3624:%.*]]
+// SIMD-ONLY0:       cond.true3623:
+// SIMD-ONLY0-NEXT:    [[TMP1836:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3625:%.*]]
+// SIMD-ONLY0:       cond.false3624:
+// SIMD-ONLY0-NEXT:    [[TMP1837:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3625]]
+// SIMD-ONLY0:       cond.end3625:
+// SIMD-ONLY0-NEXT:    [[COND3626:%.*]] = phi i64 [ [[TMP1836]], [[COND_TRUE3623]] ], [ [[TMP1837]], [[COND_FALSE3624]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3626]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1838:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1839:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3627:%.*]] = icmp eq i64 [[TMP1838]], [[TMP1839]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3627]], label [[COND_TRUE3629:%.*]], label [[COND_FALSE3630:%.*]]
+// SIMD-ONLY0:       cond.true3629:
+// SIMD-ONLY0-NEXT:    [[TMP1840:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3631:%.*]]
+// SIMD-ONLY0:       cond.false3630:
+// SIMD-ONLY0-NEXT:    [[TMP1841:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3631]]
+// SIMD-ONLY0:       cond.end3631:
+// SIMD-ONLY0-NEXT:    [[COND3632:%.*]] = phi i64 [ [[TMP1840]], [[COND_TRUE3629]] ], [ [[TMP1841]], [[COND_FALSE3630]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3632]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1842:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1843:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3633:%.*]] = icmp eq i64 [[TMP1842]], [[TMP1843]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3633]], label [[IF_THEN3635:%.*]], label [[IF_END3636:%.*]]
+// SIMD-ONLY0:       if.then3635:
+// SIMD-ONLY0-NEXT:    [[TMP1844:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1844]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3636]]
+// SIMD-ONLY0:       if.end3636:
+// SIMD-ONLY0-NEXT:    [[TMP1845:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1846:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3637:%.*]] = icmp eq i64 [[TMP1845]], [[TMP1846]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3637]], label [[IF_THEN3639:%.*]], label [[IF_END3640:%.*]]
+// SIMD-ONLY0:       if.then3639:
+// SIMD-ONLY0-NEXT:    [[TMP1847:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1847]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3640]]
+// SIMD-ONLY0:       if.end3640:
+// SIMD-ONLY0-NEXT:    [[TMP1848:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1849:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3641:%.*]] = icmp sgt i64 [[TMP1848]], [[TMP1849]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3641]], label [[COND_TRUE3643:%.*]], label [[COND_FALSE3644:%.*]]
+// SIMD-ONLY0:       cond.true3643:
+// SIMD-ONLY0-NEXT:    [[TMP1850:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3645:%.*]]
+// SIMD-ONLY0:       cond.false3644:
+// SIMD-ONLY0-NEXT:    [[TMP1851:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3645]]
+// SIMD-ONLY0:       cond.end3645:
+// SIMD-ONLY0-NEXT:    [[COND3646:%.*]] = phi i64 [ [[TMP1850]], [[COND_TRUE3643]] ], [ [[TMP1851]], [[COND_FALSE3644]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3646]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1852:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1853:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3647:%.*]] = icmp slt i64 [[TMP1852]], [[TMP1853]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3647]], label [[COND_TRUE3649:%.*]], label [[COND_FALSE3650:%.*]]
+// SIMD-ONLY0:       cond.true3649:
+// SIMD-ONLY0-NEXT:    [[TMP1854:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3651:%.*]]
+// SIMD-ONLY0:       cond.false3650:
+// SIMD-ONLY0-NEXT:    [[TMP1855:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3651]]
+// SIMD-ONLY0:       cond.end3651:
+// SIMD-ONLY0-NEXT:    [[COND3652:%.*]] = phi i64 [ [[TMP1854]], [[COND_TRUE3649]] ], [ [[TMP1855]], [[COND_FALSE3650]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3652]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1856:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1857:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3653:%.*]] = icmp sgt i64 [[TMP1856]], [[TMP1857]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3653]], label [[COND_TRUE3655:%.*]], label [[COND_FALSE3656:%.*]]
+// SIMD-ONLY0:       cond.true3655:
+// SIMD-ONLY0-NEXT:    [[TMP1858:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3657:%.*]]
+// SIMD-ONLY0:       cond.false3656:
+// SIMD-ONLY0-NEXT:    [[TMP1859:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3657]]
+// SIMD-ONLY0:       cond.end3657:
+// SIMD-ONLY0-NEXT:    [[COND3658:%.*]] = phi i64 [ [[TMP1858]], [[COND_TRUE3655]] ], [ [[TMP1859]], [[COND_FALSE3656]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3658]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1860:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1861:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3659:%.*]] = icmp slt i64 [[TMP1860]], [[TMP1861]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3659]], label [[COND_TRUE3661:%.*]], label [[COND_FALSE3662:%.*]]
+// SIMD-ONLY0:       cond.true3661:
+// SIMD-ONLY0-NEXT:    [[TMP1862:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3663:%.*]]
+// SIMD-ONLY0:       cond.false3662:
+// SIMD-ONLY0-NEXT:    [[TMP1863:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3663]]
+// SIMD-ONLY0:       cond.end3663:
+// SIMD-ONLY0-NEXT:    [[COND3664:%.*]] = phi i64 [ [[TMP1862]], [[COND_TRUE3661]] ], [ [[TMP1863]], [[COND_FALSE3662]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3664]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1864:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1865:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3665:%.*]] = icmp sgt i64 [[TMP1864]], [[TMP1865]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3665]], label [[IF_THEN3667:%.*]], label [[IF_END3668:%.*]]
+// SIMD-ONLY0:       if.then3667:
+// SIMD-ONLY0-NEXT:    [[TMP1866:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1866]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3668]]
+// SIMD-ONLY0:       if.end3668:
+// SIMD-ONLY0-NEXT:    [[TMP1867:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1868:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3669:%.*]] = icmp slt i64 [[TMP1867]], [[TMP1868]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3669]], label [[IF_THEN3671:%.*]], label [[IF_END3672:%.*]]
+// SIMD-ONLY0:       if.then3671:
+// SIMD-ONLY0-NEXT:    [[TMP1869:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1869]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3672]]
+// SIMD-ONLY0:       if.end3672:
+// SIMD-ONLY0-NEXT:    [[TMP1870:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1871:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3673:%.*]] = icmp sgt i64 [[TMP1870]], [[TMP1871]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3673]], label [[IF_THEN3675:%.*]], label [[IF_END3676:%.*]]
+// SIMD-ONLY0:       if.then3675:
+// SIMD-ONLY0-NEXT:    [[TMP1872:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1872]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3676]]
+// SIMD-ONLY0:       if.end3676:
+// SIMD-ONLY0-NEXT:    [[TMP1873:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1874:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3677:%.*]] = icmp slt i64 [[TMP1873]], [[TMP1874]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3677]], label [[IF_THEN3679:%.*]], label [[IF_END3680:%.*]]
+// SIMD-ONLY0:       if.then3679:
+// SIMD-ONLY0-NEXT:    [[TMP1875:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1875]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3680]]
+// SIMD-ONLY0:       if.end3680:
+// SIMD-ONLY0-NEXT:    [[TMP1876:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1877:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3681:%.*]] = icmp eq i64 [[TMP1876]], [[TMP1877]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3681]], label [[COND_TRUE3683:%.*]], label [[COND_FALSE3684:%.*]]
+// SIMD-ONLY0:       cond.true3683:
+// SIMD-ONLY0-NEXT:    [[TMP1878:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3685:%.*]]
+// SIMD-ONLY0:       cond.false3684:
+// SIMD-ONLY0-NEXT:    [[TMP1879:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3685]]
+// SIMD-ONLY0:       cond.end3685:
+// SIMD-ONLY0-NEXT:    [[COND3686:%.*]] = phi i64 [ [[TMP1878]], [[COND_TRUE3683]] ], [ [[TMP1879]], [[COND_FALSE3684]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3686]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1880:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1881:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3687:%.*]] = icmp eq i64 [[TMP1880]], [[TMP1881]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3687]], label [[COND_TRUE3689:%.*]], label [[COND_FALSE3690:%.*]]
+// SIMD-ONLY0:       cond.true3689:
+// SIMD-ONLY0-NEXT:    [[TMP1882:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3691:%.*]]
+// SIMD-ONLY0:       cond.false3690:
+// SIMD-ONLY0-NEXT:    [[TMP1883:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3691]]
+// SIMD-ONLY0:       cond.end3691:
+// SIMD-ONLY0-NEXT:    [[COND3692:%.*]] = phi i64 [ [[TMP1882]], [[COND_TRUE3689]] ], [ [[TMP1883]], [[COND_FALSE3690]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3692]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1884:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1885:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3693:%.*]] = icmp eq i64 [[TMP1884]], [[TMP1885]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3693]], label [[IF_THEN3695:%.*]], label [[IF_END3696:%.*]]
+// SIMD-ONLY0:       if.then3695:
+// SIMD-ONLY0-NEXT:    [[TMP1886:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1886]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3696]]
+// SIMD-ONLY0:       if.end3696:
+// SIMD-ONLY0-NEXT:    [[TMP1887:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1888:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3697:%.*]] = icmp eq i64 [[TMP1887]], [[TMP1888]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3697]], label [[IF_THEN3699:%.*]], label [[IF_END3700:%.*]]
+// SIMD-ONLY0:       if.then3699:
+// SIMD-ONLY0-NEXT:    [[TMP1889:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1889]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3700]]
+// SIMD-ONLY0:       if.end3700:
+// SIMD-ONLY0-NEXT:    [[TMP1890:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1891:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3701:%.*]] = icmp ugt i64 [[TMP1890]], [[TMP1891]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3701]], label [[COND_TRUE3703:%.*]], label [[COND_FALSE3704:%.*]]
+// SIMD-ONLY0:       cond.true3703:
+// SIMD-ONLY0-NEXT:    [[TMP1892:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3705:%.*]]
+// SIMD-ONLY0:       cond.false3704:
+// SIMD-ONLY0-NEXT:    [[TMP1893:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3705]]
+// SIMD-ONLY0:       cond.end3705:
+// SIMD-ONLY0-NEXT:    [[COND3706:%.*]] = phi i64 [ [[TMP1892]], [[COND_TRUE3703]] ], [ [[TMP1893]], [[COND_FALSE3704]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3706]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1894:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1895:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3707:%.*]] = icmp ult i64 [[TMP1894]], [[TMP1895]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3707]], label [[COND_TRUE3709:%.*]], label [[COND_FALSE3710:%.*]]
+// SIMD-ONLY0:       cond.true3709:
+// SIMD-ONLY0-NEXT:    [[TMP1896:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3711:%.*]]
+// SIMD-ONLY0:       cond.false3710:
+// SIMD-ONLY0-NEXT:    [[TMP1897:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3711]]
+// SIMD-ONLY0:       cond.end3711:
+// SIMD-ONLY0-NEXT:    [[COND3712:%.*]] = phi i64 [ [[TMP1896]], [[COND_TRUE3709]] ], [ [[TMP1897]], [[COND_FALSE3710]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3712]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1898:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1899:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3713:%.*]] = icmp ugt i64 [[TMP1898]], [[TMP1899]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3713]], label [[COND_TRUE3715:%.*]], label [[COND_FALSE3716:%.*]]
+// SIMD-ONLY0:       cond.true3715:
+// SIMD-ONLY0-NEXT:    [[TMP1900:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3717:%.*]]
+// SIMD-ONLY0:       cond.false3716:
+// SIMD-ONLY0-NEXT:    [[TMP1901:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3717]]
+// SIMD-ONLY0:       cond.end3717:
+// SIMD-ONLY0-NEXT:    [[COND3718:%.*]] = phi i64 [ [[TMP1900]], [[COND_TRUE3715]] ], [ [[TMP1901]], [[COND_FALSE3716]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3718]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1902:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1903:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3719:%.*]] = icmp ult i64 [[TMP1902]], [[TMP1903]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3719]], label [[COND_TRUE3721:%.*]], label [[COND_FALSE3722:%.*]]
+// SIMD-ONLY0:       cond.true3721:
+// SIMD-ONLY0-NEXT:    [[TMP1904:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3723:%.*]]
+// SIMD-ONLY0:       cond.false3722:
+// SIMD-ONLY0-NEXT:    [[TMP1905:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3723]]
+// SIMD-ONLY0:       cond.end3723:
+// SIMD-ONLY0-NEXT:    [[COND3724:%.*]] = phi i64 [ [[TMP1904]], [[COND_TRUE3721]] ], [ [[TMP1905]], [[COND_FALSE3722]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3724]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1906:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1907:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3725:%.*]] = icmp ugt i64 [[TMP1906]], [[TMP1907]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3725]], label [[IF_THEN3727:%.*]], label [[IF_END3728:%.*]]
+// SIMD-ONLY0:       if.then3727:
+// SIMD-ONLY0-NEXT:    [[TMP1908:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1908]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3728]]
+// SIMD-ONLY0:       if.end3728:
+// SIMD-ONLY0-NEXT:    [[TMP1909:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1910:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3729:%.*]] = icmp ult i64 [[TMP1909]], [[TMP1910]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3729]], label [[IF_THEN3731:%.*]], label [[IF_END3732:%.*]]
+// SIMD-ONLY0:       if.then3731:
+// SIMD-ONLY0-NEXT:    [[TMP1911:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1911]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3732]]
+// SIMD-ONLY0:       if.end3732:
+// SIMD-ONLY0-NEXT:    [[TMP1912:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1913:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3733:%.*]] = icmp ugt i64 [[TMP1912]], [[TMP1913]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3733]], label [[IF_THEN3735:%.*]], label [[IF_END3736:%.*]]
+// SIMD-ONLY0:       if.then3735:
+// SIMD-ONLY0-NEXT:    [[TMP1914:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1914]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3736]]
+// SIMD-ONLY0:       if.end3736:
+// SIMD-ONLY0-NEXT:    [[TMP1915:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1916:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3737:%.*]] = icmp ult i64 [[TMP1915]], [[TMP1916]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3737]], label [[IF_THEN3739:%.*]], label [[IF_END3740:%.*]]
+// SIMD-ONLY0:       if.then3739:
+// SIMD-ONLY0-NEXT:    [[TMP1917:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1917]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3740]]
+// SIMD-ONLY0:       if.end3740:
+// SIMD-ONLY0-NEXT:    [[TMP1918:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1919:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3741:%.*]] = icmp eq i64 [[TMP1918]], [[TMP1919]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3741]], label [[COND_TRUE3743:%.*]], label [[COND_FALSE3744:%.*]]
+// SIMD-ONLY0:       cond.true3743:
+// SIMD-ONLY0-NEXT:    [[TMP1920:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3745:%.*]]
+// SIMD-ONLY0:       cond.false3744:
+// SIMD-ONLY0-NEXT:    [[TMP1921:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3745]]
+// SIMD-ONLY0:       cond.end3745:
+// SIMD-ONLY0-NEXT:    [[COND3746:%.*]] = phi i64 [ [[TMP1920]], [[COND_TRUE3743]] ], [ [[TMP1921]], [[COND_FALSE3744]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3746]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1922:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1923:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3747:%.*]] = icmp eq i64 [[TMP1922]], [[TMP1923]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3747]], label [[COND_TRUE3749:%.*]], label [[COND_FALSE3750:%.*]]
+// SIMD-ONLY0:       cond.true3749:
+// SIMD-ONLY0-NEXT:    [[TMP1924:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3751:%.*]]
+// SIMD-ONLY0:       cond.false3750:
+// SIMD-ONLY0-NEXT:    [[TMP1925:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3751]]
+// SIMD-ONLY0:       cond.end3751:
+// SIMD-ONLY0-NEXT:    [[COND3752:%.*]] = phi i64 [ [[TMP1924]], [[COND_TRUE3749]] ], [ [[TMP1925]], [[COND_FALSE3750]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3752]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1926:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1927:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3753:%.*]] = icmp eq i64 [[TMP1926]], [[TMP1927]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3753]], label [[IF_THEN3755:%.*]], label [[IF_END3756:%.*]]
+// SIMD-ONLY0:       if.then3755:
+// SIMD-ONLY0-NEXT:    [[TMP1928:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1928]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3756]]
+// SIMD-ONLY0:       if.end3756:
+// SIMD-ONLY0-NEXT:    [[TMP1929:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1930:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3757:%.*]] = icmp eq i64 [[TMP1929]], [[TMP1930]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3757]], label [[IF_THEN3759:%.*]], label [[IF_END3760:%.*]]
+// SIMD-ONLY0:       if.then3759:
+// SIMD-ONLY0-NEXT:    [[TMP1931:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1931]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3760]]
+// SIMD-ONLY0:       if.end3760:
+// SIMD-ONLY0-NEXT:    [[TMP1932:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1933:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3761:%.*]] = icmp sgt i64 [[TMP1932]], [[TMP1933]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3761]], label [[COND_TRUE3763:%.*]], label [[COND_FALSE3764:%.*]]
+// SIMD-ONLY0:       cond.true3763:
+// SIMD-ONLY0-NEXT:    [[TMP1934:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3765:%.*]]
+// SIMD-ONLY0:       cond.false3764:
+// SIMD-ONLY0-NEXT:    [[TMP1935:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3765]]
+// SIMD-ONLY0:       cond.end3765:
+// SIMD-ONLY0-NEXT:    [[COND3766:%.*]] = phi i64 [ [[TMP1934]], [[COND_TRUE3763]] ], [ [[TMP1935]], [[COND_FALSE3764]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3766]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1936:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1937:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3767:%.*]] = icmp slt i64 [[TMP1936]], [[TMP1937]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3767]], label [[COND_TRUE3769:%.*]], label [[COND_FALSE3770:%.*]]
+// SIMD-ONLY0:       cond.true3769:
+// SIMD-ONLY0-NEXT:    [[TMP1938:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3771:%.*]]
+// SIMD-ONLY0:       cond.false3770:
+// SIMD-ONLY0-NEXT:    [[TMP1939:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3771]]
+// SIMD-ONLY0:       cond.end3771:
+// SIMD-ONLY0-NEXT:    [[COND3772:%.*]] = phi i64 [ [[TMP1938]], [[COND_TRUE3769]] ], [ [[TMP1939]], [[COND_FALSE3770]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3772]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1940:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1941:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3773:%.*]] = icmp sgt i64 [[TMP1940]], [[TMP1941]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3773]], label [[COND_TRUE3775:%.*]], label [[COND_FALSE3776:%.*]]
+// SIMD-ONLY0:       cond.true3775:
+// SIMD-ONLY0-NEXT:    [[TMP1942:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3777:%.*]]
+// SIMD-ONLY0:       cond.false3776:
+// SIMD-ONLY0-NEXT:    [[TMP1943:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3777]]
+// SIMD-ONLY0:       cond.end3777:
+// SIMD-ONLY0-NEXT:    [[COND3778:%.*]] = phi i64 [ [[TMP1942]], [[COND_TRUE3775]] ], [ [[TMP1943]], [[COND_FALSE3776]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3778]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1944:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1945:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3779:%.*]] = icmp slt i64 [[TMP1944]], [[TMP1945]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3779]], label [[COND_TRUE3781:%.*]], label [[COND_FALSE3782:%.*]]
+// SIMD-ONLY0:       cond.true3781:
+// SIMD-ONLY0-NEXT:    [[TMP1946:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3783:%.*]]
+// SIMD-ONLY0:       cond.false3782:
+// SIMD-ONLY0-NEXT:    [[TMP1947:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3783]]
+// SIMD-ONLY0:       cond.end3783:
+// SIMD-ONLY0-NEXT:    [[COND3784:%.*]] = phi i64 [ [[TMP1946]], [[COND_TRUE3781]] ], [ [[TMP1947]], [[COND_FALSE3782]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3784]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1948:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1949:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3785:%.*]] = icmp sgt i64 [[TMP1948]], [[TMP1949]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3785]], label [[IF_THEN3787:%.*]], label [[IF_END3788:%.*]]
+// SIMD-ONLY0:       if.then3787:
+// SIMD-ONLY0-NEXT:    [[TMP1950:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1950]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3788]]
+// SIMD-ONLY0:       if.end3788:
+// SIMD-ONLY0-NEXT:    [[TMP1951:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1952:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3789:%.*]] = icmp slt i64 [[TMP1951]], [[TMP1952]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3789]], label [[IF_THEN3791:%.*]], label [[IF_END3792:%.*]]
+// SIMD-ONLY0:       if.then3791:
+// SIMD-ONLY0-NEXT:    [[TMP1953:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1953]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3792]]
+// SIMD-ONLY0:       if.end3792:
+// SIMD-ONLY0-NEXT:    [[TMP1954:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1955:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3793:%.*]] = icmp sgt i64 [[TMP1954]], [[TMP1955]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3793]], label [[IF_THEN3795:%.*]], label [[IF_END3796:%.*]]
+// SIMD-ONLY0:       if.then3795:
+// SIMD-ONLY0-NEXT:    [[TMP1956:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1956]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3796]]
+// SIMD-ONLY0:       if.end3796:
+// SIMD-ONLY0-NEXT:    [[TMP1957:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1958:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3797:%.*]] = icmp slt i64 [[TMP1957]], [[TMP1958]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3797]], label [[IF_THEN3799:%.*]], label [[IF_END3800:%.*]]
+// SIMD-ONLY0:       if.then3799:
+// SIMD-ONLY0-NEXT:    [[TMP1959:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1959]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3800]]
+// SIMD-ONLY0:       if.end3800:
+// SIMD-ONLY0-NEXT:    [[TMP1960:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1961:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3801:%.*]] = icmp eq i64 [[TMP1960]], [[TMP1961]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3801]], label [[COND_TRUE3803:%.*]], label [[COND_FALSE3804:%.*]]
+// SIMD-ONLY0:       cond.true3803:
+// SIMD-ONLY0-NEXT:    [[TMP1962:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3805:%.*]]
+// SIMD-ONLY0:       cond.false3804:
+// SIMD-ONLY0-NEXT:    [[TMP1963:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3805]]
+// SIMD-ONLY0:       cond.end3805:
+// SIMD-ONLY0-NEXT:    [[COND3806:%.*]] = phi i64 [ [[TMP1962]], [[COND_TRUE3803]] ], [ [[TMP1963]], [[COND_FALSE3804]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3806]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1964:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1965:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3807:%.*]] = icmp eq i64 [[TMP1964]], [[TMP1965]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3807]], label [[COND_TRUE3809:%.*]], label [[COND_FALSE3810:%.*]]
+// SIMD-ONLY0:       cond.true3809:
+// SIMD-ONLY0-NEXT:    [[TMP1966:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3811:%.*]]
+// SIMD-ONLY0:       cond.false3810:
+// SIMD-ONLY0-NEXT:    [[TMP1967:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3811]]
+// SIMD-ONLY0:       cond.end3811:
+// SIMD-ONLY0-NEXT:    [[COND3812:%.*]] = phi i64 [ [[TMP1966]], [[COND_TRUE3809]] ], [ [[TMP1967]], [[COND_FALSE3810]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3812]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1968:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1969:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3813:%.*]] = icmp eq i64 [[TMP1968]], [[TMP1969]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3813]], label [[IF_THEN3815:%.*]], label [[IF_END3816:%.*]]
+// SIMD-ONLY0:       if.then3815:
+// SIMD-ONLY0-NEXT:    [[TMP1970:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1970]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3816]]
+// SIMD-ONLY0:       if.end3816:
+// SIMD-ONLY0-NEXT:    [[TMP1971:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1972:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3817:%.*]] = icmp eq i64 [[TMP1971]], [[TMP1972]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3817]], label [[IF_THEN3819:%.*]], label [[IF_END3820:%.*]]
+// SIMD-ONLY0:       if.then3819:
+// SIMD-ONLY0-NEXT:    [[TMP1973:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1973]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3820]]
+// SIMD-ONLY0:       if.end3820:
+// SIMD-ONLY0-NEXT:    [[TMP1974:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1975:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3821:%.*]] = icmp ugt i64 [[TMP1974]], [[TMP1975]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3821]], label [[COND_TRUE3823:%.*]], label [[COND_FALSE3824:%.*]]
+// SIMD-ONLY0:       cond.true3823:
+// SIMD-ONLY0-NEXT:    [[TMP1976:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3825:%.*]]
+// SIMD-ONLY0:       cond.false3824:
+// SIMD-ONLY0-NEXT:    [[TMP1977:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3825]]
+// SIMD-ONLY0:       cond.end3825:
+// SIMD-ONLY0-NEXT:    [[COND3826:%.*]] = phi i64 [ [[TMP1976]], [[COND_TRUE3823]] ], [ [[TMP1977]], [[COND_FALSE3824]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3826]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1978:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1979:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3827:%.*]] = icmp ult i64 [[TMP1978]], [[TMP1979]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3827]], label [[COND_TRUE3829:%.*]], label [[COND_FALSE3830:%.*]]
+// SIMD-ONLY0:       cond.true3829:
+// SIMD-ONLY0-NEXT:    [[TMP1980:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3831:%.*]]
+// SIMD-ONLY0:       cond.false3830:
+// SIMD-ONLY0-NEXT:    [[TMP1981:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3831]]
+// SIMD-ONLY0:       cond.end3831:
+// SIMD-ONLY0-NEXT:    [[COND3832:%.*]] = phi i64 [ [[TMP1980]], [[COND_TRUE3829]] ], [ [[TMP1981]], [[COND_FALSE3830]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3832]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1982:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1983:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3833:%.*]] = icmp ugt i64 [[TMP1982]], [[TMP1983]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3833]], label [[COND_TRUE3835:%.*]], label [[COND_FALSE3836:%.*]]
+// SIMD-ONLY0:       cond.true3835:
+// SIMD-ONLY0-NEXT:    [[TMP1984:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3837:%.*]]
+// SIMD-ONLY0:       cond.false3836:
+// SIMD-ONLY0-NEXT:    [[TMP1985:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3837]]
+// SIMD-ONLY0:       cond.end3837:
+// SIMD-ONLY0-NEXT:    [[COND3838:%.*]] = phi i64 [ [[TMP1984]], [[COND_TRUE3835]] ], [ [[TMP1985]], [[COND_FALSE3836]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3838]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1986:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1987:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3839:%.*]] = icmp ult i64 [[TMP1986]], [[TMP1987]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3839]], label [[COND_TRUE3841:%.*]], label [[COND_FALSE3842:%.*]]
+// SIMD-ONLY0:       cond.true3841:
+// SIMD-ONLY0-NEXT:    [[TMP1988:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3843:%.*]]
+// SIMD-ONLY0:       cond.false3842:
+// SIMD-ONLY0-NEXT:    [[TMP1989:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3843]]
+// SIMD-ONLY0:       cond.end3843:
+// SIMD-ONLY0-NEXT:    [[COND3844:%.*]] = phi i64 [ [[TMP1988]], [[COND_TRUE3841]] ], [ [[TMP1989]], [[COND_FALSE3842]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3844]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1990:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1991:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3845:%.*]] = icmp ugt i64 [[TMP1990]], [[TMP1991]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3845]], label [[IF_THEN3847:%.*]], label [[IF_END3848:%.*]]
+// SIMD-ONLY0:       if.then3847:
+// SIMD-ONLY0-NEXT:    [[TMP1992:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1992]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3848]]
+// SIMD-ONLY0:       if.end3848:
+// SIMD-ONLY0-NEXT:    [[TMP1993:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1994:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3849:%.*]] = icmp ult i64 [[TMP1993]], [[TMP1994]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3849]], label [[IF_THEN3851:%.*]], label [[IF_END3852:%.*]]
+// SIMD-ONLY0:       if.then3851:
+// SIMD-ONLY0-NEXT:    [[TMP1995:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1995]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3852]]
+// SIMD-ONLY0:       if.end3852:
+// SIMD-ONLY0-NEXT:    [[TMP1996:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1997:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3853:%.*]] = icmp ugt i64 [[TMP1996]], [[TMP1997]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3853]], label [[IF_THEN3855:%.*]], label [[IF_END3856:%.*]]
+// SIMD-ONLY0:       if.then3855:
+// SIMD-ONLY0-NEXT:    [[TMP1998:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP1998]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3856]]
+// SIMD-ONLY0:       if.end3856:
+// SIMD-ONLY0-NEXT:    [[TMP1999:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2000:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3857:%.*]] = icmp ult i64 [[TMP1999]], [[TMP2000]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3857]], label [[IF_THEN3859:%.*]], label [[IF_END3860:%.*]]
+// SIMD-ONLY0:       if.then3859:
+// SIMD-ONLY0-NEXT:    [[TMP2001:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2001]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3860]]
+// SIMD-ONLY0:       if.end3860:
+// SIMD-ONLY0-NEXT:    [[TMP2002:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2003:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3861:%.*]] = icmp eq i64 [[TMP2002]], [[TMP2003]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3861]], label [[COND_TRUE3863:%.*]], label [[COND_FALSE3864:%.*]]
+// SIMD-ONLY0:       cond.true3863:
+// SIMD-ONLY0-NEXT:    [[TMP2004:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3865:%.*]]
+// SIMD-ONLY0:       cond.false3864:
+// SIMD-ONLY0-NEXT:    [[TMP2005:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3865]]
+// SIMD-ONLY0:       cond.end3865:
+// SIMD-ONLY0-NEXT:    [[COND3866:%.*]] = phi i64 [ [[TMP2004]], [[COND_TRUE3863]] ], [ [[TMP2005]], [[COND_FALSE3864]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3866]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2006:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2007:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3867:%.*]] = icmp eq i64 [[TMP2006]], [[TMP2007]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3867]], label [[COND_TRUE3869:%.*]], label [[COND_FALSE3870:%.*]]
+// SIMD-ONLY0:       cond.true3869:
+// SIMD-ONLY0-NEXT:    [[TMP2008:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3871:%.*]]
+// SIMD-ONLY0:       cond.false3870:
+// SIMD-ONLY0-NEXT:    [[TMP2009:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3871]]
+// SIMD-ONLY0:       cond.end3871:
+// SIMD-ONLY0-NEXT:    [[COND3872:%.*]] = phi i64 [ [[TMP2008]], [[COND_TRUE3869]] ], [ [[TMP2009]], [[COND_FALSE3870]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3872]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2010:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2011:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3873:%.*]] = icmp eq i64 [[TMP2010]], [[TMP2011]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3873]], label [[IF_THEN3875:%.*]], label [[IF_END3876:%.*]]
+// SIMD-ONLY0:       if.then3875:
+// SIMD-ONLY0-NEXT:    [[TMP2012:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2012]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3876]]
+// SIMD-ONLY0:       if.end3876:
+// SIMD-ONLY0-NEXT:    [[TMP2013:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2014:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3877:%.*]] = icmp eq i64 [[TMP2013]], [[TMP2014]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3877]], label [[IF_THEN3879:%.*]], label [[IF_END3880:%.*]]
+// SIMD-ONLY0:       if.then3879:
+// SIMD-ONLY0-NEXT:    [[TMP2015:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2015]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3880]]
+// SIMD-ONLY0:       if.end3880:
+// SIMD-ONLY0-NEXT:    [[TMP2016:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2017:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3881:%.*]] = icmp sgt i64 [[TMP2016]], [[TMP2017]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3881]], label [[COND_TRUE3883:%.*]], label [[COND_FALSE3884:%.*]]
+// SIMD-ONLY0:       cond.true3883:
+// SIMD-ONLY0-NEXT:    [[TMP2018:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3885:%.*]]
+// SIMD-ONLY0:       cond.false3884:
+// SIMD-ONLY0-NEXT:    [[TMP2019:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3885]]
+// SIMD-ONLY0:       cond.end3885:
+// SIMD-ONLY0-NEXT:    [[COND3886:%.*]] = phi i64 [ [[TMP2018]], [[COND_TRUE3883]] ], [ [[TMP2019]], [[COND_FALSE3884]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3886]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2020:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2021:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3887:%.*]] = icmp slt i64 [[TMP2020]], [[TMP2021]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3887]], label [[COND_TRUE3889:%.*]], label [[COND_FALSE3890:%.*]]
+// SIMD-ONLY0:       cond.true3889:
+// SIMD-ONLY0-NEXT:    [[TMP2022:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3891:%.*]]
+// SIMD-ONLY0:       cond.false3890:
+// SIMD-ONLY0-NEXT:    [[TMP2023:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3891]]
+// SIMD-ONLY0:       cond.end3891:
+// SIMD-ONLY0-NEXT:    [[COND3892:%.*]] = phi i64 [ [[TMP2022]], [[COND_TRUE3889]] ], [ [[TMP2023]], [[COND_FALSE3890]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3892]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2024:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2025:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3893:%.*]] = icmp sgt i64 [[TMP2024]], [[TMP2025]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3893]], label [[COND_TRUE3895:%.*]], label [[COND_FALSE3896:%.*]]
+// SIMD-ONLY0:       cond.true3895:
+// SIMD-ONLY0-NEXT:    [[TMP2026:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3897:%.*]]
+// SIMD-ONLY0:       cond.false3896:
+// SIMD-ONLY0-NEXT:    [[TMP2027:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3897]]
+// SIMD-ONLY0:       cond.end3897:
+// SIMD-ONLY0-NEXT:    [[COND3898:%.*]] = phi i64 [ [[TMP2026]], [[COND_TRUE3895]] ], [ [[TMP2027]], [[COND_FALSE3896]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3898]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2028:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2029:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3899:%.*]] = icmp slt i64 [[TMP2028]], [[TMP2029]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3899]], label [[COND_TRUE3901:%.*]], label [[COND_FALSE3902:%.*]]
+// SIMD-ONLY0:       cond.true3901:
+// SIMD-ONLY0-NEXT:    [[TMP2030:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3903:%.*]]
+// SIMD-ONLY0:       cond.false3902:
+// SIMD-ONLY0-NEXT:    [[TMP2031:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3903]]
+// SIMD-ONLY0:       cond.end3903:
+// SIMD-ONLY0-NEXT:    [[COND3904:%.*]] = phi i64 [ [[TMP2030]], [[COND_TRUE3901]] ], [ [[TMP2031]], [[COND_FALSE3902]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3904]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2032:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2033:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3905:%.*]] = icmp sgt i64 [[TMP2032]], [[TMP2033]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3905]], label [[IF_THEN3907:%.*]], label [[IF_END3908:%.*]]
+// SIMD-ONLY0:       if.then3907:
+// SIMD-ONLY0-NEXT:    [[TMP2034:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2034]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3908]]
+// SIMD-ONLY0:       if.end3908:
+// SIMD-ONLY0-NEXT:    [[TMP2035:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2036:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3909:%.*]] = icmp slt i64 [[TMP2035]], [[TMP2036]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3909]], label [[IF_THEN3911:%.*]], label [[IF_END3912:%.*]]
+// SIMD-ONLY0:       if.then3911:
+// SIMD-ONLY0-NEXT:    [[TMP2037:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2037]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3912]]
+// SIMD-ONLY0:       if.end3912:
+// SIMD-ONLY0-NEXT:    [[TMP2038:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2039:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3913:%.*]] = icmp sgt i64 [[TMP2038]], [[TMP2039]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3913]], label [[IF_THEN3915:%.*]], label [[IF_END3916:%.*]]
+// SIMD-ONLY0:       if.then3915:
+// SIMD-ONLY0-NEXT:    [[TMP2040:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2040]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3916]]
+// SIMD-ONLY0:       if.end3916:
+// SIMD-ONLY0-NEXT:    [[TMP2041:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2042:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3917:%.*]] = icmp slt i64 [[TMP2041]], [[TMP2042]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3917]], label [[IF_THEN3919:%.*]], label [[IF_END3920:%.*]]
+// SIMD-ONLY0:       if.then3919:
+// SIMD-ONLY0-NEXT:    [[TMP2043:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2043]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3920]]
+// SIMD-ONLY0:       if.end3920:
+// SIMD-ONLY0-NEXT:    [[TMP2044:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2045:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3921:%.*]] = icmp eq i64 [[TMP2044]], [[TMP2045]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3921]], label [[COND_TRUE3923:%.*]], label [[COND_FALSE3924:%.*]]
+// SIMD-ONLY0:       cond.true3923:
+// SIMD-ONLY0-NEXT:    [[TMP2046:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3925:%.*]]
+// SIMD-ONLY0:       cond.false3924:
+// SIMD-ONLY0-NEXT:    [[TMP2047:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3925]]
+// SIMD-ONLY0:       cond.end3925:
+// SIMD-ONLY0-NEXT:    [[COND3926:%.*]] = phi i64 [ [[TMP2046]], [[COND_TRUE3923]] ], [ [[TMP2047]], [[COND_FALSE3924]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3926]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2048:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2049:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3927:%.*]] = icmp eq i64 [[TMP2048]], [[TMP2049]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3927]], label [[COND_TRUE3929:%.*]], label [[COND_FALSE3930:%.*]]
+// SIMD-ONLY0:       cond.true3929:
+// SIMD-ONLY0-NEXT:    [[TMP2050:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3931:%.*]]
+// SIMD-ONLY0:       cond.false3930:
+// SIMD-ONLY0-NEXT:    [[TMP2051:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3931]]
+// SIMD-ONLY0:       cond.end3931:
+// SIMD-ONLY0-NEXT:    [[COND3932:%.*]] = phi i64 [ [[TMP2050]], [[COND_TRUE3929]] ], [ [[TMP2051]], [[COND_FALSE3930]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3932]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2052:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2053:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3933:%.*]] = icmp eq i64 [[TMP2052]], [[TMP2053]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3933]], label [[IF_THEN3935:%.*]], label [[IF_END3936:%.*]]
+// SIMD-ONLY0:       if.then3935:
+// SIMD-ONLY0-NEXT:    [[TMP2054:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2054]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3936]]
+// SIMD-ONLY0:       if.end3936:
+// SIMD-ONLY0-NEXT:    [[TMP2055:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2056:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3937:%.*]] = icmp eq i64 [[TMP2055]], [[TMP2056]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3937]], label [[IF_THEN3939:%.*]], label [[IF_END3940:%.*]]
+// SIMD-ONLY0:       if.then3939:
+// SIMD-ONLY0-NEXT:    [[TMP2057:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2057]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3940]]
+// SIMD-ONLY0:       if.end3940:
+// SIMD-ONLY0-NEXT:    [[TMP2058:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2059:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3941:%.*]] = icmp ugt i64 [[TMP2058]], [[TMP2059]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3941]], label [[COND_TRUE3943:%.*]], label [[COND_FALSE3944:%.*]]
+// SIMD-ONLY0:       cond.true3943:
+// SIMD-ONLY0-NEXT:    [[TMP2060:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3945:%.*]]
+// SIMD-ONLY0:       cond.false3944:
+// SIMD-ONLY0-NEXT:    [[TMP2061:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3945]]
+// SIMD-ONLY0:       cond.end3945:
+// SIMD-ONLY0-NEXT:    [[COND3946:%.*]] = phi i64 [ [[TMP2060]], [[COND_TRUE3943]] ], [ [[TMP2061]], [[COND_FALSE3944]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3946]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2062:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2063:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3947:%.*]] = icmp ult i64 [[TMP2062]], [[TMP2063]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3947]], label [[COND_TRUE3949:%.*]], label [[COND_FALSE3950:%.*]]
+// SIMD-ONLY0:       cond.true3949:
+// SIMD-ONLY0-NEXT:    [[TMP2064:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3951:%.*]]
+// SIMD-ONLY0:       cond.false3950:
+// SIMD-ONLY0-NEXT:    [[TMP2065:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3951]]
+// SIMD-ONLY0:       cond.end3951:
+// SIMD-ONLY0-NEXT:    [[COND3952:%.*]] = phi i64 [ [[TMP2064]], [[COND_TRUE3949]] ], [ [[TMP2065]], [[COND_FALSE3950]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3952]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2066:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2067:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3953:%.*]] = icmp ugt i64 [[TMP2066]], [[TMP2067]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3953]], label [[COND_TRUE3955:%.*]], label [[COND_FALSE3956:%.*]]
+// SIMD-ONLY0:       cond.true3955:
+// SIMD-ONLY0-NEXT:    [[TMP2068:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3957:%.*]]
+// SIMD-ONLY0:       cond.false3956:
+// SIMD-ONLY0-NEXT:    [[TMP2069:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3957]]
+// SIMD-ONLY0:       cond.end3957:
+// SIMD-ONLY0-NEXT:    [[COND3958:%.*]] = phi i64 [ [[TMP2068]], [[COND_TRUE3955]] ], [ [[TMP2069]], [[COND_FALSE3956]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3958]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2070:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2071:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3959:%.*]] = icmp ult i64 [[TMP2070]], [[TMP2071]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3959]], label [[COND_TRUE3961:%.*]], label [[COND_FALSE3962:%.*]]
+// SIMD-ONLY0:       cond.true3961:
+// SIMD-ONLY0-NEXT:    [[TMP2072:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3963:%.*]]
+// SIMD-ONLY0:       cond.false3962:
+// SIMD-ONLY0-NEXT:    [[TMP2073:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3963]]
+// SIMD-ONLY0:       cond.end3963:
+// SIMD-ONLY0-NEXT:    [[COND3964:%.*]] = phi i64 [ [[TMP2072]], [[COND_TRUE3961]] ], [ [[TMP2073]], [[COND_FALSE3962]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3964]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2074:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2075:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3965:%.*]] = icmp ugt i64 [[TMP2074]], [[TMP2075]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3965]], label [[IF_THEN3967:%.*]], label [[IF_END3968:%.*]]
+// SIMD-ONLY0:       if.then3967:
+// SIMD-ONLY0-NEXT:    [[TMP2076:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2076]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3968]]
+// SIMD-ONLY0:       if.end3968:
+// SIMD-ONLY0-NEXT:    [[TMP2077:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2078:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3969:%.*]] = icmp ult i64 [[TMP2077]], [[TMP2078]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3969]], label [[IF_THEN3971:%.*]], label [[IF_END3972:%.*]]
+// SIMD-ONLY0:       if.then3971:
+// SIMD-ONLY0-NEXT:    [[TMP2079:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2079]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3972]]
+// SIMD-ONLY0:       if.end3972:
+// SIMD-ONLY0-NEXT:    [[TMP2080:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2081:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3973:%.*]] = icmp ugt i64 [[TMP2080]], [[TMP2081]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3973]], label [[IF_THEN3975:%.*]], label [[IF_END3976:%.*]]
+// SIMD-ONLY0:       if.then3975:
+// SIMD-ONLY0-NEXT:    [[TMP2082:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2082]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3976]]
+// SIMD-ONLY0:       if.end3976:
+// SIMD-ONLY0-NEXT:    [[TMP2083:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2084:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3977:%.*]] = icmp ult i64 [[TMP2083]], [[TMP2084]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3977]], label [[IF_THEN3979:%.*]], label [[IF_END3980:%.*]]
+// SIMD-ONLY0:       if.then3979:
+// SIMD-ONLY0-NEXT:    [[TMP2085:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2085]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3980]]
+// SIMD-ONLY0:       if.end3980:
+// SIMD-ONLY0-NEXT:    [[TMP2086:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2087:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3981:%.*]] = icmp eq i64 [[TMP2086]], [[TMP2087]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3981]], label [[COND_TRUE3983:%.*]], label [[COND_FALSE3984:%.*]]
+// SIMD-ONLY0:       cond.true3983:
+// SIMD-ONLY0-NEXT:    [[TMP2088:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3985:%.*]]
+// SIMD-ONLY0:       cond.false3984:
+// SIMD-ONLY0-NEXT:    [[TMP2089:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3985]]
+// SIMD-ONLY0:       cond.end3985:
+// SIMD-ONLY0-NEXT:    [[COND3986:%.*]] = phi i64 [ [[TMP2088]], [[COND_TRUE3983]] ], [ [[TMP2089]], [[COND_FALSE3984]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3986]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2090:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2091:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3987:%.*]] = icmp eq i64 [[TMP2090]], [[TMP2091]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3987]], label [[COND_TRUE3989:%.*]], label [[COND_FALSE3990:%.*]]
+// SIMD-ONLY0:       cond.true3989:
+// SIMD-ONLY0-NEXT:    [[TMP2092:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3991:%.*]]
+// SIMD-ONLY0:       cond.false3990:
+// SIMD-ONLY0-NEXT:    [[TMP2093:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END3991]]
+// SIMD-ONLY0:       cond.end3991:
+// SIMD-ONLY0-NEXT:    [[COND3992:%.*]] = phi i64 [ [[TMP2092]], [[COND_TRUE3989]] ], [ [[TMP2093]], [[COND_FALSE3990]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND3992]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2094:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2095:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3993:%.*]] = icmp eq i64 [[TMP2094]], [[TMP2095]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3993]], label [[IF_THEN3995:%.*]], label [[IF_END3996:%.*]]
+// SIMD-ONLY0:       if.then3995:
+// SIMD-ONLY0-NEXT:    [[TMP2096:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2096]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3996]]
+// SIMD-ONLY0:       if.end3996:
+// SIMD-ONLY0-NEXT:    [[TMP2097:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2098:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3997:%.*]] = icmp eq i64 [[TMP2097]], [[TMP2098]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3997]], label [[IF_THEN3999:%.*]], label [[IF_END4000:%.*]]
+// SIMD-ONLY0:       if.then3999:
+// SIMD-ONLY0-NEXT:    [[TMP2099:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2099]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4000]]
+// SIMD-ONLY0:       if.end4000:
+// SIMD-ONLY0-NEXT:    [[TMP2100:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2101:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4001:%.*]] = icmp sgt i64 [[TMP2100]], [[TMP2101]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4001]], label [[COND_TRUE4003:%.*]], label [[COND_FALSE4004:%.*]]
+// SIMD-ONLY0:       cond.true4003:
+// SIMD-ONLY0-NEXT:    [[TMP2102:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4005:%.*]]
+// SIMD-ONLY0:       cond.false4004:
+// SIMD-ONLY0-NEXT:    [[TMP2103:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4005]]
+// SIMD-ONLY0:       cond.end4005:
+// SIMD-ONLY0-NEXT:    [[COND4006:%.*]] = phi i64 [ [[TMP2102]], [[COND_TRUE4003]] ], [ [[TMP2103]], [[COND_FALSE4004]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4006]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2104:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2105:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4007:%.*]] = icmp slt i64 [[TMP2104]], [[TMP2105]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4007]], label [[COND_TRUE4009:%.*]], label [[COND_FALSE4010:%.*]]
+// SIMD-ONLY0:       cond.true4009:
+// SIMD-ONLY0-NEXT:    [[TMP2106:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4011:%.*]]
+// SIMD-ONLY0:       cond.false4010:
+// SIMD-ONLY0-NEXT:    [[TMP2107:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4011]]
+// SIMD-ONLY0:       cond.end4011:
+// SIMD-ONLY0-NEXT:    [[COND4012:%.*]] = phi i64 [ [[TMP2106]], [[COND_TRUE4009]] ], [ [[TMP2107]], [[COND_FALSE4010]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4012]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2108:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2109:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4013:%.*]] = icmp sgt i64 [[TMP2108]], [[TMP2109]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4013]], label [[COND_TRUE4015:%.*]], label [[COND_FALSE4016:%.*]]
+// SIMD-ONLY0:       cond.true4015:
+// SIMD-ONLY0-NEXT:    [[TMP2110:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4017:%.*]]
+// SIMD-ONLY0:       cond.false4016:
+// SIMD-ONLY0-NEXT:    [[TMP2111:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4017]]
+// SIMD-ONLY0:       cond.end4017:
+// SIMD-ONLY0-NEXT:    [[COND4018:%.*]] = phi i64 [ [[TMP2110]], [[COND_TRUE4015]] ], [ [[TMP2111]], [[COND_FALSE4016]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4018]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2112:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2113:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4019:%.*]] = icmp slt i64 [[TMP2112]], [[TMP2113]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4019]], label [[COND_TRUE4021:%.*]], label [[COND_FALSE4022:%.*]]
+// SIMD-ONLY0:       cond.true4021:
+// SIMD-ONLY0-NEXT:    [[TMP2114:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4023:%.*]]
+// SIMD-ONLY0:       cond.false4022:
+// SIMD-ONLY0-NEXT:    [[TMP2115:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4023]]
+// SIMD-ONLY0:       cond.end4023:
+// SIMD-ONLY0-NEXT:    [[COND4024:%.*]] = phi i64 [ [[TMP2114]], [[COND_TRUE4021]] ], [ [[TMP2115]], [[COND_FALSE4022]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4024]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2116:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2117:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4025:%.*]] = icmp sgt i64 [[TMP2116]], [[TMP2117]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4025]], label [[IF_THEN4027:%.*]], label [[IF_END4028:%.*]]
+// SIMD-ONLY0:       if.then4027:
+// SIMD-ONLY0-NEXT:    [[TMP2118:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2118]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4028]]
+// SIMD-ONLY0:       if.end4028:
+// SIMD-ONLY0-NEXT:    [[TMP2119:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2120:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4029:%.*]] = icmp slt i64 [[TMP2119]], [[TMP2120]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4029]], label [[IF_THEN4031:%.*]], label [[IF_END4032:%.*]]
+// SIMD-ONLY0:       if.then4031:
+// SIMD-ONLY0-NEXT:    [[TMP2121:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2121]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4032]]
+// SIMD-ONLY0:       if.end4032:
+// SIMD-ONLY0-NEXT:    [[TMP2122:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2123:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4033:%.*]] = icmp sgt i64 [[TMP2122]], [[TMP2123]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4033]], label [[IF_THEN4035:%.*]], label [[IF_END4036:%.*]]
+// SIMD-ONLY0:       if.then4035:
+// SIMD-ONLY0-NEXT:    [[TMP2124:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2124]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4036]]
+// SIMD-ONLY0:       if.end4036:
+// SIMD-ONLY0-NEXT:    [[TMP2125:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2126:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4037:%.*]] = icmp slt i64 [[TMP2125]], [[TMP2126]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4037]], label [[IF_THEN4039:%.*]], label [[IF_END4040:%.*]]
+// SIMD-ONLY0:       if.then4039:
+// SIMD-ONLY0-NEXT:    [[TMP2127:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2127]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4040]]
+// SIMD-ONLY0:       if.end4040:
+// SIMD-ONLY0-NEXT:    [[TMP2128:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2129:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4041:%.*]] = icmp eq i64 [[TMP2128]], [[TMP2129]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4041]], label [[COND_TRUE4043:%.*]], label [[COND_FALSE4044:%.*]]
+// SIMD-ONLY0:       cond.true4043:
+// SIMD-ONLY0-NEXT:    [[TMP2130:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4045:%.*]]
+// SIMD-ONLY0:       cond.false4044:
+// SIMD-ONLY0-NEXT:    [[TMP2131:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4045]]
+// SIMD-ONLY0:       cond.end4045:
+// SIMD-ONLY0-NEXT:    [[COND4046:%.*]] = phi i64 [ [[TMP2130]], [[COND_TRUE4043]] ], [ [[TMP2131]], [[COND_FALSE4044]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4046]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2132:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2133:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4047:%.*]] = icmp eq i64 [[TMP2132]], [[TMP2133]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4047]], label [[COND_TRUE4049:%.*]], label [[COND_FALSE4050:%.*]]
+// SIMD-ONLY0:       cond.true4049:
+// SIMD-ONLY0-NEXT:    [[TMP2134:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4051:%.*]]
+// SIMD-ONLY0:       cond.false4050:
+// SIMD-ONLY0-NEXT:    [[TMP2135:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4051]]
+// SIMD-ONLY0:       cond.end4051:
+// SIMD-ONLY0-NEXT:    [[COND4052:%.*]] = phi i64 [ [[TMP2134]], [[COND_TRUE4049]] ], [ [[TMP2135]], [[COND_FALSE4050]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4052]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2136:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2137:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4053:%.*]] = icmp eq i64 [[TMP2136]], [[TMP2137]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4053]], label [[IF_THEN4055:%.*]], label [[IF_END4056:%.*]]
+// SIMD-ONLY0:       if.then4055:
+// SIMD-ONLY0-NEXT:    [[TMP2138:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2138]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4056]]
+// SIMD-ONLY0:       if.end4056:
+// SIMD-ONLY0-NEXT:    [[TMP2139:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2140:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4057:%.*]] = icmp eq i64 [[TMP2139]], [[TMP2140]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4057]], label [[IF_THEN4059:%.*]], label [[IF_END4060:%.*]]
+// SIMD-ONLY0:       if.then4059:
+// SIMD-ONLY0-NEXT:    [[TMP2141:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2141]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4060]]
+// SIMD-ONLY0:       if.end4060:
+// SIMD-ONLY0-NEXT:    [[TMP2142:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2143:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4061:%.*]] = icmp ugt i64 [[TMP2142]], [[TMP2143]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4061]], label [[COND_TRUE4063:%.*]], label [[COND_FALSE4064:%.*]]
+// SIMD-ONLY0:       cond.true4063:
+// SIMD-ONLY0-NEXT:    [[TMP2144:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4065:%.*]]
+// SIMD-ONLY0:       cond.false4064:
+// SIMD-ONLY0-NEXT:    [[TMP2145:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4065]]
+// SIMD-ONLY0:       cond.end4065:
+// SIMD-ONLY0-NEXT:    [[COND4066:%.*]] = phi i64 [ [[TMP2144]], [[COND_TRUE4063]] ], [ [[TMP2145]], [[COND_FALSE4064]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4066]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2146:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2147:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4067:%.*]] = icmp ult i64 [[TMP2146]], [[TMP2147]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4067]], label [[COND_TRUE4069:%.*]], label [[COND_FALSE4070:%.*]]
+// SIMD-ONLY0:       cond.true4069:
+// SIMD-ONLY0-NEXT:    [[TMP2148:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4071:%.*]]
+// SIMD-ONLY0:       cond.false4070:
+// SIMD-ONLY0-NEXT:    [[TMP2149:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4071]]
+// SIMD-ONLY0:       cond.end4071:
+// SIMD-ONLY0-NEXT:    [[COND4072:%.*]] = phi i64 [ [[TMP2148]], [[COND_TRUE4069]] ], [ [[TMP2149]], [[COND_FALSE4070]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4072]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2150:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2151:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4073:%.*]] = icmp ugt i64 [[TMP2150]], [[TMP2151]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4073]], label [[COND_TRUE4075:%.*]], label [[COND_FALSE4076:%.*]]
+// SIMD-ONLY0:       cond.true4075:
+// SIMD-ONLY0-NEXT:    [[TMP2152:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4077:%.*]]
+// SIMD-ONLY0:       cond.false4076:
+// SIMD-ONLY0-NEXT:    [[TMP2153:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4077]]
+// SIMD-ONLY0:       cond.end4077:
+// SIMD-ONLY0-NEXT:    [[COND4078:%.*]] = phi i64 [ [[TMP2152]], [[COND_TRUE4075]] ], [ [[TMP2153]], [[COND_FALSE4076]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4078]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2154:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2155:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4079:%.*]] = icmp ult i64 [[TMP2154]], [[TMP2155]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4079]], label [[COND_TRUE4081:%.*]], label [[COND_FALSE4082:%.*]]
+// SIMD-ONLY0:       cond.true4081:
+// SIMD-ONLY0-NEXT:    [[TMP2156:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4083:%.*]]
+// SIMD-ONLY0:       cond.false4082:
+// SIMD-ONLY0-NEXT:    [[TMP2157:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4083]]
+// SIMD-ONLY0:       cond.end4083:
+// SIMD-ONLY0-NEXT:    [[COND4084:%.*]] = phi i64 [ [[TMP2156]], [[COND_TRUE4081]] ], [ [[TMP2157]], [[COND_FALSE4082]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4084]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2158:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2159:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4085:%.*]] = icmp ugt i64 [[TMP2158]], [[TMP2159]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4085]], label [[IF_THEN4087:%.*]], label [[IF_END4088:%.*]]
+// SIMD-ONLY0:       if.then4087:
+// SIMD-ONLY0-NEXT:    [[TMP2160:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2160]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4088]]
+// SIMD-ONLY0:       if.end4088:
+// SIMD-ONLY0-NEXT:    [[TMP2161:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2162:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4089:%.*]] = icmp ult i64 [[TMP2161]], [[TMP2162]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4089]], label [[IF_THEN4091:%.*]], label [[IF_END4092:%.*]]
+// SIMD-ONLY0:       if.then4091:
+// SIMD-ONLY0-NEXT:    [[TMP2163:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2163]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4092]]
+// SIMD-ONLY0:       if.end4092:
+// SIMD-ONLY0-NEXT:    [[TMP2164:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2165:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4093:%.*]] = icmp ugt i64 [[TMP2164]], [[TMP2165]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4093]], label [[IF_THEN4095:%.*]], label [[IF_END4096:%.*]]
+// SIMD-ONLY0:       if.then4095:
+// SIMD-ONLY0-NEXT:    [[TMP2166:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2166]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4096]]
+// SIMD-ONLY0:       if.end4096:
+// SIMD-ONLY0-NEXT:    [[TMP2167:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2168:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4097:%.*]] = icmp ult i64 [[TMP2167]], [[TMP2168]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4097]], label [[IF_THEN4099:%.*]], label [[IF_END4100:%.*]]
+// SIMD-ONLY0:       if.then4099:
+// SIMD-ONLY0-NEXT:    [[TMP2169:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2169]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4100]]
+// SIMD-ONLY0:       if.end4100:
+// SIMD-ONLY0-NEXT:    [[TMP2170:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2171:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4101:%.*]] = icmp eq i64 [[TMP2170]], [[TMP2171]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4101]], label [[COND_TRUE4103:%.*]], label [[COND_FALSE4104:%.*]]
+// SIMD-ONLY0:       cond.true4103:
+// SIMD-ONLY0-NEXT:    [[TMP2172:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4105:%.*]]
+// SIMD-ONLY0:       cond.false4104:
+// SIMD-ONLY0-NEXT:    [[TMP2173:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4105]]
+// SIMD-ONLY0:       cond.end4105:
+// SIMD-ONLY0-NEXT:    [[COND4106:%.*]] = phi i64 [ [[TMP2172]], [[COND_TRUE4103]] ], [ [[TMP2173]], [[COND_FALSE4104]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4106]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2174:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2175:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4107:%.*]] = icmp eq i64 [[TMP2174]], [[TMP2175]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4107]], label [[COND_TRUE4109:%.*]], label [[COND_FALSE4110:%.*]]
+// SIMD-ONLY0:       cond.true4109:
+// SIMD-ONLY0-NEXT:    [[TMP2176:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4111:%.*]]
+// SIMD-ONLY0:       cond.false4110:
+// SIMD-ONLY0-NEXT:    [[TMP2177:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4111]]
+// SIMD-ONLY0:       cond.end4111:
+// SIMD-ONLY0-NEXT:    [[COND4112:%.*]] = phi i64 [ [[TMP2176]], [[COND_TRUE4109]] ], [ [[TMP2177]], [[COND_FALSE4110]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4112]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2178:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2179:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4113:%.*]] = icmp eq i64 [[TMP2178]], [[TMP2179]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4113]], label [[IF_THEN4115:%.*]], label [[IF_END4116:%.*]]
+// SIMD-ONLY0:       if.then4115:
+// SIMD-ONLY0-NEXT:    [[TMP2180:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2180]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4116]]
+// SIMD-ONLY0:       if.end4116:
+// SIMD-ONLY0-NEXT:    [[TMP2181:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2182:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4117:%.*]] = icmp eq i64 [[TMP2181]], [[TMP2182]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4117]], label [[IF_THEN4119:%.*]], label [[IF_END4120:%.*]]
+// SIMD-ONLY0:       if.then4119:
+// SIMD-ONLY0-NEXT:    [[TMP2183:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2183]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4120]]
+// SIMD-ONLY0:       if.end4120:
+// SIMD-ONLY0-NEXT:    [[TMP2184:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2185:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4121:%.*]] = icmp sgt i64 [[TMP2184]], [[TMP2185]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4121]], label [[COND_TRUE4123:%.*]], label [[COND_FALSE4124:%.*]]
+// SIMD-ONLY0:       cond.true4123:
+// SIMD-ONLY0-NEXT:    [[TMP2186:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4125:%.*]]
+// SIMD-ONLY0:       cond.false4124:
+// SIMD-ONLY0-NEXT:    [[TMP2187:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4125]]
+// SIMD-ONLY0:       cond.end4125:
+// SIMD-ONLY0-NEXT:    [[COND4126:%.*]] = phi i64 [ [[TMP2186]], [[COND_TRUE4123]] ], [ [[TMP2187]], [[COND_FALSE4124]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4126]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2188:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2189:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4127:%.*]] = icmp slt i64 [[TMP2188]], [[TMP2189]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4127]], label [[COND_TRUE4129:%.*]], label [[COND_FALSE4130:%.*]]
+// SIMD-ONLY0:       cond.true4129:
+// SIMD-ONLY0-NEXT:    [[TMP2190:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4131:%.*]]
+// SIMD-ONLY0:       cond.false4130:
+// SIMD-ONLY0-NEXT:    [[TMP2191:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4131]]
+// SIMD-ONLY0:       cond.end4131:
+// SIMD-ONLY0-NEXT:    [[COND4132:%.*]] = phi i64 [ [[TMP2190]], [[COND_TRUE4129]] ], [ [[TMP2191]], [[COND_FALSE4130]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4132]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2192:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2193:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4133:%.*]] = icmp sgt i64 [[TMP2192]], [[TMP2193]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4133]], label [[COND_TRUE4135:%.*]], label [[COND_FALSE4136:%.*]]
+// SIMD-ONLY0:       cond.true4135:
+// SIMD-ONLY0-NEXT:    [[TMP2194:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4137:%.*]]
+// SIMD-ONLY0:       cond.false4136:
+// SIMD-ONLY0-NEXT:    [[TMP2195:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4137]]
+// SIMD-ONLY0:       cond.end4137:
+// SIMD-ONLY0-NEXT:    [[COND4138:%.*]] = phi i64 [ [[TMP2194]], [[COND_TRUE4135]] ], [ [[TMP2195]], [[COND_FALSE4136]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4138]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2196:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2197:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4139:%.*]] = icmp slt i64 [[TMP2196]], [[TMP2197]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4139]], label [[COND_TRUE4141:%.*]], label [[COND_FALSE4142:%.*]]
+// SIMD-ONLY0:       cond.true4141:
+// SIMD-ONLY0-NEXT:    [[TMP2198:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4143:%.*]]
+// SIMD-ONLY0:       cond.false4142:
+// SIMD-ONLY0-NEXT:    [[TMP2199:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4143]]
+// SIMD-ONLY0:       cond.end4143:
+// SIMD-ONLY0-NEXT:    [[COND4144:%.*]] = phi i64 [ [[TMP2198]], [[COND_TRUE4141]] ], [ [[TMP2199]], [[COND_FALSE4142]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4144]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2200:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2201:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4145:%.*]] = icmp sgt i64 [[TMP2200]], [[TMP2201]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4145]], label [[IF_THEN4147:%.*]], label [[IF_END4148:%.*]]
+// SIMD-ONLY0:       if.then4147:
+// SIMD-ONLY0-NEXT:    [[TMP2202:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2202]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4148]]
+// SIMD-ONLY0:       if.end4148:
+// SIMD-ONLY0-NEXT:    [[TMP2203:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2204:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4149:%.*]] = icmp slt i64 [[TMP2203]], [[TMP2204]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4149]], label [[IF_THEN4151:%.*]], label [[IF_END4152:%.*]]
+// SIMD-ONLY0:       if.then4151:
+// SIMD-ONLY0-NEXT:    [[TMP2205:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2205]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4152]]
+// SIMD-ONLY0:       if.end4152:
+// SIMD-ONLY0-NEXT:    [[TMP2206:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2207:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4153:%.*]] = icmp sgt i64 [[TMP2206]], [[TMP2207]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4153]], label [[IF_THEN4155:%.*]], label [[IF_END4156:%.*]]
+// SIMD-ONLY0:       if.then4155:
+// SIMD-ONLY0-NEXT:    [[TMP2208:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2208]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4156]]
+// SIMD-ONLY0:       if.end4156:
+// SIMD-ONLY0-NEXT:    [[TMP2209:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2210:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4157:%.*]] = icmp slt i64 [[TMP2209]], [[TMP2210]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4157]], label [[IF_THEN4159:%.*]], label [[IF_END4160:%.*]]
+// SIMD-ONLY0:       if.then4159:
+// SIMD-ONLY0-NEXT:    [[TMP2211:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2211]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4160]]
+// SIMD-ONLY0:       if.end4160:
+// SIMD-ONLY0-NEXT:    [[TMP2212:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2213:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4161:%.*]] = icmp eq i64 [[TMP2212]], [[TMP2213]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4161]], label [[COND_TRUE4163:%.*]], label [[COND_FALSE4164:%.*]]
+// SIMD-ONLY0:       cond.true4163:
+// SIMD-ONLY0-NEXT:    [[TMP2214:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4165:%.*]]
+// SIMD-ONLY0:       cond.false4164:
+// SIMD-ONLY0-NEXT:    [[TMP2215:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4165]]
+// SIMD-ONLY0:       cond.end4165:
+// SIMD-ONLY0-NEXT:    [[COND4166:%.*]] = phi i64 [ [[TMP2214]], [[COND_TRUE4163]] ], [ [[TMP2215]], [[COND_FALSE4164]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4166]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2216:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2217:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4167:%.*]] = icmp eq i64 [[TMP2216]], [[TMP2217]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4167]], label [[COND_TRUE4169:%.*]], label [[COND_FALSE4170:%.*]]
+// SIMD-ONLY0:       cond.true4169:
+// SIMD-ONLY0-NEXT:    [[TMP2218:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4171:%.*]]
+// SIMD-ONLY0:       cond.false4170:
+// SIMD-ONLY0-NEXT:    [[TMP2219:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4171]]
+// SIMD-ONLY0:       cond.end4171:
+// SIMD-ONLY0-NEXT:    [[COND4172:%.*]] = phi i64 [ [[TMP2218]], [[COND_TRUE4169]] ], [ [[TMP2219]], [[COND_FALSE4170]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4172]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2220:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2221:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4173:%.*]] = icmp eq i64 [[TMP2220]], [[TMP2221]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4173]], label [[IF_THEN4175:%.*]], label [[IF_END4176:%.*]]
+// SIMD-ONLY0:       if.then4175:
+// SIMD-ONLY0-NEXT:    [[TMP2222:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2222]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4176]]
+// SIMD-ONLY0:       if.end4176:
+// SIMD-ONLY0-NEXT:    [[TMP2223:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2224:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4177:%.*]] = icmp eq i64 [[TMP2223]], [[TMP2224]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4177]], label [[IF_THEN4179:%.*]], label [[IF_END4180:%.*]]
+// SIMD-ONLY0:       if.then4179:
+// SIMD-ONLY0-NEXT:    [[TMP2225:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2225]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4180]]
+// SIMD-ONLY0:       if.end4180:
+// SIMD-ONLY0-NEXT:    [[TMP2226:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2227:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4181:%.*]] = icmp ugt i64 [[TMP2226]], [[TMP2227]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4181]], label [[COND_TRUE4183:%.*]], label [[COND_FALSE4184:%.*]]
+// SIMD-ONLY0:       cond.true4183:
+// SIMD-ONLY0-NEXT:    [[TMP2228:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4185:%.*]]
+// SIMD-ONLY0:       cond.false4184:
+// SIMD-ONLY0-NEXT:    [[TMP2229:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4185]]
+// SIMD-ONLY0:       cond.end4185:
+// SIMD-ONLY0-NEXT:    [[COND4186:%.*]] = phi i64 [ [[TMP2228]], [[COND_TRUE4183]] ], [ [[TMP2229]], [[COND_FALSE4184]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4186]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2230:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2231:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4187:%.*]] = icmp ult i64 [[TMP2230]], [[TMP2231]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4187]], label [[COND_TRUE4189:%.*]], label [[COND_FALSE4190:%.*]]
+// SIMD-ONLY0:       cond.true4189:
+// SIMD-ONLY0-NEXT:    [[TMP2232:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4191:%.*]]
+// SIMD-ONLY0:       cond.false4190:
+// SIMD-ONLY0-NEXT:    [[TMP2233:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4191]]
+// SIMD-ONLY0:       cond.end4191:
+// SIMD-ONLY0-NEXT:    [[COND4192:%.*]] = phi i64 [ [[TMP2232]], [[COND_TRUE4189]] ], [ [[TMP2233]], [[COND_FALSE4190]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4192]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2234:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2235:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4193:%.*]] = icmp ugt i64 [[TMP2234]], [[TMP2235]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4193]], label [[COND_TRUE4195:%.*]], label [[COND_FALSE4196:%.*]]
+// SIMD-ONLY0:       cond.true4195:
+// SIMD-ONLY0-NEXT:    [[TMP2236:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4197:%.*]]
+// SIMD-ONLY0:       cond.false4196:
+// SIMD-ONLY0-NEXT:    [[TMP2237:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4197]]
+// SIMD-ONLY0:       cond.end4197:
+// SIMD-ONLY0-NEXT:    [[COND4198:%.*]] = phi i64 [ [[TMP2236]], [[COND_TRUE4195]] ], [ [[TMP2237]], [[COND_FALSE4196]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4198]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2238:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2239:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4199:%.*]] = icmp ult i64 [[TMP2238]], [[TMP2239]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4199]], label [[COND_TRUE4201:%.*]], label [[COND_FALSE4202:%.*]]
+// SIMD-ONLY0:       cond.true4201:
+// SIMD-ONLY0-NEXT:    [[TMP2240:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4203:%.*]]
+// SIMD-ONLY0:       cond.false4202:
+// SIMD-ONLY0-NEXT:    [[TMP2241:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4203]]
+// SIMD-ONLY0:       cond.end4203:
+// SIMD-ONLY0-NEXT:    [[COND4204:%.*]] = phi i64 [ [[TMP2240]], [[COND_TRUE4201]] ], [ [[TMP2241]], [[COND_FALSE4202]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4204]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2242:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2243:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4205:%.*]] = icmp ugt i64 [[TMP2242]], [[TMP2243]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4205]], label [[IF_THEN4207:%.*]], label [[IF_END4208:%.*]]
+// SIMD-ONLY0:       if.then4207:
+// SIMD-ONLY0-NEXT:    [[TMP2244:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2244]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4208]]
+// SIMD-ONLY0:       if.end4208:
+// SIMD-ONLY0-NEXT:    [[TMP2245:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2246:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4209:%.*]] = icmp ult i64 [[TMP2245]], [[TMP2246]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4209]], label [[IF_THEN4211:%.*]], label [[IF_END4212:%.*]]
+// SIMD-ONLY0:       if.then4211:
+// SIMD-ONLY0-NEXT:    [[TMP2247:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2247]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4212]]
+// SIMD-ONLY0:       if.end4212:
+// SIMD-ONLY0-NEXT:    [[TMP2248:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2249:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4213:%.*]] = icmp ugt i64 [[TMP2248]], [[TMP2249]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4213]], label [[IF_THEN4215:%.*]], label [[IF_END4216:%.*]]
+// SIMD-ONLY0:       if.then4215:
+// SIMD-ONLY0-NEXT:    [[TMP2250:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2250]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4216]]
+// SIMD-ONLY0:       if.end4216:
+// SIMD-ONLY0-NEXT:    [[TMP2251:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2252:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4217:%.*]] = icmp ult i64 [[TMP2251]], [[TMP2252]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4217]], label [[IF_THEN4219:%.*]], label [[IF_END4220:%.*]]
+// SIMD-ONLY0:       if.then4219:
+// SIMD-ONLY0-NEXT:    [[TMP2253:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2253]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4220]]
+// SIMD-ONLY0:       if.end4220:
+// SIMD-ONLY0-NEXT:    [[TMP2254:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2255:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4221:%.*]] = icmp eq i64 [[TMP2254]], [[TMP2255]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4221]], label [[COND_TRUE4223:%.*]], label [[COND_FALSE4224:%.*]]
+// SIMD-ONLY0:       cond.true4223:
+// SIMD-ONLY0-NEXT:    [[TMP2256:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4225:%.*]]
+// SIMD-ONLY0:       cond.false4224:
+// SIMD-ONLY0-NEXT:    [[TMP2257:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4225]]
+// SIMD-ONLY0:       cond.end4225:
+// SIMD-ONLY0-NEXT:    [[COND4226:%.*]] = phi i64 [ [[TMP2256]], [[COND_TRUE4223]] ], [ [[TMP2257]], [[COND_FALSE4224]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4226]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2258:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2259:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4227:%.*]] = icmp eq i64 [[TMP2258]], [[TMP2259]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4227]], label [[COND_TRUE4229:%.*]], label [[COND_FALSE4230:%.*]]
+// SIMD-ONLY0:       cond.true4229:
+// SIMD-ONLY0-NEXT:    [[TMP2260:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4231:%.*]]
+// SIMD-ONLY0:       cond.false4230:
+// SIMD-ONLY0-NEXT:    [[TMP2261:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4231]]
+// SIMD-ONLY0:       cond.end4231:
+// SIMD-ONLY0-NEXT:    [[COND4232:%.*]] = phi i64 [ [[TMP2260]], [[COND_TRUE4229]] ], [ [[TMP2261]], [[COND_FALSE4230]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4232]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2262:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2263:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4233:%.*]] = icmp eq i64 [[TMP2262]], [[TMP2263]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4233]], label [[IF_THEN4235:%.*]], label [[IF_END4236:%.*]]
+// SIMD-ONLY0:       if.then4235:
+// SIMD-ONLY0-NEXT:    [[TMP2264:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2264]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4236]]
+// SIMD-ONLY0:       if.end4236:
+// SIMD-ONLY0-NEXT:    [[TMP2265:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2266:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4237:%.*]] = icmp eq i64 [[TMP2265]], [[TMP2266]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4237]], label [[IF_THEN4239:%.*]], label [[IF_END4240:%.*]]
+// SIMD-ONLY0:       if.then4239:
+// SIMD-ONLY0-NEXT:    [[TMP2267:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2267]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4240]]
+// SIMD-ONLY0:       if.end4240:
+// SIMD-ONLY0-NEXT:    [[TMP2268:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2269:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4241:%.*]] = icmp sgt i64 [[TMP2268]], [[TMP2269]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4241]], label [[COND_TRUE4243:%.*]], label [[COND_FALSE4244:%.*]]
+// SIMD-ONLY0:       cond.true4243:
+// SIMD-ONLY0-NEXT:    [[TMP2270:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4245:%.*]]
+// SIMD-ONLY0:       cond.false4244:
+// SIMD-ONLY0-NEXT:    [[TMP2271:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4245]]
+// SIMD-ONLY0:       cond.end4245:
+// SIMD-ONLY0-NEXT:    [[COND4246:%.*]] = phi i64 [ [[TMP2270]], [[COND_TRUE4243]] ], [ [[TMP2271]], [[COND_FALSE4244]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4246]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2272:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2273:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4247:%.*]] = icmp slt i64 [[TMP2272]], [[TMP2273]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4247]], label [[COND_TRUE4249:%.*]], label [[COND_FALSE4250:%.*]]
+// SIMD-ONLY0:       cond.true4249:
+// SIMD-ONLY0-NEXT:    [[TMP2274:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4251:%.*]]
+// SIMD-ONLY0:       cond.false4250:
+// SIMD-ONLY0-NEXT:    [[TMP2275:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4251]]
+// SIMD-ONLY0:       cond.end4251:
+// SIMD-ONLY0-NEXT:    [[COND4252:%.*]] = phi i64 [ [[TMP2274]], [[COND_TRUE4249]] ], [ [[TMP2275]], [[COND_FALSE4250]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4252]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2276:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2277:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4253:%.*]] = icmp sgt i64 [[TMP2276]], [[TMP2277]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4253]], label [[COND_TRUE4255:%.*]], label [[COND_FALSE4256:%.*]]
+// SIMD-ONLY0:       cond.true4255:
+// SIMD-ONLY0-NEXT:    [[TMP2278:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4257:%.*]]
+// SIMD-ONLY0:       cond.false4256:
+// SIMD-ONLY0-NEXT:    [[TMP2279:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4257]]
+// SIMD-ONLY0:       cond.end4257:
+// SIMD-ONLY0-NEXT:    [[COND4258:%.*]] = phi i64 [ [[TMP2278]], [[COND_TRUE4255]] ], [ [[TMP2279]], [[COND_FALSE4256]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4258]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2280:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2281:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4259:%.*]] = icmp slt i64 [[TMP2280]], [[TMP2281]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4259]], label [[COND_TRUE4261:%.*]], label [[COND_FALSE4262:%.*]]
+// SIMD-ONLY0:       cond.true4261:
+// SIMD-ONLY0-NEXT:    [[TMP2282:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4263:%.*]]
+// SIMD-ONLY0:       cond.false4262:
+// SIMD-ONLY0-NEXT:    [[TMP2283:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4263]]
+// SIMD-ONLY0:       cond.end4263:
+// SIMD-ONLY0-NEXT:    [[COND4264:%.*]] = phi i64 [ [[TMP2282]], [[COND_TRUE4261]] ], [ [[TMP2283]], [[COND_FALSE4262]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4264]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2284:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2285:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4265:%.*]] = icmp sgt i64 [[TMP2284]], [[TMP2285]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4265]], label [[IF_THEN4267:%.*]], label [[IF_END4268:%.*]]
+// SIMD-ONLY0:       if.then4267:
+// SIMD-ONLY0-NEXT:    [[TMP2286:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2286]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4268]]
+// SIMD-ONLY0:       if.end4268:
+// SIMD-ONLY0-NEXT:    [[TMP2287:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2288:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4269:%.*]] = icmp slt i64 [[TMP2287]], [[TMP2288]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4269]], label [[IF_THEN4271:%.*]], label [[IF_END4272:%.*]]
+// SIMD-ONLY0:       if.then4271:
+// SIMD-ONLY0-NEXT:    [[TMP2289:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2289]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4272]]
+// SIMD-ONLY0:       if.end4272:
+// SIMD-ONLY0-NEXT:    [[TMP2290:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2291:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4273:%.*]] = icmp sgt i64 [[TMP2290]], [[TMP2291]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4273]], label [[IF_THEN4275:%.*]], label [[IF_END4276:%.*]]
+// SIMD-ONLY0:       if.then4275:
+// SIMD-ONLY0-NEXT:    [[TMP2292:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2292]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4276]]
+// SIMD-ONLY0:       if.end4276:
+// SIMD-ONLY0-NEXT:    [[TMP2293:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2294:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4277:%.*]] = icmp slt i64 [[TMP2293]], [[TMP2294]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4277]], label [[IF_THEN4279:%.*]], label [[IF_END4280:%.*]]
+// SIMD-ONLY0:       if.then4279:
+// SIMD-ONLY0-NEXT:    [[TMP2295:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2295]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4280]]
+// SIMD-ONLY0:       if.end4280:
+// SIMD-ONLY0-NEXT:    [[TMP2296:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2297:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4281:%.*]] = icmp eq i64 [[TMP2296]], [[TMP2297]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4281]], label [[COND_TRUE4283:%.*]], label [[COND_FALSE4284:%.*]]
+// SIMD-ONLY0:       cond.true4283:
+// SIMD-ONLY0-NEXT:    [[TMP2298:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4285:%.*]]
+// SIMD-ONLY0:       cond.false4284:
+// SIMD-ONLY0-NEXT:    [[TMP2299:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4285]]
+// SIMD-ONLY0:       cond.end4285:
+// SIMD-ONLY0-NEXT:    [[COND4286:%.*]] = phi i64 [ [[TMP2298]], [[COND_TRUE4283]] ], [ [[TMP2299]], [[COND_FALSE4284]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4286]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2300:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2301:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4287:%.*]] = icmp eq i64 [[TMP2300]], [[TMP2301]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4287]], label [[COND_TRUE4289:%.*]], label [[COND_FALSE4290:%.*]]
+// SIMD-ONLY0:       cond.true4289:
+// SIMD-ONLY0-NEXT:    [[TMP2302:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4291:%.*]]
+// SIMD-ONLY0:       cond.false4290:
+// SIMD-ONLY0-NEXT:    [[TMP2303:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4291]]
+// SIMD-ONLY0:       cond.end4291:
+// SIMD-ONLY0-NEXT:    [[COND4292:%.*]] = phi i64 [ [[TMP2302]], [[COND_TRUE4289]] ], [ [[TMP2303]], [[COND_FALSE4290]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4292]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2304:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2305:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4293:%.*]] = icmp eq i64 [[TMP2304]], [[TMP2305]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4293]], label [[IF_THEN4295:%.*]], label [[IF_END4296:%.*]]
+// SIMD-ONLY0:       if.then4295:
+// SIMD-ONLY0-NEXT:    [[TMP2306:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2306]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4296]]
+// SIMD-ONLY0:       if.end4296:
+// SIMD-ONLY0-NEXT:    [[TMP2307:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2308:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4297:%.*]] = icmp eq i64 [[TMP2307]], [[TMP2308]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4297]], label [[IF_THEN4299:%.*]], label [[IF_END4300:%.*]]
+// SIMD-ONLY0:       if.then4299:
+// SIMD-ONLY0-NEXT:    [[TMP2309:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2309]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4300]]
+// SIMD-ONLY0:       if.end4300:
+// SIMD-ONLY0-NEXT:    [[TMP2310:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2311:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4301:%.*]] = icmp ugt i64 [[TMP2310]], [[TMP2311]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4301]], label [[COND_TRUE4303:%.*]], label [[COND_FALSE4304:%.*]]
+// SIMD-ONLY0:       cond.true4303:
+// SIMD-ONLY0-NEXT:    [[TMP2312:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4305:%.*]]
+// SIMD-ONLY0:       cond.false4304:
+// SIMD-ONLY0-NEXT:    [[TMP2313:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4305]]
+// SIMD-ONLY0:       cond.end4305:
+// SIMD-ONLY0-NEXT:    [[COND4306:%.*]] = phi i64 [ [[TMP2312]], [[COND_TRUE4303]] ], [ [[TMP2313]], [[COND_FALSE4304]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4306]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2314:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2315:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4307:%.*]] = icmp ult i64 [[TMP2314]], [[TMP2315]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4307]], label [[COND_TRUE4309:%.*]], label [[COND_FALSE4310:%.*]]
+// SIMD-ONLY0:       cond.true4309:
+// SIMD-ONLY0-NEXT:    [[TMP2316:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4311:%.*]]
+// SIMD-ONLY0:       cond.false4310:
+// SIMD-ONLY0-NEXT:    [[TMP2317:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4311]]
+// SIMD-ONLY0:       cond.end4311:
+// SIMD-ONLY0-NEXT:    [[COND4312:%.*]] = phi i64 [ [[TMP2316]], [[COND_TRUE4309]] ], [ [[TMP2317]], [[COND_FALSE4310]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4312]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2318:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2319:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4313:%.*]] = icmp ugt i64 [[TMP2318]], [[TMP2319]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4313]], label [[COND_TRUE4315:%.*]], label [[COND_FALSE4316:%.*]]
+// SIMD-ONLY0:       cond.true4315:
+// SIMD-ONLY0-NEXT:    [[TMP2320:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4317:%.*]]
+// SIMD-ONLY0:       cond.false4316:
+// SIMD-ONLY0-NEXT:    [[TMP2321:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4317]]
+// SIMD-ONLY0:       cond.end4317:
+// SIMD-ONLY0-NEXT:    [[COND4318:%.*]] = phi i64 [ [[TMP2320]], [[COND_TRUE4315]] ], [ [[TMP2321]], [[COND_FALSE4316]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4318]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2322:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2323:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4319:%.*]] = icmp ult i64 [[TMP2322]], [[TMP2323]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4319]], label [[COND_TRUE4321:%.*]], label [[COND_FALSE4322:%.*]]
+// SIMD-ONLY0:       cond.true4321:
+// SIMD-ONLY0-NEXT:    [[TMP2324:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4323:%.*]]
+// SIMD-ONLY0:       cond.false4322:
+// SIMD-ONLY0-NEXT:    [[TMP2325:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4323]]
+// SIMD-ONLY0:       cond.end4323:
+// SIMD-ONLY0-NEXT:    [[COND4324:%.*]] = phi i64 [ [[TMP2324]], [[COND_TRUE4321]] ], [ [[TMP2325]], [[COND_FALSE4322]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4324]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2326:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2327:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4325:%.*]] = icmp ugt i64 [[TMP2326]], [[TMP2327]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4325]], label [[IF_THEN4327:%.*]], label [[IF_END4328:%.*]]
+// SIMD-ONLY0:       if.then4327:
+// SIMD-ONLY0-NEXT:    [[TMP2328:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2328]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4328]]
+// SIMD-ONLY0:       if.end4328:
+// SIMD-ONLY0-NEXT:    [[TMP2329:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2330:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4329:%.*]] = icmp ult i64 [[TMP2329]], [[TMP2330]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4329]], label [[IF_THEN4331:%.*]], label [[IF_END4332:%.*]]
+// SIMD-ONLY0:       if.then4331:
+// SIMD-ONLY0-NEXT:    [[TMP2331:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2331]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4332]]
+// SIMD-ONLY0:       if.end4332:
+// SIMD-ONLY0-NEXT:    [[TMP2332:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2333:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4333:%.*]] = icmp ugt i64 [[TMP2332]], [[TMP2333]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4333]], label [[IF_THEN4335:%.*]], label [[IF_END4336:%.*]]
+// SIMD-ONLY0:       if.then4335:
+// SIMD-ONLY0-NEXT:    [[TMP2334:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2334]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4336]]
+// SIMD-ONLY0:       if.end4336:
+// SIMD-ONLY0-NEXT:    [[TMP2335:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2336:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4337:%.*]] = icmp ult i64 [[TMP2335]], [[TMP2336]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4337]], label [[IF_THEN4339:%.*]], label [[IF_END4340:%.*]]
+// SIMD-ONLY0:       if.then4339:
+// SIMD-ONLY0-NEXT:    [[TMP2337:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2337]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4340]]
+// SIMD-ONLY0:       if.end4340:
+// SIMD-ONLY0-NEXT:    [[TMP2338:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2339:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4341:%.*]] = icmp eq i64 [[TMP2338]], [[TMP2339]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4341]], label [[COND_TRUE4343:%.*]], label [[COND_FALSE4344:%.*]]
+// SIMD-ONLY0:       cond.true4343:
+// SIMD-ONLY0-NEXT:    [[TMP2340:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4345:%.*]]
+// SIMD-ONLY0:       cond.false4344:
+// SIMD-ONLY0-NEXT:    [[TMP2341:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4345]]
+// SIMD-ONLY0:       cond.end4345:
+// SIMD-ONLY0-NEXT:    [[COND4346:%.*]] = phi i64 [ [[TMP2340]], [[COND_TRUE4343]] ], [ [[TMP2341]], [[COND_FALSE4344]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4346]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2342:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2343:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4347:%.*]] = icmp eq i64 [[TMP2342]], [[TMP2343]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4347]], label [[COND_TRUE4349:%.*]], label [[COND_FALSE4350:%.*]]
+// SIMD-ONLY0:       cond.true4349:
+// SIMD-ONLY0-NEXT:    [[TMP2344:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4351:%.*]]
+// SIMD-ONLY0:       cond.false4350:
+// SIMD-ONLY0-NEXT:    [[TMP2345:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4351]]
+// SIMD-ONLY0:       cond.end4351:
+// SIMD-ONLY0-NEXT:    [[COND4352:%.*]] = phi i64 [ [[TMP2344]], [[COND_TRUE4349]] ], [ [[TMP2345]], [[COND_FALSE4350]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4352]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2346:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2347:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4353:%.*]] = icmp eq i64 [[TMP2346]], [[TMP2347]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4353]], label [[IF_THEN4355:%.*]], label [[IF_END4356:%.*]]
+// SIMD-ONLY0:       if.then4355:
+// SIMD-ONLY0-NEXT:    [[TMP2348:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2348]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4356]]
+// SIMD-ONLY0:       if.end4356:
+// SIMD-ONLY0-NEXT:    [[TMP2349:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2350:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4357:%.*]] = icmp eq i64 [[TMP2349]], [[TMP2350]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4357]], label [[IF_THEN4359:%.*]], label [[IF_END4360:%.*]]
+// SIMD-ONLY0:       if.then4359:
+// SIMD-ONLY0-NEXT:    [[TMP2351:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2351]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4360]]
+// SIMD-ONLY0:       if.end4360:
+// SIMD-ONLY0-NEXT:    [[TMP2352:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2353:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4361:%.*]] = icmp sgt i64 [[TMP2352]], [[TMP2353]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4361]], label [[COND_TRUE4363:%.*]], label [[COND_FALSE4364:%.*]]
+// SIMD-ONLY0:       cond.true4363:
+// SIMD-ONLY0-NEXT:    [[TMP2354:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4365:%.*]]
+// SIMD-ONLY0:       cond.false4364:
+// SIMD-ONLY0-NEXT:    [[TMP2355:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4365]]
+// SIMD-ONLY0:       cond.end4365:
+// SIMD-ONLY0-NEXT:    [[COND4366:%.*]] = phi i64 [ [[TMP2354]], [[COND_TRUE4363]] ], [ [[TMP2355]], [[COND_FALSE4364]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4366]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2356:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2357:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4367:%.*]] = icmp slt i64 [[TMP2356]], [[TMP2357]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4367]], label [[COND_TRUE4369:%.*]], label [[COND_FALSE4370:%.*]]
+// SIMD-ONLY0:       cond.true4369:
+// SIMD-ONLY0-NEXT:    [[TMP2358:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4371:%.*]]
+// SIMD-ONLY0:       cond.false4370:
+// SIMD-ONLY0-NEXT:    [[TMP2359:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4371]]
+// SIMD-ONLY0:       cond.end4371:
+// SIMD-ONLY0-NEXT:    [[COND4372:%.*]] = phi i64 [ [[TMP2358]], [[COND_TRUE4369]] ], [ [[TMP2359]], [[COND_FALSE4370]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4372]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2360:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2361:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4373:%.*]] = icmp sgt i64 [[TMP2360]], [[TMP2361]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4373]], label [[COND_TRUE4375:%.*]], label [[COND_FALSE4376:%.*]]
+// SIMD-ONLY0:       cond.true4375:
+// SIMD-ONLY0-NEXT:    [[TMP2362:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4377:%.*]]
+// SIMD-ONLY0:       cond.false4376:
+// SIMD-ONLY0-NEXT:    [[TMP2363:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4377]]
+// SIMD-ONLY0:       cond.end4377:
+// SIMD-ONLY0-NEXT:    [[COND4378:%.*]] = phi i64 [ [[TMP2362]], [[COND_TRUE4375]] ], [ [[TMP2363]], [[COND_FALSE4376]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4378]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2364:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2365:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4379:%.*]] = icmp slt i64 [[TMP2364]], [[TMP2365]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4379]], label [[COND_TRUE4381:%.*]], label [[COND_FALSE4382:%.*]]
+// SIMD-ONLY0:       cond.true4381:
+// SIMD-ONLY0-NEXT:    [[TMP2366:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4383:%.*]]
+// SIMD-ONLY0:       cond.false4382:
+// SIMD-ONLY0-NEXT:    [[TMP2367:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4383]]
+// SIMD-ONLY0:       cond.end4383:
+// SIMD-ONLY0-NEXT:    [[COND4384:%.*]] = phi i64 [ [[TMP2366]], [[COND_TRUE4381]] ], [ [[TMP2367]], [[COND_FALSE4382]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4384]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2368:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2369:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4385:%.*]] = icmp sgt i64 [[TMP2368]], [[TMP2369]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4385]], label [[IF_THEN4387:%.*]], label [[IF_END4388:%.*]]
+// SIMD-ONLY0:       if.then4387:
+// SIMD-ONLY0-NEXT:    [[TMP2370:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2370]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4388]]
+// SIMD-ONLY0:       if.end4388:
+// SIMD-ONLY0-NEXT:    [[TMP2371:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2372:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4389:%.*]] = icmp slt i64 [[TMP2371]], [[TMP2372]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4389]], label [[IF_THEN4391:%.*]], label [[IF_END4392:%.*]]
+// SIMD-ONLY0:       if.then4391:
+// SIMD-ONLY0-NEXT:    [[TMP2373:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2373]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4392]]
+// SIMD-ONLY0:       if.end4392:
+// SIMD-ONLY0-NEXT:    [[TMP2374:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2375:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4393:%.*]] = icmp sgt i64 [[TMP2374]], [[TMP2375]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4393]], label [[IF_THEN4395:%.*]], label [[IF_END4396:%.*]]
+// SIMD-ONLY0:       if.then4395:
+// SIMD-ONLY0-NEXT:    [[TMP2376:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2376]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4396]]
+// SIMD-ONLY0:       if.end4396:
+// SIMD-ONLY0-NEXT:    [[TMP2377:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2378:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4397:%.*]] = icmp slt i64 [[TMP2377]], [[TMP2378]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4397]], label [[IF_THEN4399:%.*]], label [[IF_END4400:%.*]]
+// SIMD-ONLY0:       if.then4399:
+// SIMD-ONLY0-NEXT:    [[TMP2379:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2379]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4400]]
+// SIMD-ONLY0:       if.end4400:
+// SIMD-ONLY0-NEXT:    [[TMP2380:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2381:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4401:%.*]] = icmp eq i64 [[TMP2380]], [[TMP2381]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4401]], label [[COND_TRUE4403:%.*]], label [[COND_FALSE4404:%.*]]
+// SIMD-ONLY0:       cond.true4403:
+// SIMD-ONLY0-NEXT:    [[TMP2382:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4405:%.*]]
+// SIMD-ONLY0:       cond.false4404:
+// SIMD-ONLY0-NEXT:    [[TMP2383:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4405]]
+// SIMD-ONLY0:       cond.end4405:
+// SIMD-ONLY0-NEXT:    [[COND4406:%.*]] = phi i64 [ [[TMP2382]], [[COND_TRUE4403]] ], [ [[TMP2383]], [[COND_FALSE4404]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4406]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2384:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2385:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4407:%.*]] = icmp eq i64 [[TMP2384]], [[TMP2385]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4407]], label [[COND_TRUE4409:%.*]], label [[COND_FALSE4410:%.*]]
+// SIMD-ONLY0:       cond.true4409:
+// SIMD-ONLY0-NEXT:    [[TMP2386:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4411:%.*]]
+// SIMD-ONLY0:       cond.false4410:
+// SIMD-ONLY0-NEXT:    [[TMP2387:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4411]]
+// SIMD-ONLY0:       cond.end4411:
+// SIMD-ONLY0-NEXT:    [[COND4412:%.*]] = phi i64 [ [[TMP2386]], [[COND_TRUE4409]] ], [ [[TMP2387]], [[COND_FALSE4410]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4412]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2388:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2389:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4413:%.*]] = icmp eq i64 [[TMP2388]], [[TMP2389]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4413]], label [[IF_THEN4415:%.*]], label [[IF_END4416:%.*]]
+// SIMD-ONLY0:       if.then4415:
+// SIMD-ONLY0-NEXT:    [[TMP2390:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2390]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4416]]
+// SIMD-ONLY0:       if.end4416:
+// SIMD-ONLY0-NEXT:    [[TMP2391:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2392:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4417:%.*]] = icmp eq i64 [[TMP2391]], [[TMP2392]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4417]], label [[IF_THEN4419:%.*]], label [[IF_END4420:%.*]]
+// SIMD-ONLY0:       if.then4419:
+// SIMD-ONLY0-NEXT:    [[TMP2393:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2393]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4420]]
+// SIMD-ONLY0:       if.end4420:
+// SIMD-ONLY0-NEXT:    [[TMP2394:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2395:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4421:%.*]] = icmp ugt i64 [[TMP2394]], [[TMP2395]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4421]], label [[COND_TRUE4423:%.*]], label [[COND_FALSE4424:%.*]]
+// SIMD-ONLY0:       cond.true4423:
+// SIMD-ONLY0-NEXT:    [[TMP2396:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4425:%.*]]
+// SIMD-ONLY0:       cond.false4424:
+// SIMD-ONLY0-NEXT:    [[TMP2397:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4425]]
+// SIMD-ONLY0:       cond.end4425:
+// SIMD-ONLY0-NEXT:    [[COND4426:%.*]] = phi i64 [ [[TMP2396]], [[COND_TRUE4423]] ], [ [[TMP2397]], [[COND_FALSE4424]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4426]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2398:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2399:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4427:%.*]] = icmp ult i64 [[TMP2398]], [[TMP2399]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4427]], label [[COND_TRUE4429:%.*]], label [[COND_FALSE4430:%.*]]
+// SIMD-ONLY0:       cond.true4429:
+// SIMD-ONLY0-NEXT:    [[TMP2400:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4431:%.*]]
+// SIMD-ONLY0:       cond.false4430:
+// SIMD-ONLY0-NEXT:    [[TMP2401:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4431]]
+// SIMD-ONLY0:       cond.end4431:
+// SIMD-ONLY0-NEXT:    [[COND4432:%.*]] = phi i64 [ [[TMP2400]], [[COND_TRUE4429]] ], [ [[TMP2401]], [[COND_FALSE4430]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4432]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2402:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2403:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4433:%.*]] = icmp ugt i64 [[TMP2402]], [[TMP2403]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4433]], label [[COND_TRUE4435:%.*]], label [[COND_FALSE4436:%.*]]
+// SIMD-ONLY0:       cond.true4435:
+// SIMD-ONLY0-NEXT:    [[TMP2404:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4437:%.*]]
+// SIMD-ONLY0:       cond.false4436:
+// SIMD-ONLY0-NEXT:    [[TMP2405:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4437]]
+// SIMD-ONLY0:       cond.end4437:
+// SIMD-ONLY0-NEXT:    [[COND4438:%.*]] = phi i64 [ [[TMP2404]], [[COND_TRUE4435]] ], [ [[TMP2405]], [[COND_FALSE4436]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4438]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2406:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2407:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4439:%.*]] = icmp ult i64 [[TMP2406]], [[TMP2407]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4439]], label [[COND_TRUE4441:%.*]], label [[COND_FALSE4442:%.*]]
+// SIMD-ONLY0:       cond.true4441:
+// SIMD-ONLY0-NEXT:    [[TMP2408:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4443:%.*]]
+// SIMD-ONLY0:       cond.false4442:
+// SIMD-ONLY0-NEXT:    [[TMP2409:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4443]]
+// SIMD-ONLY0:       cond.end4443:
+// SIMD-ONLY0-NEXT:    [[COND4444:%.*]] = phi i64 [ [[TMP2408]], [[COND_TRUE4441]] ], [ [[TMP2409]], [[COND_FALSE4442]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4444]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2410:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2411:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4445:%.*]] = icmp ugt i64 [[TMP2410]], [[TMP2411]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4445]], label [[IF_THEN4447:%.*]], label [[IF_END4448:%.*]]
+// SIMD-ONLY0:       if.then4447:
+// SIMD-ONLY0-NEXT:    [[TMP2412:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2412]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4448]]
+// SIMD-ONLY0:       if.end4448:
+// SIMD-ONLY0-NEXT:    [[TMP2413:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2414:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4449:%.*]] = icmp ult i64 [[TMP2413]], [[TMP2414]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4449]], label [[IF_THEN4451:%.*]], label [[IF_END4452:%.*]]
+// SIMD-ONLY0:       if.then4451:
+// SIMD-ONLY0-NEXT:    [[TMP2415:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2415]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4452]]
+// SIMD-ONLY0:       if.end4452:
+// SIMD-ONLY0-NEXT:    [[TMP2416:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2417:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4453:%.*]] = icmp ugt i64 [[TMP2416]], [[TMP2417]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4453]], label [[IF_THEN4455:%.*]], label [[IF_END4456:%.*]]
+// SIMD-ONLY0:       if.then4455:
+// SIMD-ONLY0-NEXT:    [[TMP2418:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2418]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4456]]
+// SIMD-ONLY0:       if.end4456:
+// SIMD-ONLY0-NEXT:    [[TMP2419:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2420:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4457:%.*]] = icmp ult i64 [[TMP2419]], [[TMP2420]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4457]], label [[IF_THEN4459:%.*]], label [[IF_END4460:%.*]]
+// SIMD-ONLY0:       if.then4459:
+// SIMD-ONLY0-NEXT:    [[TMP2421:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2421]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4460]]
+// SIMD-ONLY0:       if.end4460:
+// SIMD-ONLY0-NEXT:    [[TMP2422:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2423:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4461:%.*]] = icmp eq i64 [[TMP2422]], [[TMP2423]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4461]], label [[COND_TRUE4463:%.*]], label [[COND_FALSE4464:%.*]]
+// SIMD-ONLY0:       cond.true4463:
+// SIMD-ONLY0-NEXT:    [[TMP2424:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4465:%.*]]
+// SIMD-ONLY0:       cond.false4464:
+// SIMD-ONLY0-NEXT:    [[TMP2425:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4465]]
+// SIMD-ONLY0:       cond.end4465:
+// SIMD-ONLY0-NEXT:    [[COND4466:%.*]] = phi i64 [ [[TMP2424]], [[COND_TRUE4463]] ], [ [[TMP2425]], [[COND_FALSE4464]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4466]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2426:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2427:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4467:%.*]] = icmp eq i64 [[TMP2426]], [[TMP2427]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4467]], label [[COND_TRUE4469:%.*]], label [[COND_FALSE4470:%.*]]
+// SIMD-ONLY0:       cond.true4469:
+// SIMD-ONLY0-NEXT:    [[TMP2428:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4471:%.*]]
+// SIMD-ONLY0:       cond.false4470:
+// SIMD-ONLY0-NEXT:    [[TMP2429:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4471]]
+// SIMD-ONLY0:       cond.end4471:
+// SIMD-ONLY0-NEXT:    [[COND4472:%.*]] = phi i64 [ [[TMP2428]], [[COND_TRUE4469]] ], [ [[TMP2429]], [[COND_FALSE4470]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4472]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2430:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2431:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4473:%.*]] = icmp eq i64 [[TMP2430]], [[TMP2431]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4473]], label [[IF_THEN4475:%.*]], label [[IF_END4476:%.*]]
+// SIMD-ONLY0:       if.then4475:
+// SIMD-ONLY0-NEXT:    [[TMP2432:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2432]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4476]]
+// SIMD-ONLY0:       if.end4476:
+// SIMD-ONLY0-NEXT:    [[TMP2433:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2434:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4477:%.*]] = icmp eq i64 [[TMP2433]], [[TMP2434]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4477]], label [[IF_THEN4479:%.*]], label [[IF_END4480:%.*]]
+// SIMD-ONLY0:       if.then4479:
+// SIMD-ONLY0-NEXT:    [[TMP2435:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2435]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4480]]
+// SIMD-ONLY0:       if.end4480:
+// SIMD-ONLY0-NEXT:    [[TMP2436:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2437:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4481:%.*]] = icmp sgt i64 [[TMP2436]], [[TMP2437]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4481]], label [[COND_TRUE4483:%.*]], label [[COND_FALSE4484:%.*]]
+// SIMD-ONLY0:       cond.true4483:
+// SIMD-ONLY0-NEXT:    [[TMP2438:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4485:%.*]]
+// SIMD-ONLY0:       cond.false4484:
+// SIMD-ONLY0-NEXT:    [[TMP2439:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4485]]
+// SIMD-ONLY0:       cond.end4485:
+// SIMD-ONLY0-NEXT:    [[COND4486:%.*]] = phi i64 [ [[TMP2438]], [[COND_TRUE4483]] ], [ [[TMP2439]], [[COND_FALSE4484]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4486]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2440:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2441:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4487:%.*]] = icmp slt i64 [[TMP2440]], [[TMP2441]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4487]], label [[COND_TRUE4489:%.*]], label [[COND_FALSE4490:%.*]]
+// SIMD-ONLY0:       cond.true4489:
+// SIMD-ONLY0-NEXT:    [[TMP2442:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4491:%.*]]
+// SIMD-ONLY0:       cond.false4490:
+// SIMD-ONLY0-NEXT:    [[TMP2443:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4491]]
+// SIMD-ONLY0:       cond.end4491:
+// SIMD-ONLY0-NEXT:    [[COND4492:%.*]] = phi i64 [ [[TMP2442]], [[COND_TRUE4489]] ], [ [[TMP2443]], [[COND_FALSE4490]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4492]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2444:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2445:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4493:%.*]] = icmp sgt i64 [[TMP2444]], [[TMP2445]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4493]], label [[COND_TRUE4495:%.*]], label [[COND_FALSE4496:%.*]]
+// SIMD-ONLY0:       cond.true4495:
+// SIMD-ONLY0-NEXT:    [[TMP2446:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4497:%.*]]
+// SIMD-ONLY0:       cond.false4496:
+// SIMD-ONLY0-NEXT:    [[TMP2447:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4497]]
+// SIMD-ONLY0:       cond.end4497:
+// SIMD-ONLY0-NEXT:    [[COND4498:%.*]] = phi i64 [ [[TMP2446]], [[COND_TRUE4495]] ], [ [[TMP2447]], [[COND_FALSE4496]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4498]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2448:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2449:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4499:%.*]] = icmp slt i64 [[TMP2448]], [[TMP2449]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4499]], label [[COND_TRUE4501:%.*]], label [[COND_FALSE4502:%.*]]
+// SIMD-ONLY0:       cond.true4501:
+// SIMD-ONLY0-NEXT:    [[TMP2450:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4503:%.*]]
+// SIMD-ONLY0:       cond.false4502:
+// SIMD-ONLY0-NEXT:    [[TMP2451:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4503]]
+// SIMD-ONLY0:       cond.end4503:
+// SIMD-ONLY0-NEXT:    [[COND4504:%.*]] = phi i64 [ [[TMP2450]], [[COND_TRUE4501]] ], [ [[TMP2451]], [[COND_FALSE4502]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4504]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2452:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2453:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4505:%.*]] = icmp sgt i64 [[TMP2452]], [[TMP2453]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4505]], label [[IF_THEN4507:%.*]], label [[IF_END4508:%.*]]
+// SIMD-ONLY0:       if.then4507:
+// SIMD-ONLY0-NEXT:    [[TMP2454:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2454]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4508]]
+// SIMD-ONLY0:       if.end4508:
+// SIMD-ONLY0-NEXT:    [[TMP2455:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2456:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4509:%.*]] = icmp slt i64 [[TMP2455]], [[TMP2456]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4509]], label [[IF_THEN4511:%.*]], label [[IF_END4512:%.*]]
+// SIMD-ONLY0:       if.then4511:
+// SIMD-ONLY0-NEXT:    [[TMP2457:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2457]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4512]]
+// SIMD-ONLY0:       if.end4512:
+// SIMD-ONLY0-NEXT:    [[TMP2458:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2459:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4513:%.*]] = icmp sgt i64 [[TMP2458]], [[TMP2459]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4513]], label [[IF_THEN4515:%.*]], label [[IF_END4516:%.*]]
+// SIMD-ONLY0:       if.then4515:
+// SIMD-ONLY0-NEXT:    [[TMP2460:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2460]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4516]]
+// SIMD-ONLY0:       if.end4516:
+// SIMD-ONLY0-NEXT:    [[TMP2461:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2462:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4517:%.*]] = icmp slt i64 [[TMP2461]], [[TMP2462]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4517]], label [[IF_THEN4519:%.*]], label [[IF_END4520:%.*]]
+// SIMD-ONLY0:       if.then4519:
+// SIMD-ONLY0-NEXT:    [[TMP2463:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2463]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4520]]
+// SIMD-ONLY0:       if.end4520:
+// SIMD-ONLY0-NEXT:    [[TMP2464:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2465:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4521:%.*]] = icmp eq i64 [[TMP2464]], [[TMP2465]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4521]], label [[COND_TRUE4523:%.*]], label [[COND_FALSE4524:%.*]]
+// SIMD-ONLY0:       cond.true4523:
+// SIMD-ONLY0-NEXT:    [[TMP2466:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4525:%.*]]
+// SIMD-ONLY0:       cond.false4524:
+// SIMD-ONLY0-NEXT:    [[TMP2467:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4525]]
+// SIMD-ONLY0:       cond.end4525:
+// SIMD-ONLY0-NEXT:    [[COND4526:%.*]] = phi i64 [ [[TMP2466]], [[COND_TRUE4523]] ], [ [[TMP2467]], [[COND_FALSE4524]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4526]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2468:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2469:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4527:%.*]] = icmp eq i64 [[TMP2468]], [[TMP2469]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4527]], label [[COND_TRUE4529:%.*]], label [[COND_FALSE4530:%.*]]
+// SIMD-ONLY0:       cond.true4529:
+// SIMD-ONLY0-NEXT:    [[TMP2470:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4531:%.*]]
+// SIMD-ONLY0:       cond.false4530:
+// SIMD-ONLY0-NEXT:    [[TMP2471:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4531]]
+// SIMD-ONLY0:       cond.end4531:
+// SIMD-ONLY0-NEXT:    [[COND4532:%.*]] = phi i64 [ [[TMP2470]], [[COND_TRUE4529]] ], [ [[TMP2471]], [[COND_FALSE4530]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4532]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2472:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2473:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4533:%.*]] = icmp eq i64 [[TMP2472]], [[TMP2473]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4533]], label [[IF_THEN4535:%.*]], label [[IF_END4536:%.*]]
+// SIMD-ONLY0:       if.then4535:
+// SIMD-ONLY0-NEXT:    [[TMP2474:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2474]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4536]]
+// SIMD-ONLY0:       if.end4536:
+// SIMD-ONLY0-NEXT:    [[TMP2475:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2476:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4537:%.*]] = icmp eq i64 [[TMP2475]], [[TMP2476]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4537]], label [[IF_THEN4539:%.*]], label [[IF_END4540:%.*]]
+// SIMD-ONLY0:       if.then4539:
+// SIMD-ONLY0-NEXT:    [[TMP2477:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2477]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4540]]
+// SIMD-ONLY0:       if.end4540:
+// SIMD-ONLY0-NEXT:    [[TMP2478:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2479:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4541:%.*]] = icmp ugt i64 [[TMP2478]], [[TMP2479]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4541]], label [[COND_TRUE4543:%.*]], label [[COND_FALSE4544:%.*]]
+// SIMD-ONLY0:       cond.true4543:
+// SIMD-ONLY0-NEXT:    [[TMP2480:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4545:%.*]]
+// SIMD-ONLY0:       cond.false4544:
+// SIMD-ONLY0-NEXT:    [[TMP2481:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4545]]
+// SIMD-ONLY0:       cond.end4545:
+// SIMD-ONLY0-NEXT:    [[COND4546:%.*]] = phi i64 [ [[TMP2480]], [[COND_TRUE4543]] ], [ [[TMP2481]], [[COND_FALSE4544]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4546]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2482:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2483:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4547:%.*]] = icmp ult i64 [[TMP2482]], [[TMP2483]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4547]], label [[COND_TRUE4549:%.*]], label [[COND_FALSE4550:%.*]]
+// SIMD-ONLY0:       cond.true4549:
+// SIMD-ONLY0-NEXT:    [[TMP2484:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4551:%.*]]
+// SIMD-ONLY0:       cond.false4550:
+// SIMD-ONLY0-NEXT:    [[TMP2485:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4551]]
+// SIMD-ONLY0:       cond.end4551:
+// SIMD-ONLY0-NEXT:    [[COND4552:%.*]] = phi i64 [ [[TMP2484]], [[COND_TRUE4549]] ], [ [[TMP2485]], [[COND_FALSE4550]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4552]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2486:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2487:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4553:%.*]] = icmp ugt i64 [[TMP2486]], [[TMP2487]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4553]], label [[COND_TRUE4555:%.*]], label [[COND_FALSE4556:%.*]]
+// SIMD-ONLY0:       cond.true4555:
+// SIMD-ONLY0-NEXT:    [[TMP2488:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4557:%.*]]
+// SIMD-ONLY0:       cond.false4556:
+// SIMD-ONLY0-NEXT:    [[TMP2489:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4557]]
+// SIMD-ONLY0:       cond.end4557:
+// SIMD-ONLY0-NEXT:    [[COND4558:%.*]] = phi i64 [ [[TMP2488]], [[COND_TRUE4555]] ], [ [[TMP2489]], [[COND_FALSE4556]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4558]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2490:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2491:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4559:%.*]] = icmp ult i64 [[TMP2490]], [[TMP2491]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4559]], label [[COND_TRUE4561:%.*]], label [[COND_FALSE4562:%.*]]
+// SIMD-ONLY0:       cond.true4561:
+// SIMD-ONLY0-NEXT:    [[TMP2492:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4563:%.*]]
+// SIMD-ONLY0:       cond.false4562:
+// SIMD-ONLY0-NEXT:    [[TMP2493:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4563]]
+// SIMD-ONLY0:       cond.end4563:
+// SIMD-ONLY0-NEXT:    [[COND4564:%.*]] = phi i64 [ [[TMP2492]], [[COND_TRUE4561]] ], [ [[TMP2493]], [[COND_FALSE4562]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4564]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2494:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2495:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4565:%.*]] = icmp ugt i64 [[TMP2494]], [[TMP2495]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4565]], label [[IF_THEN4567:%.*]], label [[IF_END4568:%.*]]
+// SIMD-ONLY0:       if.then4567:
+// SIMD-ONLY0-NEXT:    [[TMP2496:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2496]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4568]]
+// SIMD-ONLY0:       if.end4568:
+// SIMD-ONLY0-NEXT:    [[TMP2497:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2498:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4569:%.*]] = icmp ult i64 [[TMP2497]], [[TMP2498]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4569]], label [[IF_THEN4571:%.*]], label [[IF_END4572:%.*]]
+// SIMD-ONLY0:       if.then4571:
+// SIMD-ONLY0-NEXT:    [[TMP2499:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2499]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4572]]
+// SIMD-ONLY0:       if.end4572:
+// SIMD-ONLY0-NEXT:    [[TMP2500:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2501:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4573:%.*]] = icmp ugt i64 [[TMP2500]], [[TMP2501]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4573]], label [[IF_THEN4575:%.*]], label [[IF_END4576:%.*]]
+// SIMD-ONLY0:       if.then4575:
+// SIMD-ONLY0-NEXT:    [[TMP2502:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2502]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4576]]
+// SIMD-ONLY0:       if.end4576:
+// SIMD-ONLY0-NEXT:    [[TMP2503:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2504:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4577:%.*]] = icmp ult i64 [[TMP2503]], [[TMP2504]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4577]], label [[IF_THEN4579:%.*]], label [[IF_END4580:%.*]]
+// SIMD-ONLY0:       if.then4579:
+// SIMD-ONLY0-NEXT:    [[TMP2505:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2505]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4580]]
+// SIMD-ONLY0:       if.end4580:
+// SIMD-ONLY0-NEXT:    [[TMP2506:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2507:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4581:%.*]] = icmp eq i64 [[TMP2506]], [[TMP2507]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4581]], label [[COND_TRUE4583:%.*]], label [[COND_FALSE4584:%.*]]
+// SIMD-ONLY0:       cond.true4583:
+// SIMD-ONLY0-NEXT:    [[TMP2508:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4585:%.*]]
+// SIMD-ONLY0:       cond.false4584:
+// SIMD-ONLY0-NEXT:    [[TMP2509:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4585]]
+// SIMD-ONLY0:       cond.end4585:
+// SIMD-ONLY0-NEXT:    [[COND4586:%.*]] = phi i64 [ [[TMP2508]], [[COND_TRUE4583]] ], [ [[TMP2509]], [[COND_FALSE4584]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4586]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2510:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2511:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4587:%.*]] = icmp eq i64 [[TMP2510]], [[TMP2511]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4587]], label [[COND_TRUE4589:%.*]], label [[COND_FALSE4590:%.*]]
+// SIMD-ONLY0:       cond.true4589:
+// SIMD-ONLY0-NEXT:    [[TMP2512:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4591:%.*]]
+// SIMD-ONLY0:       cond.false4590:
+// SIMD-ONLY0-NEXT:    [[TMP2513:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4591]]
+// SIMD-ONLY0:       cond.end4591:
+// SIMD-ONLY0-NEXT:    [[COND4592:%.*]] = phi i64 [ [[TMP2512]], [[COND_TRUE4589]] ], [ [[TMP2513]], [[COND_FALSE4590]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND4592]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2514:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2515:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4593:%.*]] = icmp eq i64 [[TMP2514]], [[TMP2515]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4593]], label [[IF_THEN4595:%.*]], label [[IF_END4596:%.*]]
+// SIMD-ONLY0:       if.then4595:
+// SIMD-ONLY0-NEXT:    [[TMP2516:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2516]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4596]]
+// SIMD-ONLY0:       if.end4596:
+// SIMD-ONLY0-NEXT:    [[TMP2517:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2518:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4597:%.*]] = icmp eq i64 [[TMP2517]], [[TMP2518]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4597]], label [[IF_THEN4599:%.*]], label [[IF_END4600:%.*]]
+// SIMD-ONLY0:       if.then4599:
+// SIMD-ONLY0-NEXT:    [[TMP2519:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2519]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4600]]
+// SIMD-ONLY0:       if.end4600:
+// SIMD-ONLY0-NEXT:    [[TMP2520:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2521:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4601:%.*]] = fcmp ogt float [[TMP2520]], [[TMP2521]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4601]], label [[COND_TRUE4603:%.*]], label [[COND_FALSE4604:%.*]]
+// SIMD-ONLY0:       cond.true4603:
+// SIMD-ONLY0-NEXT:    [[TMP2522:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4605:%.*]]
+// SIMD-ONLY0:       cond.false4604:
+// SIMD-ONLY0-NEXT:    [[TMP2523:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4605]]
+// SIMD-ONLY0:       cond.end4605:
+// SIMD-ONLY0-NEXT:    [[COND4606:%.*]] = phi float [ [[TMP2522]], [[COND_TRUE4603]] ], [ [[TMP2523]], [[COND_FALSE4604]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND4606]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2524:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2525:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4607:%.*]] = fcmp olt float [[TMP2524]], [[TMP2525]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4607]], label [[COND_TRUE4609:%.*]], label [[COND_FALSE4610:%.*]]
+// SIMD-ONLY0:       cond.true4609:
+// SIMD-ONLY0-NEXT:    [[TMP2526:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4611:%.*]]
+// SIMD-ONLY0:       cond.false4610:
+// SIMD-ONLY0-NEXT:    [[TMP2527:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4611]]
+// SIMD-ONLY0:       cond.end4611:
+// SIMD-ONLY0-NEXT:    [[COND4612:%.*]] = phi float [ [[TMP2526]], [[COND_TRUE4609]] ], [ [[TMP2527]], [[COND_FALSE4610]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND4612]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2528:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2529:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4613:%.*]] = fcmp ogt float [[TMP2528]], [[TMP2529]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4613]], label [[COND_TRUE4615:%.*]], label [[COND_FALSE4616:%.*]]
+// SIMD-ONLY0:       cond.true4615:
+// SIMD-ONLY0-NEXT:    [[TMP2530:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4617:%.*]]
+// SIMD-ONLY0:       cond.false4616:
+// SIMD-ONLY0-NEXT:    [[TMP2531:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4617]]
+// SIMD-ONLY0:       cond.end4617:
+// SIMD-ONLY0-NEXT:    [[COND4618:%.*]] = phi float [ [[TMP2530]], [[COND_TRUE4615]] ], [ [[TMP2531]], [[COND_FALSE4616]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND4618]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2532:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2533:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4619:%.*]] = fcmp olt float [[TMP2532]], [[TMP2533]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4619]], label [[COND_TRUE4621:%.*]], label [[COND_FALSE4622:%.*]]
+// SIMD-ONLY0:       cond.true4621:
+// SIMD-ONLY0-NEXT:    [[TMP2534:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4623:%.*]]
+// SIMD-ONLY0:       cond.false4622:
+// SIMD-ONLY0-NEXT:    [[TMP2535:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4623]]
+// SIMD-ONLY0:       cond.end4623:
+// SIMD-ONLY0-NEXT:    [[COND4624:%.*]] = phi float [ [[TMP2534]], [[COND_TRUE4621]] ], [ [[TMP2535]], [[COND_FALSE4622]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND4624]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2536:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2537:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4625:%.*]] = fcmp ogt float [[TMP2536]], [[TMP2537]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4625]], label [[IF_THEN4627:%.*]], label [[IF_END4628:%.*]]
+// SIMD-ONLY0:       if.then4627:
+// SIMD-ONLY0-NEXT:    [[TMP2538:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP2538]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END4628]]
+// SIMD-ONLY0:       if.end4628:
+// SIMD-ONLY0-NEXT:    [[TMP2539:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2540:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4629:%.*]] = fcmp olt float [[TMP2539]], [[TMP2540]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4629]], label [[IF_THEN4631:%.*]], label [[IF_END4632:%.*]]
+// SIMD-ONLY0:       if.then4631:
+// SIMD-ONLY0-NEXT:    [[TMP2541:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP2541]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END4632]]
+// SIMD-ONLY0:       if.end4632:
+// SIMD-ONLY0-NEXT:    [[TMP2542:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2543:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4633:%.*]] = fcmp ogt float [[TMP2542]], [[TMP2543]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4633]], label [[IF_THEN4635:%.*]], label [[IF_END4636:%.*]]
+// SIMD-ONLY0:       if.then4635:
+// SIMD-ONLY0-NEXT:    [[TMP2544:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP2544]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END4636]]
+// SIMD-ONLY0:       if.end4636:
+// SIMD-ONLY0-NEXT:    [[TMP2545:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2546:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4637:%.*]] = fcmp olt float [[TMP2545]], [[TMP2546]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4637]], label [[IF_THEN4639:%.*]], label [[IF_END4640:%.*]]
+// SIMD-ONLY0:       if.then4639:
+// SIMD-ONLY0-NEXT:    [[TMP2547:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP2547]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END4640]]
+// SIMD-ONLY0:       if.end4640:
+// SIMD-ONLY0-NEXT:    [[TMP2548:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2549:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4641:%.*]] = fcmp ogt float [[TMP2548]], [[TMP2549]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4641]], label [[COND_TRUE4643:%.*]], label [[COND_FALSE4644:%.*]]
+// SIMD-ONLY0:       cond.true4643:
+// SIMD-ONLY0-NEXT:    [[TMP2550:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4645:%.*]]
+// SIMD-ONLY0:       cond.false4644:
+// SIMD-ONLY0-NEXT:    [[TMP2551:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4645]]
+// SIMD-ONLY0:       cond.end4645:
+// SIMD-ONLY0-NEXT:    [[COND4646:%.*]] = phi float [ [[TMP2550]], [[COND_TRUE4643]] ], [ [[TMP2551]], [[COND_FALSE4644]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND4646]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2552:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2553:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4647:%.*]] = fcmp olt float [[TMP2552]], [[TMP2553]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4647]], label [[COND_TRUE4649:%.*]], label [[COND_FALSE4650:%.*]]
+// SIMD-ONLY0:       cond.true4649:
+// SIMD-ONLY0-NEXT:    [[TMP2554:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4651:%.*]]
+// SIMD-ONLY0:       cond.false4650:
+// SIMD-ONLY0-NEXT:    [[TMP2555:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4651]]
+// SIMD-ONLY0:       cond.end4651:
+// SIMD-ONLY0-NEXT:    [[COND4652:%.*]] = phi float [ [[TMP2554]], [[COND_TRUE4649]] ], [ [[TMP2555]], [[COND_FALSE4650]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND4652]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2556:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2557:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4653:%.*]] = fcmp ogt float [[TMP2556]], [[TMP2557]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4653]], label [[COND_TRUE4655:%.*]], label [[COND_FALSE4656:%.*]]
+// SIMD-ONLY0:       cond.true4655:
+// SIMD-ONLY0-NEXT:    [[TMP2558:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4657:%.*]]
+// SIMD-ONLY0:       cond.false4656:
+// SIMD-ONLY0-NEXT:    [[TMP2559:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4657]]
+// SIMD-ONLY0:       cond.end4657:
+// SIMD-ONLY0-NEXT:    [[COND4658:%.*]] = phi float [ [[TMP2558]], [[COND_TRUE4655]] ], [ [[TMP2559]], [[COND_FALSE4656]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND4658]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2560:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2561:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4659:%.*]] = fcmp olt float [[TMP2560]], [[TMP2561]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4659]], label [[COND_TRUE4661:%.*]], label [[COND_FALSE4662:%.*]]
+// SIMD-ONLY0:       cond.true4661:
+// SIMD-ONLY0-NEXT:    [[TMP2562:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4663:%.*]]
+// SIMD-ONLY0:       cond.false4662:
+// SIMD-ONLY0-NEXT:    [[TMP2563:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4663]]
+// SIMD-ONLY0:       cond.end4663:
+// SIMD-ONLY0-NEXT:    [[COND4664:%.*]] = phi float [ [[TMP2562]], [[COND_TRUE4661]] ], [ [[TMP2563]], [[COND_FALSE4662]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND4664]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2564:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2565:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4665:%.*]] = fcmp ogt float [[TMP2564]], [[TMP2565]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4665]], label [[IF_THEN4667:%.*]], label [[IF_END4668:%.*]]
+// SIMD-ONLY0:       if.then4667:
+// SIMD-ONLY0-NEXT:    [[TMP2566:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP2566]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END4668]]
+// SIMD-ONLY0:       if.end4668:
+// SIMD-ONLY0-NEXT:    [[TMP2567:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2568:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4669:%.*]] = fcmp olt float [[TMP2567]], [[TMP2568]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4669]], label [[IF_THEN4671:%.*]], label [[IF_END4672:%.*]]
+// SIMD-ONLY0:       if.then4671:
+// SIMD-ONLY0-NEXT:    [[TMP2569:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP2569]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END4672]]
+// SIMD-ONLY0:       if.end4672:
+// SIMD-ONLY0-NEXT:    [[TMP2570:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2571:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4673:%.*]] = fcmp ogt float [[TMP2570]], [[TMP2571]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4673]], label [[IF_THEN4675:%.*]], label [[IF_END4676:%.*]]
+// SIMD-ONLY0:       if.then4675:
+// SIMD-ONLY0-NEXT:    [[TMP2572:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP2572]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END4676]]
+// SIMD-ONLY0:       if.end4676:
+// SIMD-ONLY0-NEXT:    [[TMP2573:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2574:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4677:%.*]] = fcmp olt float [[TMP2573]], [[TMP2574]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4677]], label [[IF_THEN4679:%.*]], label [[IF_END4680:%.*]]
+// SIMD-ONLY0:       if.then4679:
+// SIMD-ONLY0-NEXT:    [[TMP2575:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP2575]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END4680]]
+// SIMD-ONLY0:       if.end4680:
+// SIMD-ONLY0-NEXT:    [[TMP2576:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2577:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4681:%.*]] = fcmp ogt float [[TMP2576]], [[TMP2577]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4681]], label [[COND_TRUE4683:%.*]], label [[COND_FALSE4684:%.*]]
+// SIMD-ONLY0:       cond.true4683:
+// SIMD-ONLY0-NEXT:    [[TMP2578:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4685:%.*]]
+// SIMD-ONLY0:       cond.false4684:
+// SIMD-ONLY0-NEXT:    [[TMP2579:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4685]]
+// SIMD-ONLY0:       cond.end4685:
+// SIMD-ONLY0-NEXT:    [[COND4686:%.*]] = phi float [ [[TMP2578]], [[COND_TRUE4683]] ], [ [[TMP2579]], [[COND_FALSE4684]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND4686]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2580:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2581:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4687:%.*]] = fcmp olt float [[TMP2580]], [[TMP2581]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4687]], label [[COND_TRUE4689:%.*]], label [[COND_FALSE4690:%.*]]
+// SIMD-ONLY0:       cond.true4689:
+// SIMD-ONLY0-NEXT:    [[TMP2582:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4691:%.*]]
+// SIMD-ONLY0:       cond.false4690:
+// SIMD-ONLY0-NEXT:    [[TMP2583:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4691]]
+// SIMD-ONLY0:       cond.end4691:
+// SIMD-ONLY0-NEXT:    [[COND4692:%.*]] = phi float [ [[TMP2582]], [[COND_TRUE4689]] ], [ [[TMP2583]], [[COND_FALSE4690]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND4692]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2584:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2585:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4693:%.*]] = fcmp ogt float [[TMP2584]], [[TMP2585]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4693]], label [[COND_TRUE4695:%.*]], label [[COND_FALSE4696:%.*]]
+// SIMD-ONLY0:       cond.true4695:
+// SIMD-ONLY0-NEXT:    [[TMP2586:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4697:%.*]]
+// SIMD-ONLY0:       cond.false4696:
+// SIMD-ONLY0-NEXT:    [[TMP2587:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4697]]
+// SIMD-ONLY0:       cond.end4697:
+// SIMD-ONLY0-NEXT:    [[COND4698:%.*]] = phi float [ [[TMP2586]], [[COND_TRUE4695]] ], [ [[TMP2587]], [[COND_FALSE4696]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND4698]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2588:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2589:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4699:%.*]] = fcmp olt float [[TMP2588]], [[TMP2589]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4699]], label [[COND_TRUE4701:%.*]], label [[COND_FALSE4702:%.*]]
+// SIMD-ONLY0:       cond.true4701:
+// SIMD-ONLY0-NEXT:    [[TMP2590:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4703:%.*]]
+// SIMD-ONLY0:       cond.false4702:
+// SIMD-ONLY0-NEXT:    [[TMP2591:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4703]]
+// SIMD-ONLY0:       cond.end4703:
+// SIMD-ONLY0-NEXT:    [[COND4704:%.*]] = phi float [ [[TMP2590]], [[COND_TRUE4701]] ], [ [[TMP2591]], [[COND_FALSE4702]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND4704]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2592:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2593:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4705:%.*]] = fcmp ogt float [[TMP2592]], [[TMP2593]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4705]], label [[IF_THEN4707:%.*]], label [[IF_END4708:%.*]]
+// SIMD-ONLY0:       if.then4707:
+// SIMD-ONLY0-NEXT:    [[TMP2594:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP2594]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END4708]]
+// SIMD-ONLY0:       if.end4708:
+// SIMD-ONLY0-NEXT:    [[TMP2595:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2596:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4709:%.*]] = fcmp olt float [[TMP2595]], [[TMP2596]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4709]], label [[IF_THEN4711:%.*]], label [[IF_END4712:%.*]]
+// SIMD-ONLY0:       if.then4711:
+// SIMD-ONLY0-NEXT:    [[TMP2597:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP2597]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END4712]]
+// SIMD-ONLY0:       if.end4712:
+// SIMD-ONLY0-NEXT:    [[TMP2598:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2599:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4713:%.*]] = fcmp ogt float [[TMP2598]], [[TMP2599]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4713]], label [[IF_THEN4715:%.*]], label [[IF_END4716:%.*]]
+// SIMD-ONLY0:       if.then4715:
+// SIMD-ONLY0-NEXT:    [[TMP2600:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP2600]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END4716]]
+// SIMD-ONLY0:       if.end4716:
+// SIMD-ONLY0-NEXT:    [[TMP2601:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2602:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4717:%.*]] = fcmp olt float [[TMP2601]], [[TMP2602]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4717]], label [[IF_THEN4719:%.*]], label [[IF_END4720:%.*]]
+// SIMD-ONLY0:       if.then4719:
+// SIMD-ONLY0-NEXT:    [[TMP2603:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP2603]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END4720]]
+// SIMD-ONLY0:       if.end4720:
+// SIMD-ONLY0-NEXT:    [[TMP2604:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2605:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4721:%.*]] = fcmp ogt float [[TMP2604]], [[TMP2605]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4721]], label [[COND_TRUE4723:%.*]], label [[COND_FALSE4724:%.*]]
+// SIMD-ONLY0:       cond.true4723:
+// SIMD-ONLY0-NEXT:    [[TMP2606:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4725:%.*]]
+// SIMD-ONLY0:       cond.false4724:
+// SIMD-ONLY0-NEXT:    [[TMP2607:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4725]]
+// SIMD-ONLY0:       cond.end4725:
+// SIMD-ONLY0-NEXT:    [[COND4726:%.*]] = phi float [ [[TMP2606]], [[COND_TRUE4723]] ], [ [[TMP2607]], [[COND_FALSE4724]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND4726]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2608:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2609:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4727:%.*]] = fcmp olt float [[TMP2608]], [[TMP2609]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4727]], label [[COND_TRUE4729:%.*]], label [[COND_FALSE4730:%.*]]
+// SIMD-ONLY0:       cond.true4729:
+// SIMD-ONLY0-NEXT:    [[TMP2610:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4731:%.*]]
+// SIMD-ONLY0:       cond.false4730:
+// SIMD-ONLY0-NEXT:    [[TMP2611:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4731]]
+// SIMD-ONLY0:       cond.end4731:
+// SIMD-ONLY0-NEXT:    [[COND4732:%.*]] = phi float [ [[TMP2610]], [[COND_TRUE4729]] ], [ [[TMP2611]], [[COND_FALSE4730]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND4732]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2612:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2613:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4733:%.*]] = fcmp ogt float [[TMP2612]], [[TMP2613]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4733]], label [[COND_TRUE4735:%.*]], label [[COND_FALSE4736:%.*]]
+// SIMD-ONLY0:       cond.true4735:
+// SIMD-ONLY0-NEXT:    [[TMP2614:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4737:%.*]]
+// SIMD-ONLY0:       cond.false4736:
+// SIMD-ONLY0-NEXT:    [[TMP2615:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4737]]
+// SIMD-ONLY0:       cond.end4737:
+// SIMD-ONLY0-NEXT:    [[COND4738:%.*]] = phi float [ [[TMP2614]], [[COND_TRUE4735]] ], [ [[TMP2615]], [[COND_FALSE4736]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND4738]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2616:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2617:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4739:%.*]] = fcmp olt float [[TMP2616]], [[TMP2617]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4739]], label [[COND_TRUE4741:%.*]], label [[COND_FALSE4742:%.*]]
+// SIMD-ONLY0:       cond.true4741:
+// SIMD-ONLY0-NEXT:    [[TMP2618:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4743:%.*]]
+// SIMD-ONLY0:       cond.false4742:
+// SIMD-ONLY0-NEXT:    [[TMP2619:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4743]]
+// SIMD-ONLY0:       cond.end4743:
+// SIMD-ONLY0-NEXT:    [[COND4744:%.*]] = phi float [ [[TMP2618]], [[COND_TRUE4741]] ], [ [[TMP2619]], [[COND_FALSE4742]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND4744]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2620:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2621:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4745:%.*]] = fcmp ogt float [[TMP2620]], [[TMP2621]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4745]], label [[IF_THEN4747:%.*]], label [[IF_END4748:%.*]]
+// SIMD-ONLY0:       if.then4747:
+// SIMD-ONLY0-NEXT:    [[TMP2622:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP2622]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END4748]]
+// SIMD-ONLY0:       if.end4748:
+// SIMD-ONLY0-NEXT:    [[TMP2623:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2624:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4749:%.*]] = fcmp olt float [[TMP2623]], [[TMP2624]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4749]], label [[IF_THEN4751:%.*]], label [[IF_END4752:%.*]]
+// SIMD-ONLY0:       if.then4751:
+// SIMD-ONLY0-NEXT:    [[TMP2625:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP2625]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END4752]]
+// SIMD-ONLY0:       if.end4752:
+// SIMD-ONLY0-NEXT:    [[TMP2626:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2627:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4753:%.*]] = fcmp ogt float [[TMP2626]], [[TMP2627]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4753]], label [[IF_THEN4755:%.*]], label [[IF_END4756:%.*]]
+// SIMD-ONLY0:       if.then4755:
+// SIMD-ONLY0-NEXT:    [[TMP2628:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP2628]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END4756]]
+// SIMD-ONLY0:       if.end4756:
+// SIMD-ONLY0-NEXT:    [[TMP2629:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2630:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4757:%.*]] = fcmp olt float [[TMP2629]], [[TMP2630]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4757]], label [[IF_THEN4759:%.*]], label [[IF_END4760:%.*]]
+// SIMD-ONLY0:       if.then4759:
+// SIMD-ONLY0-NEXT:    [[TMP2631:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP2631]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END4760]]
+// SIMD-ONLY0:       if.end4760:
+// SIMD-ONLY0-NEXT:    [[TMP2632:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2633:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4761:%.*]] = fcmp ogt float [[TMP2632]], [[TMP2633]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4761]], label [[COND_TRUE4763:%.*]], label [[COND_FALSE4764:%.*]]
+// SIMD-ONLY0:       cond.true4763:
+// SIMD-ONLY0-NEXT:    [[TMP2634:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4765:%.*]]
+// SIMD-ONLY0:       cond.false4764:
+// SIMD-ONLY0-NEXT:    [[TMP2635:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4765]]
+// SIMD-ONLY0:       cond.end4765:
+// SIMD-ONLY0-NEXT:    [[COND4766:%.*]] = phi float [ [[TMP2634]], [[COND_TRUE4763]] ], [ [[TMP2635]], [[COND_FALSE4764]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND4766]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2636:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2637:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4767:%.*]] = fcmp olt float [[TMP2636]], [[TMP2637]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4767]], label [[COND_TRUE4769:%.*]], label [[COND_FALSE4770:%.*]]
+// SIMD-ONLY0:       cond.true4769:
+// SIMD-ONLY0-NEXT:    [[TMP2638:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4771:%.*]]
+// SIMD-ONLY0:       cond.false4770:
+// SIMD-ONLY0-NEXT:    [[TMP2639:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4771]]
+// SIMD-ONLY0:       cond.end4771:
+// SIMD-ONLY0-NEXT:    [[COND4772:%.*]] = phi float [ [[TMP2638]], [[COND_TRUE4769]] ], [ [[TMP2639]], [[COND_FALSE4770]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND4772]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2640:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2641:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4773:%.*]] = fcmp ogt float [[TMP2640]], [[TMP2641]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4773]], label [[COND_TRUE4775:%.*]], label [[COND_FALSE4776:%.*]]
+// SIMD-ONLY0:       cond.true4775:
+// SIMD-ONLY0-NEXT:    [[TMP2642:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4777:%.*]]
+// SIMD-ONLY0:       cond.false4776:
+// SIMD-ONLY0-NEXT:    [[TMP2643:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4777]]
+// SIMD-ONLY0:       cond.end4777:
+// SIMD-ONLY0-NEXT:    [[COND4778:%.*]] = phi float [ [[TMP2642]], [[COND_TRUE4775]] ], [ [[TMP2643]], [[COND_FALSE4776]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND4778]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2644:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2645:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4779:%.*]] = fcmp olt float [[TMP2644]], [[TMP2645]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4779]], label [[COND_TRUE4781:%.*]], label [[COND_FALSE4782:%.*]]
+// SIMD-ONLY0:       cond.true4781:
+// SIMD-ONLY0-NEXT:    [[TMP2646:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4783:%.*]]
+// SIMD-ONLY0:       cond.false4782:
+// SIMD-ONLY0-NEXT:    [[TMP2647:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4783]]
+// SIMD-ONLY0:       cond.end4783:
+// SIMD-ONLY0-NEXT:    [[COND4784:%.*]] = phi float [ [[TMP2646]], [[COND_TRUE4781]] ], [ [[TMP2647]], [[COND_FALSE4782]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND4784]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2648:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2649:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4785:%.*]] = fcmp ogt float [[TMP2648]], [[TMP2649]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4785]], label [[IF_THEN4787:%.*]], label [[IF_END4788:%.*]]
+// SIMD-ONLY0:       if.then4787:
+// SIMD-ONLY0-NEXT:    [[TMP2650:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP2650]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END4788]]
+// SIMD-ONLY0:       if.end4788:
+// SIMD-ONLY0-NEXT:    [[TMP2651:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2652:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4789:%.*]] = fcmp olt float [[TMP2651]], [[TMP2652]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4789]], label [[IF_THEN4791:%.*]], label [[IF_END4792:%.*]]
+// SIMD-ONLY0:       if.then4791:
+// SIMD-ONLY0-NEXT:    [[TMP2653:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP2653]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END4792]]
+// SIMD-ONLY0:       if.end4792:
+// SIMD-ONLY0-NEXT:    [[TMP2654:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2655:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4793:%.*]] = fcmp ogt float [[TMP2654]], [[TMP2655]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4793]], label [[IF_THEN4795:%.*]], label [[IF_END4796:%.*]]
+// SIMD-ONLY0:       if.then4795:
+// SIMD-ONLY0-NEXT:    [[TMP2656:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP2656]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END4796]]
+// SIMD-ONLY0:       if.end4796:
+// SIMD-ONLY0-NEXT:    [[TMP2657:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2658:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4797:%.*]] = fcmp olt float [[TMP2657]], [[TMP2658]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4797]], label [[IF_THEN4799:%.*]], label [[IF_END4800:%.*]]
+// SIMD-ONLY0:       if.then4799:
+// SIMD-ONLY0-NEXT:    [[TMP2659:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP2659]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END4800]]
+// SIMD-ONLY0:       if.end4800:
+// SIMD-ONLY0-NEXT:    [[TMP2660:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2661:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4801:%.*]] = fcmp ogt float [[TMP2660]], [[TMP2661]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4801]], label [[COND_TRUE4803:%.*]], label [[COND_FALSE4804:%.*]]
+// SIMD-ONLY0:       cond.true4803:
+// SIMD-ONLY0-NEXT:    [[TMP2662:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4805:%.*]]
+// SIMD-ONLY0:       cond.false4804:
+// SIMD-ONLY0-NEXT:    [[TMP2663:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4805]]
+// SIMD-ONLY0:       cond.end4805:
+// SIMD-ONLY0-NEXT:    [[COND4806:%.*]] = phi float [ [[TMP2662]], [[COND_TRUE4803]] ], [ [[TMP2663]], [[COND_FALSE4804]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND4806]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2664:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2665:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4807:%.*]] = fcmp olt float [[TMP2664]], [[TMP2665]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4807]], label [[COND_TRUE4809:%.*]], label [[COND_FALSE4810:%.*]]
+// SIMD-ONLY0:       cond.true4809:
+// SIMD-ONLY0-NEXT:    [[TMP2666:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4811:%.*]]
+// SIMD-ONLY0:       cond.false4810:
+// SIMD-ONLY0-NEXT:    [[TMP2667:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4811]]
+// SIMD-ONLY0:       cond.end4811:
+// SIMD-ONLY0-NEXT:    [[COND4812:%.*]] = phi float [ [[TMP2666]], [[COND_TRUE4809]] ], [ [[TMP2667]], [[COND_FALSE4810]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND4812]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2668:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2669:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4813:%.*]] = fcmp ogt float [[TMP2668]], [[TMP2669]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4813]], label [[COND_TRUE4815:%.*]], label [[COND_FALSE4816:%.*]]
+// SIMD-ONLY0:       cond.true4815:
+// SIMD-ONLY0-NEXT:    [[TMP2670:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4817:%.*]]
+// SIMD-ONLY0:       cond.false4816:
+// SIMD-ONLY0-NEXT:    [[TMP2671:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4817]]
+// SIMD-ONLY0:       cond.end4817:
+// SIMD-ONLY0-NEXT:    [[COND4818:%.*]] = phi float [ [[TMP2670]], [[COND_TRUE4815]] ], [ [[TMP2671]], [[COND_FALSE4816]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND4818]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2672:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2673:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4819:%.*]] = fcmp olt float [[TMP2672]], [[TMP2673]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4819]], label [[COND_TRUE4821:%.*]], label [[COND_FALSE4822:%.*]]
+// SIMD-ONLY0:       cond.true4821:
+// SIMD-ONLY0-NEXT:    [[TMP2674:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4823:%.*]]
+// SIMD-ONLY0:       cond.false4822:
+// SIMD-ONLY0-NEXT:    [[TMP2675:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4823]]
+// SIMD-ONLY0:       cond.end4823:
+// SIMD-ONLY0-NEXT:    [[COND4824:%.*]] = phi float [ [[TMP2674]], [[COND_TRUE4821]] ], [ [[TMP2675]], [[COND_FALSE4822]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND4824]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2676:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2677:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4825:%.*]] = fcmp ogt float [[TMP2676]], [[TMP2677]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4825]], label [[IF_THEN4827:%.*]], label [[IF_END4828:%.*]]
+// SIMD-ONLY0:       if.then4827:
+// SIMD-ONLY0-NEXT:    [[TMP2678:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP2678]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END4828]]
+// SIMD-ONLY0:       if.end4828:
+// SIMD-ONLY0-NEXT:    [[TMP2679:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2680:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4829:%.*]] = fcmp olt float [[TMP2679]], [[TMP2680]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4829]], label [[IF_THEN4831:%.*]], label [[IF_END4832:%.*]]
+// SIMD-ONLY0:       if.then4831:
+// SIMD-ONLY0-NEXT:    [[TMP2681:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP2681]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END4832]]
+// SIMD-ONLY0:       if.end4832:
+// SIMD-ONLY0-NEXT:    [[TMP2682:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2683:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4833:%.*]] = fcmp ogt float [[TMP2682]], [[TMP2683]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4833]], label [[IF_THEN4835:%.*]], label [[IF_END4836:%.*]]
+// SIMD-ONLY0:       if.then4835:
+// SIMD-ONLY0-NEXT:    [[TMP2684:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP2684]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END4836]]
+// SIMD-ONLY0:       if.end4836:
+// SIMD-ONLY0-NEXT:    [[TMP2685:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2686:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP4837:%.*]] = fcmp olt float [[TMP2685]], [[TMP2686]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4837]], label [[IF_THEN4839:%.*]], label [[IF_END4840:%.*]]
+// SIMD-ONLY0:       if.then4839:
+// SIMD-ONLY0-NEXT:    [[TMP2687:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP2687]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END4840]]
+// SIMD-ONLY0:       if.end4840:
+// SIMD-ONLY0-NEXT:    [[TMP2688:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2689:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4841:%.*]] = fcmp ogt double [[TMP2688]], [[TMP2689]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4841]], label [[COND_TRUE4843:%.*]], label [[COND_FALSE4844:%.*]]
+// SIMD-ONLY0:       cond.true4843:
+// SIMD-ONLY0-NEXT:    [[TMP2690:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4845:%.*]]
+// SIMD-ONLY0:       cond.false4844:
+// SIMD-ONLY0-NEXT:    [[TMP2691:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4845]]
+// SIMD-ONLY0:       cond.end4845:
+// SIMD-ONLY0-NEXT:    [[COND4846:%.*]] = phi double [ [[TMP2690]], [[COND_TRUE4843]] ], [ [[TMP2691]], [[COND_FALSE4844]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND4846]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2692:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2693:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4847:%.*]] = fcmp olt double [[TMP2692]], [[TMP2693]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4847]], label [[COND_TRUE4849:%.*]], label [[COND_FALSE4850:%.*]]
+// SIMD-ONLY0:       cond.true4849:
+// SIMD-ONLY0-NEXT:    [[TMP2694:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4851:%.*]]
+// SIMD-ONLY0:       cond.false4850:
+// SIMD-ONLY0-NEXT:    [[TMP2695:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4851]]
+// SIMD-ONLY0:       cond.end4851:
+// SIMD-ONLY0-NEXT:    [[COND4852:%.*]] = phi double [ [[TMP2694]], [[COND_TRUE4849]] ], [ [[TMP2695]], [[COND_FALSE4850]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND4852]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2696:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2697:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4853:%.*]] = fcmp ogt double [[TMP2696]], [[TMP2697]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4853]], label [[COND_TRUE4855:%.*]], label [[COND_FALSE4856:%.*]]
+// SIMD-ONLY0:       cond.true4855:
+// SIMD-ONLY0-NEXT:    [[TMP2698:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4857:%.*]]
+// SIMD-ONLY0:       cond.false4856:
+// SIMD-ONLY0-NEXT:    [[TMP2699:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4857]]
+// SIMD-ONLY0:       cond.end4857:
+// SIMD-ONLY0-NEXT:    [[COND4858:%.*]] = phi double [ [[TMP2698]], [[COND_TRUE4855]] ], [ [[TMP2699]], [[COND_FALSE4856]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND4858]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2700:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2701:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4859:%.*]] = fcmp olt double [[TMP2700]], [[TMP2701]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4859]], label [[COND_TRUE4861:%.*]], label [[COND_FALSE4862:%.*]]
+// SIMD-ONLY0:       cond.true4861:
+// SIMD-ONLY0-NEXT:    [[TMP2702:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4863:%.*]]
+// SIMD-ONLY0:       cond.false4862:
+// SIMD-ONLY0-NEXT:    [[TMP2703:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4863]]
+// SIMD-ONLY0:       cond.end4863:
+// SIMD-ONLY0-NEXT:    [[COND4864:%.*]] = phi double [ [[TMP2702]], [[COND_TRUE4861]] ], [ [[TMP2703]], [[COND_FALSE4862]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND4864]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2704:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2705:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4865:%.*]] = fcmp ogt double [[TMP2704]], [[TMP2705]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4865]], label [[IF_THEN4867:%.*]], label [[IF_END4868:%.*]]
+// SIMD-ONLY0:       if.then4867:
+// SIMD-ONLY0-NEXT:    [[TMP2706:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP2706]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4868]]
+// SIMD-ONLY0:       if.end4868:
+// SIMD-ONLY0-NEXT:    [[TMP2707:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2708:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4869:%.*]] = fcmp olt double [[TMP2707]], [[TMP2708]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4869]], label [[IF_THEN4871:%.*]], label [[IF_END4872:%.*]]
+// SIMD-ONLY0:       if.then4871:
+// SIMD-ONLY0-NEXT:    [[TMP2709:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP2709]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4872]]
+// SIMD-ONLY0:       if.end4872:
+// SIMD-ONLY0-NEXT:    [[TMP2710:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2711:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4873:%.*]] = fcmp ogt double [[TMP2710]], [[TMP2711]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4873]], label [[IF_THEN4875:%.*]], label [[IF_END4876:%.*]]
+// SIMD-ONLY0:       if.then4875:
+// SIMD-ONLY0-NEXT:    [[TMP2712:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP2712]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4876]]
+// SIMD-ONLY0:       if.end4876:
+// SIMD-ONLY0-NEXT:    [[TMP2713:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2714:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4877:%.*]] = fcmp olt double [[TMP2713]], [[TMP2714]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4877]], label [[IF_THEN4879:%.*]], label [[IF_END4880:%.*]]
+// SIMD-ONLY0:       if.then4879:
+// SIMD-ONLY0-NEXT:    [[TMP2715:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP2715]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4880]]
+// SIMD-ONLY0:       if.end4880:
+// SIMD-ONLY0-NEXT:    [[TMP2716:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2717:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4881:%.*]] = fcmp ogt double [[TMP2716]], [[TMP2717]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4881]], label [[COND_TRUE4883:%.*]], label [[COND_FALSE4884:%.*]]
+// SIMD-ONLY0:       cond.true4883:
+// SIMD-ONLY0-NEXT:    [[TMP2718:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4885:%.*]]
+// SIMD-ONLY0:       cond.false4884:
+// SIMD-ONLY0-NEXT:    [[TMP2719:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4885]]
+// SIMD-ONLY0:       cond.end4885:
+// SIMD-ONLY0-NEXT:    [[COND4886:%.*]] = phi double [ [[TMP2718]], [[COND_TRUE4883]] ], [ [[TMP2719]], [[COND_FALSE4884]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND4886]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2720:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2721:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4887:%.*]] = fcmp olt double [[TMP2720]], [[TMP2721]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4887]], label [[COND_TRUE4889:%.*]], label [[COND_FALSE4890:%.*]]
+// SIMD-ONLY0:       cond.true4889:
+// SIMD-ONLY0-NEXT:    [[TMP2722:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4891:%.*]]
+// SIMD-ONLY0:       cond.false4890:
+// SIMD-ONLY0-NEXT:    [[TMP2723:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4891]]
+// SIMD-ONLY0:       cond.end4891:
+// SIMD-ONLY0-NEXT:    [[COND4892:%.*]] = phi double [ [[TMP2722]], [[COND_TRUE4889]] ], [ [[TMP2723]], [[COND_FALSE4890]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND4892]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2724:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2725:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4893:%.*]] = fcmp ogt double [[TMP2724]], [[TMP2725]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4893]], label [[COND_TRUE4895:%.*]], label [[COND_FALSE4896:%.*]]
+// SIMD-ONLY0:       cond.true4895:
+// SIMD-ONLY0-NEXT:    [[TMP2726:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4897:%.*]]
+// SIMD-ONLY0:       cond.false4896:
+// SIMD-ONLY0-NEXT:    [[TMP2727:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4897]]
+// SIMD-ONLY0:       cond.end4897:
+// SIMD-ONLY0-NEXT:    [[COND4898:%.*]] = phi double [ [[TMP2726]], [[COND_TRUE4895]] ], [ [[TMP2727]], [[COND_FALSE4896]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND4898]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2728:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2729:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4899:%.*]] = fcmp olt double [[TMP2728]], [[TMP2729]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4899]], label [[COND_TRUE4901:%.*]], label [[COND_FALSE4902:%.*]]
+// SIMD-ONLY0:       cond.true4901:
+// SIMD-ONLY0-NEXT:    [[TMP2730:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4903:%.*]]
+// SIMD-ONLY0:       cond.false4902:
+// SIMD-ONLY0-NEXT:    [[TMP2731:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4903]]
+// SIMD-ONLY0:       cond.end4903:
+// SIMD-ONLY0-NEXT:    [[COND4904:%.*]] = phi double [ [[TMP2730]], [[COND_TRUE4901]] ], [ [[TMP2731]], [[COND_FALSE4902]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND4904]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2732:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2733:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4905:%.*]] = fcmp ogt double [[TMP2732]], [[TMP2733]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4905]], label [[IF_THEN4907:%.*]], label [[IF_END4908:%.*]]
+// SIMD-ONLY0:       if.then4907:
+// SIMD-ONLY0-NEXT:    [[TMP2734:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP2734]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4908]]
+// SIMD-ONLY0:       if.end4908:
+// SIMD-ONLY0-NEXT:    [[TMP2735:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2736:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4909:%.*]] = fcmp olt double [[TMP2735]], [[TMP2736]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4909]], label [[IF_THEN4911:%.*]], label [[IF_END4912:%.*]]
+// SIMD-ONLY0:       if.then4911:
+// SIMD-ONLY0-NEXT:    [[TMP2737:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP2737]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4912]]
+// SIMD-ONLY0:       if.end4912:
+// SIMD-ONLY0-NEXT:    [[TMP2738:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2739:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4913:%.*]] = fcmp ogt double [[TMP2738]], [[TMP2739]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4913]], label [[IF_THEN4915:%.*]], label [[IF_END4916:%.*]]
+// SIMD-ONLY0:       if.then4915:
+// SIMD-ONLY0-NEXT:    [[TMP2740:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP2740]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4916]]
+// SIMD-ONLY0:       if.end4916:
+// SIMD-ONLY0-NEXT:    [[TMP2741:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2742:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4917:%.*]] = fcmp olt double [[TMP2741]], [[TMP2742]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4917]], label [[IF_THEN4919:%.*]], label [[IF_END4920:%.*]]
+// SIMD-ONLY0:       if.then4919:
+// SIMD-ONLY0-NEXT:    [[TMP2743:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP2743]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4920]]
+// SIMD-ONLY0:       if.end4920:
+// SIMD-ONLY0-NEXT:    [[TMP2744:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2745:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4921:%.*]] = fcmp ogt double [[TMP2744]], [[TMP2745]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4921]], label [[COND_TRUE4923:%.*]], label [[COND_FALSE4924:%.*]]
+// SIMD-ONLY0:       cond.true4923:
+// SIMD-ONLY0-NEXT:    [[TMP2746:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4925:%.*]]
+// SIMD-ONLY0:       cond.false4924:
+// SIMD-ONLY0-NEXT:    [[TMP2747:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4925]]
+// SIMD-ONLY0:       cond.end4925:
+// SIMD-ONLY0-NEXT:    [[COND4926:%.*]] = phi double [ [[TMP2746]], [[COND_TRUE4923]] ], [ [[TMP2747]], [[COND_FALSE4924]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND4926]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2748:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2749:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4927:%.*]] = fcmp olt double [[TMP2748]], [[TMP2749]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4927]], label [[COND_TRUE4929:%.*]], label [[COND_FALSE4930:%.*]]
+// SIMD-ONLY0:       cond.true4929:
+// SIMD-ONLY0-NEXT:    [[TMP2750:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4931:%.*]]
+// SIMD-ONLY0:       cond.false4930:
+// SIMD-ONLY0-NEXT:    [[TMP2751:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4931]]
+// SIMD-ONLY0:       cond.end4931:
+// SIMD-ONLY0-NEXT:    [[COND4932:%.*]] = phi double [ [[TMP2750]], [[COND_TRUE4929]] ], [ [[TMP2751]], [[COND_FALSE4930]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND4932]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2752:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2753:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4933:%.*]] = fcmp ogt double [[TMP2752]], [[TMP2753]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4933]], label [[COND_TRUE4935:%.*]], label [[COND_FALSE4936:%.*]]
+// SIMD-ONLY0:       cond.true4935:
+// SIMD-ONLY0-NEXT:    [[TMP2754:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4937:%.*]]
+// SIMD-ONLY0:       cond.false4936:
+// SIMD-ONLY0-NEXT:    [[TMP2755:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4937]]
+// SIMD-ONLY0:       cond.end4937:
+// SIMD-ONLY0-NEXT:    [[COND4938:%.*]] = phi double [ [[TMP2754]], [[COND_TRUE4935]] ], [ [[TMP2755]], [[COND_FALSE4936]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND4938]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2756:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2757:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4939:%.*]] = fcmp olt double [[TMP2756]], [[TMP2757]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4939]], label [[COND_TRUE4941:%.*]], label [[COND_FALSE4942:%.*]]
+// SIMD-ONLY0:       cond.true4941:
+// SIMD-ONLY0-NEXT:    [[TMP2758:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4943:%.*]]
+// SIMD-ONLY0:       cond.false4942:
+// SIMD-ONLY0-NEXT:    [[TMP2759:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4943]]
+// SIMD-ONLY0:       cond.end4943:
+// SIMD-ONLY0-NEXT:    [[COND4944:%.*]] = phi double [ [[TMP2758]], [[COND_TRUE4941]] ], [ [[TMP2759]], [[COND_FALSE4942]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND4944]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2760:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2761:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4945:%.*]] = fcmp ogt double [[TMP2760]], [[TMP2761]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4945]], label [[IF_THEN4947:%.*]], label [[IF_END4948:%.*]]
+// SIMD-ONLY0:       if.then4947:
+// SIMD-ONLY0-NEXT:    [[TMP2762:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP2762]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4948]]
+// SIMD-ONLY0:       if.end4948:
+// SIMD-ONLY0-NEXT:    [[TMP2763:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2764:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4949:%.*]] = fcmp olt double [[TMP2763]], [[TMP2764]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4949]], label [[IF_THEN4951:%.*]], label [[IF_END4952:%.*]]
+// SIMD-ONLY0:       if.then4951:
+// SIMD-ONLY0-NEXT:    [[TMP2765:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP2765]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4952]]
+// SIMD-ONLY0:       if.end4952:
+// SIMD-ONLY0-NEXT:    [[TMP2766:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2767:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4953:%.*]] = fcmp ogt double [[TMP2766]], [[TMP2767]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4953]], label [[IF_THEN4955:%.*]], label [[IF_END4956:%.*]]
+// SIMD-ONLY0:       if.then4955:
+// SIMD-ONLY0-NEXT:    [[TMP2768:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP2768]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4956]]
+// SIMD-ONLY0:       if.end4956:
+// SIMD-ONLY0-NEXT:    [[TMP2769:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2770:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4957:%.*]] = fcmp olt double [[TMP2769]], [[TMP2770]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4957]], label [[IF_THEN4959:%.*]], label [[IF_END4960:%.*]]
+// SIMD-ONLY0:       if.then4959:
+// SIMD-ONLY0-NEXT:    [[TMP2771:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP2771]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4960]]
+// SIMD-ONLY0:       if.end4960:
+// SIMD-ONLY0-NEXT:    [[TMP2772:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2773:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4961:%.*]] = fcmp ogt double [[TMP2772]], [[TMP2773]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4961]], label [[COND_TRUE4963:%.*]], label [[COND_FALSE4964:%.*]]
+// SIMD-ONLY0:       cond.true4963:
+// SIMD-ONLY0-NEXT:    [[TMP2774:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4965:%.*]]
+// SIMD-ONLY0:       cond.false4964:
+// SIMD-ONLY0-NEXT:    [[TMP2775:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4965]]
+// SIMD-ONLY0:       cond.end4965:
+// SIMD-ONLY0-NEXT:    [[COND4966:%.*]] = phi double [ [[TMP2774]], [[COND_TRUE4963]] ], [ [[TMP2775]], [[COND_FALSE4964]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND4966]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2776:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2777:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4967:%.*]] = fcmp olt double [[TMP2776]], [[TMP2777]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4967]], label [[COND_TRUE4969:%.*]], label [[COND_FALSE4970:%.*]]
+// SIMD-ONLY0:       cond.true4969:
+// SIMD-ONLY0-NEXT:    [[TMP2778:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4971:%.*]]
+// SIMD-ONLY0:       cond.false4970:
+// SIMD-ONLY0-NEXT:    [[TMP2779:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4971]]
+// SIMD-ONLY0:       cond.end4971:
+// SIMD-ONLY0-NEXT:    [[COND4972:%.*]] = phi double [ [[TMP2778]], [[COND_TRUE4969]] ], [ [[TMP2779]], [[COND_FALSE4970]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND4972]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2780:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2781:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4973:%.*]] = fcmp ogt double [[TMP2780]], [[TMP2781]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4973]], label [[COND_TRUE4975:%.*]], label [[COND_FALSE4976:%.*]]
+// SIMD-ONLY0:       cond.true4975:
+// SIMD-ONLY0-NEXT:    [[TMP2782:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4977:%.*]]
+// SIMD-ONLY0:       cond.false4976:
+// SIMD-ONLY0-NEXT:    [[TMP2783:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4977]]
+// SIMD-ONLY0:       cond.end4977:
+// SIMD-ONLY0-NEXT:    [[COND4978:%.*]] = phi double [ [[TMP2782]], [[COND_TRUE4975]] ], [ [[TMP2783]], [[COND_FALSE4976]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND4978]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2784:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2785:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4979:%.*]] = fcmp olt double [[TMP2784]], [[TMP2785]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4979]], label [[COND_TRUE4981:%.*]], label [[COND_FALSE4982:%.*]]
+// SIMD-ONLY0:       cond.true4981:
+// SIMD-ONLY0-NEXT:    [[TMP2786:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4983:%.*]]
+// SIMD-ONLY0:       cond.false4982:
+// SIMD-ONLY0-NEXT:    [[TMP2787:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4983]]
+// SIMD-ONLY0:       cond.end4983:
+// SIMD-ONLY0-NEXT:    [[COND4984:%.*]] = phi double [ [[TMP2786]], [[COND_TRUE4981]] ], [ [[TMP2787]], [[COND_FALSE4982]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND4984]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2788:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2789:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4985:%.*]] = fcmp ogt double [[TMP2788]], [[TMP2789]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4985]], label [[IF_THEN4987:%.*]], label [[IF_END4988:%.*]]
+// SIMD-ONLY0:       if.then4987:
+// SIMD-ONLY0-NEXT:    [[TMP2790:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP2790]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4988]]
+// SIMD-ONLY0:       if.end4988:
+// SIMD-ONLY0-NEXT:    [[TMP2791:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2792:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4989:%.*]] = fcmp olt double [[TMP2791]], [[TMP2792]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4989]], label [[IF_THEN4991:%.*]], label [[IF_END4992:%.*]]
+// SIMD-ONLY0:       if.then4991:
+// SIMD-ONLY0-NEXT:    [[TMP2793:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP2793]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4992]]
+// SIMD-ONLY0:       if.end4992:
+// SIMD-ONLY0-NEXT:    [[TMP2794:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2795:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4993:%.*]] = fcmp ogt double [[TMP2794]], [[TMP2795]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4993]], label [[IF_THEN4995:%.*]], label [[IF_END4996:%.*]]
+// SIMD-ONLY0:       if.then4995:
+// SIMD-ONLY0-NEXT:    [[TMP2796:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP2796]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4996]]
+// SIMD-ONLY0:       if.end4996:
+// SIMD-ONLY0-NEXT:    [[TMP2797:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2798:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4997:%.*]] = fcmp olt double [[TMP2797]], [[TMP2798]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4997]], label [[IF_THEN4999:%.*]], label [[IF_END5000:%.*]]
+// SIMD-ONLY0:       if.then4999:
+// SIMD-ONLY0-NEXT:    [[TMP2799:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP2799]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5000]]
+// SIMD-ONLY0:       if.end5000:
+// SIMD-ONLY0-NEXT:    [[TMP2800:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2801:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5001:%.*]] = fcmp ogt double [[TMP2800]], [[TMP2801]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5001]], label [[COND_TRUE5003:%.*]], label [[COND_FALSE5004:%.*]]
+// SIMD-ONLY0:       cond.true5003:
+// SIMD-ONLY0-NEXT:    [[TMP2802:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END5005:%.*]]
+// SIMD-ONLY0:       cond.false5004:
+// SIMD-ONLY0-NEXT:    [[TMP2803:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END5005]]
+// SIMD-ONLY0:       cond.end5005:
+// SIMD-ONLY0-NEXT:    [[COND5006:%.*]] = phi double [ [[TMP2802]], [[COND_TRUE5003]] ], [ [[TMP2803]], [[COND_FALSE5004]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND5006]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2804:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2805:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5007:%.*]] = fcmp olt double [[TMP2804]], [[TMP2805]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5007]], label [[COND_TRUE5009:%.*]], label [[COND_FALSE5010:%.*]]
+// SIMD-ONLY0:       cond.true5009:
+// SIMD-ONLY0-NEXT:    [[TMP2806:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END5011:%.*]]
+// SIMD-ONLY0:       cond.false5010:
+// SIMD-ONLY0-NEXT:    [[TMP2807:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END5011]]
+// SIMD-ONLY0:       cond.end5011:
+// SIMD-ONLY0-NEXT:    [[COND5012:%.*]] = phi double [ [[TMP2806]], [[COND_TRUE5009]] ], [ [[TMP2807]], [[COND_FALSE5010]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND5012]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2808:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2809:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5013:%.*]] = fcmp ogt double [[TMP2808]], [[TMP2809]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5013]], label [[COND_TRUE5015:%.*]], label [[COND_FALSE5016:%.*]]
+// SIMD-ONLY0:       cond.true5015:
+// SIMD-ONLY0-NEXT:    [[TMP2810:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END5017:%.*]]
+// SIMD-ONLY0:       cond.false5016:
+// SIMD-ONLY0-NEXT:    [[TMP2811:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END5017]]
+// SIMD-ONLY0:       cond.end5017:
+// SIMD-ONLY0-NEXT:    [[COND5018:%.*]] = phi double [ [[TMP2810]], [[COND_TRUE5015]] ], [ [[TMP2811]], [[COND_FALSE5016]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND5018]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2812:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2813:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5019:%.*]] = fcmp olt double [[TMP2812]], [[TMP2813]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5019]], label [[COND_TRUE5021:%.*]], label [[COND_FALSE5022:%.*]]
+// SIMD-ONLY0:       cond.true5021:
+// SIMD-ONLY0-NEXT:    [[TMP2814:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END5023:%.*]]
+// SIMD-ONLY0:       cond.false5022:
+// SIMD-ONLY0-NEXT:    [[TMP2815:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END5023]]
+// SIMD-ONLY0:       cond.end5023:
+// SIMD-ONLY0-NEXT:    [[COND5024:%.*]] = phi double [ [[TMP2814]], [[COND_TRUE5021]] ], [ [[TMP2815]], [[COND_FALSE5022]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND5024]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2816:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2817:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5025:%.*]] = fcmp ogt double [[TMP2816]], [[TMP2817]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5025]], label [[IF_THEN5027:%.*]], label [[IF_END5028:%.*]]
+// SIMD-ONLY0:       if.then5027:
+// SIMD-ONLY0-NEXT:    [[TMP2818:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP2818]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5028]]
+// SIMD-ONLY0:       if.end5028:
+// SIMD-ONLY0-NEXT:    [[TMP2819:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2820:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5029:%.*]] = fcmp olt double [[TMP2819]], [[TMP2820]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5029]], label [[IF_THEN5031:%.*]], label [[IF_END5032:%.*]]
+// SIMD-ONLY0:       if.then5031:
+// SIMD-ONLY0-NEXT:    [[TMP2821:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP2821]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5032]]
+// SIMD-ONLY0:       if.end5032:
+// SIMD-ONLY0-NEXT:    [[TMP2822:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2823:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5033:%.*]] = fcmp ogt double [[TMP2822]], [[TMP2823]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5033]], label [[IF_THEN5035:%.*]], label [[IF_END5036:%.*]]
+// SIMD-ONLY0:       if.then5035:
+// SIMD-ONLY0-NEXT:    [[TMP2824:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP2824]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5036]]
+// SIMD-ONLY0:       if.end5036:
+// SIMD-ONLY0-NEXT:    [[TMP2825:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2826:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5037:%.*]] = fcmp olt double [[TMP2825]], [[TMP2826]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5037]], label [[IF_THEN5039:%.*]], label [[IF_END5040:%.*]]
+// SIMD-ONLY0:       if.then5039:
+// SIMD-ONLY0-NEXT:    [[TMP2827:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP2827]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5040]]
+// SIMD-ONLY0:       if.end5040:
+// SIMD-ONLY0-NEXT:    [[TMP2828:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2829:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5041:%.*]] = fcmp ogt double [[TMP2828]], [[TMP2829]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5041]], label [[COND_TRUE5043:%.*]], label [[COND_FALSE5044:%.*]]
+// SIMD-ONLY0:       cond.true5043:
+// SIMD-ONLY0-NEXT:    [[TMP2830:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END5045:%.*]]
+// SIMD-ONLY0:       cond.false5044:
+// SIMD-ONLY0-NEXT:    [[TMP2831:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END5045]]
+// SIMD-ONLY0:       cond.end5045:
+// SIMD-ONLY0-NEXT:    [[COND5046:%.*]] = phi double [ [[TMP2830]], [[COND_TRUE5043]] ], [ [[TMP2831]], [[COND_FALSE5044]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND5046]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2832:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2833:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5047:%.*]] = fcmp olt double [[TMP2832]], [[TMP2833]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5047]], label [[COND_TRUE5049:%.*]], label [[COND_FALSE5050:%.*]]
+// SIMD-ONLY0:       cond.true5049:
+// SIMD-ONLY0-NEXT:    [[TMP2834:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END5051:%.*]]
+// SIMD-ONLY0:       cond.false5050:
+// SIMD-ONLY0-NEXT:    [[TMP2835:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END5051]]
+// SIMD-ONLY0:       cond.end5051:
+// SIMD-ONLY0-NEXT:    [[COND5052:%.*]] = phi double [ [[TMP2834]], [[COND_TRUE5049]] ], [ [[TMP2835]], [[COND_FALSE5050]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND5052]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2836:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2837:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5053:%.*]] = fcmp ogt double [[TMP2836]], [[TMP2837]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5053]], label [[COND_TRUE5055:%.*]], label [[COND_FALSE5056:%.*]]
+// SIMD-ONLY0:       cond.true5055:
+// SIMD-ONLY0-NEXT:    [[TMP2838:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END5057:%.*]]
+// SIMD-ONLY0:       cond.false5056:
+// SIMD-ONLY0-NEXT:    [[TMP2839:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END5057]]
+// SIMD-ONLY0:       cond.end5057:
+// SIMD-ONLY0-NEXT:    [[COND5058:%.*]] = phi double [ [[TMP2838]], [[COND_TRUE5055]] ], [ [[TMP2839]], [[COND_FALSE5056]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND5058]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2840:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2841:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5059:%.*]] = fcmp olt double [[TMP2840]], [[TMP2841]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5059]], label [[COND_TRUE5061:%.*]], label [[COND_FALSE5062:%.*]]
+// SIMD-ONLY0:       cond.true5061:
+// SIMD-ONLY0-NEXT:    [[TMP2842:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END5063:%.*]]
+// SIMD-ONLY0:       cond.false5062:
+// SIMD-ONLY0-NEXT:    [[TMP2843:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END5063]]
+// SIMD-ONLY0:       cond.end5063:
+// SIMD-ONLY0-NEXT:    [[COND5064:%.*]] = phi double [ [[TMP2842]], [[COND_TRUE5061]] ], [ [[TMP2843]], [[COND_FALSE5062]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND5064]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2844:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2845:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5065:%.*]] = fcmp ogt double [[TMP2844]], [[TMP2845]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5065]], label [[IF_THEN5067:%.*]], label [[IF_END5068:%.*]]
+// SIMD-ONLY0:       if.then5067:
+// SIMD-ONLY0-NEXT:    [[TMP2846:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP2846]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5068]]
+// SIMD-ONLY0:       if.end5068:
+// SIMD-ONLY0-NEXT:    [[TMP2847:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2848:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5069:%.*]] = fcmp olt double [[TMP2847]], [[TMP2848]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5069]], label [[IF_THEN5071:%.*]], label [[IF_END5072:%.*]]
+// SIMD-ONLY0:       if.then5071:
+// SIMD-ONLY0-NEXT:    [[TMP2849:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP2849]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5072]]
+// SIMD-ONLY0:       if.end5072:
+// SIMD-ONLY0-NEXT:    [[TMP2850:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2851:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5073:%.*]] = fcmp ogt double [[TMP2850]], [[TMP2851]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5073]], label [[IF_THEN5075:%.*]], label [[IF_END5076:%.*]]
+// SIMD-ONLY0:       if.then5075:
+// SIMD-ONLY0-NEXT:    [[TMP2852:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP2852]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5076]]
+// SIMD-ONLY0:       if.end5076:
+// SIMD-ONLY0-NEXT:    [[TMP2853:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2854:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5077:%.*]] = fcmp olt double [[TMP2853]], [[TMP2854]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5077]], label [[IF_THEN5079:%.*]], label [[IF_END5080:%.*]]
+// SIMD-ONLY0:       if.then5079:
+// SIMD-ONLY0-NEXT:    [[TMP2855:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP2855]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5080]]
+// SIMD-ONLY0:       if.end5080:
+// SIMD-ONLY0-NEXT:    ret void
+//
+//
+// SIMD-ONLY0-LABEL: @bar(
+// SIMD-ONLY0-NEXT:  entry:
+// SIMD-ONLY0-NEXT:    [[CX:%.*]] = alloca i8, align 1
+// SIMD-ONLY0-NEXT:    [[CV:%.*]] = alloca i8, align 1
+// SIMD-ONLY0-NEXT:    [[CR:%.*]] = alloca i8, align 1
+// SIMD-ONLY0-NEXT:    [[CE:%.*]] = alloca i8, align 1
+// SIMD-ONLY0-NEXT:    [[CD:%.*]] = alloca i8, align 1
+// SIMD-ONLY0-NEXT:    [[UCX:%.*]] = alloca i8, align 1
+// SIMD-ONLY0-NEXT:    [[UCV:%.*]] = alloca i8, align 1
+// SIMD-ONLY0-NEXT:    [[UCR:%.*]] = alloca i8, align 1
+// SIMD-ONLY0-NEXT:    [[UCE:%.*]] = alloca i8, align 1
+// SIMD-ONLY0-NEXT:    [[UCD:%.*]] = alloca i8, align 1
+// SIMD-ONLY0-NEXT:    [[SX:%.*]] = alloca i16, align 2
+// SIMD-ONLY0-NEXT:    [[SV:%.*]] = alloca i16, align 2
+// SIMD-ONLY0-NEXT:    [[SR:%.*]] = alloca i16, align 2
+// SIMD-ONLY0-NEXT:    [[SE:%.*]] = alloca i16, align 2
+// SIMD-ONLY0-NEXT:    [[SD:%.*]] = alloca i16, align 2
+// SIMD-ONLY0-NEXT:    [[USX:%.*]] = alloca i16, align 2
+// SIMD-ONLY0-NEXT:    [[USV:%.*]] = alloca i16, align 2
+// SIMD-ONLY0-NEXT:    [[USR:%.*]] = alloca i16, align 2
+// SIMD-ONLY0-NEXT:    [[USE:%.*]] = alloca i16, align 2
+// SIMD-ONLY0-NEXT:    [[USD:%.*]] = alloca i16, align 2
+// SIMD-ONLY0-NEXT:    [[IX:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT:    [[IV:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT:    [[IR:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT:    [[IE:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT:    [[ID:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT:    [[UIX:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT:    [[UIV:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT:    [[UIR:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT:    [[UIE:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT:    [[UID:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT:    [[LX:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[LV:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[LR:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[LE:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[LD:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[ULX:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[ULV:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[ULR:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[ULE:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[ULD:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[LLX:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[LLV:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[LLR:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[LLE:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[LLD:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[ULLX:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[ULLV:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[ULLR:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[ULLE:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[ULLD:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[FX:%.*]] = alloca float, align 4
+// SIMD-ONLY0-NEXT:    [[FV:%.*]] = alloca float, align 4
+// SIMD-ONLY0-NEXT:    [[FE:%.*]] = alloca float, align 4
+// SIMD-ONLY0-NEXT:    [[FD:%.*]] = alloca float, align 4
+// SIMD-ONLY0-NEXT:    [[DX:%.*]] = alloca double, align 8
+// SIMD-ONLY0-NEXT:    [[DV:%.*]] = alloca double, align 8
+// SIMD-ONLY0-NEXT:    [[DE:%.*]] = alloca double, align 8
+// SIMD-ONLY0-NEXT:    [[DD:%.*]] = alloca double, align 8
+// SIMD-ONLY0-NEXT:    [[TMP0:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP0]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP1:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP2:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1:%.*]] = sext i8 [[TMP2]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV1]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+// SIMD-ONLY0:       if.then:
+// SIMD-ONLY0-NEXT:    [[TMP3:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP3]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END]]
+// SIMD-ONLY0:       if.end:
+// SIMD-ONLY0-NEXT:    [[TMP4:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP4]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP5:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV3:%.*]] = sext i8 [[TMP5]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP6:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV4:%.*]] = sext i8 [[TMP6]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[CONV3]], [[CONV4]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5]], label [[IF_THEN7:%.*]], label [[IF_END8:%.*]]
+// SIMD-ONLY0:       if.then7:
+// SIMD-ONLY0-NEXT:    [[TMP7:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP7]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END8]]
+// SIMD-ONLY0:       if.end8:
+// SIMD-ONLY0-NEXT:    [[TMP8:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP8]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP9:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV9:%.*]] = sext i8 [[TMP9]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP10:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV10:%.*]] = sext i8 [[TMP10]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP11:%.*]] = icmp slt i32 [[CONV9]], [[CONV10]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP11]], label [[IF_THEN13:%.*]], label [[IF_END14:%.*]]
+// SIMD-ONLY0:       if.then13:
+// SIMD-ONLY0-NEXT:    [[TMP11:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP11]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END14]]
+// SIMD-ONLY0:       if.end14:
+// SIMD-ONLY0-NEXT:    [[TMP12:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP12]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP13:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV15:%.*]] = sext i8 [[TMP13]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP14:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV16:%.*]] = sext i8 [[TMP14]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP17:%.*]] = icmp slt i32 [[CONV15]], [[CONV16]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP17]], label [[IF_THEN19:%.*]], label [[IF_END20:%.*]]
+// SIMD-ONLY0:       if.then19:
+// SIMD-ONLY0-NEXT:    [[TMP15:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP15]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END20]]
+// SIMD-ONLY0:       if.end20:
+// SIMD-ONLY0-NEXT:    [[TMP16:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP16]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP17:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV21:%.*]] = sext i8 [[TMP17]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP18:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV22:%.*]] = sext i8 [[TMP18]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP23:%.*]] = icmp eq i32 [[CONV21]], [[CONV22]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP23]], label [[IF_THEN25:%.*]], label [[IF_END26:%.*]]
+// SIMD-ONLY0:       if.then25:
+// SIMD-ONLY0-NEXT:    [[TMP19:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP19]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END26]]
+// SIMD-ONLY0:       if.end26:
+// SIMD-ONLY0-NEXT:    [[TMP20:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP20]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP21:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV27:%.*]] = sext i8 [[TMP21]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP22:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV28:%.*]] = sext i8 [[TMP22]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP29:%.*]] = icmp eq i32 [[CONV27]], [[CONV28]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP29]], label [[IF_THEN31:%.*]], label [[IF_END32:%.*]]
+// SIMD-ONLY0:       if.then31:
+// SIMD-ONLY0-NEXT:    [[TMP23:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP23]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END32]]
+// SIMD-ONLY0:       if.end32:
+// SIMD-ONLY0-NEXT:    [[TMP24:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV33:%.*]] = sext i8 [[TMP24]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP25:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV34:%.*]] = sext i8 [[TMP25]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP35:%.*]] = icmp sgt i32 [[CONV33]], [[CONV34]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP35]], label [[IF_THEN37:%.*]], label [[IF_END38:%.*]]
+// SIMD-ONLY0:       if.then37:
+// SIMD-ONLY0-NEXT:    [[TMP26:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP26]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END38]]
+// SIMD-ONLY0:       if.end38:
+// SIMD-ONLY0-NEXT:    [[TMP27:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP27]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP28:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV39:%.*]] = sext i8 [[TMP28]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP29:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV40:%.*]] = sext i8 [[TMP29]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP41:%.*]] = icmp sgt i32 [[CONV39]], [[CONV40]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP41]], label [[IF_THEN43:%.*]], label [[IF_END44:%.*]]
+// SIMD-ONLY0:       if.then43:
+// SIMD-ONLY0-NEXT:    [[TMP30:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP30]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END44]]
+// SIMD-ONLY0:       if.end44:
+// SIMD-ONLY0-NEXT:    [[TMP31:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP31]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP32:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV45:%.*]] = sext i8 [[TMP32]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP33:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV46:%.*]] = sext i8 [[TMP33]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP47:%.*]] = icmp slt i32 [[CONV45]], [[CONV46]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP47]], label [[IF_THEN49:%.*]], label [[IF_END50:%.*]]
+// SIMD-ONLY0:       if.then49:
+// SIMD-ONLY0-NEXT:    [[TMP34:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP34]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END50]]
+// SIMD-ONLY0:       if.end50:
+// SIMD-ONLY0-NEXT:    [[TMP35:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP35]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP36:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV51:%.*]] = sext i8 [[TMP36]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP37:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV52:%.*]] = sext i8 [[TMP37]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP53:%.*]] = icmp slt i32 [[CONV51]], [[CONV52]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP53]], label [[IF_THEN55:%.*]], label [[IF_END56:%.*]]
+// SIMD-ONLY0:       if.then55:
+// SIMD-ONLY0-NEXT:    [[TMP38:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP38]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END56]]
+// SIMD-ONLY0:       if.end56:
+// SIMD-ONLY0-NEXT:    [[TMP39:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP39]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP40:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV57:%.*]] = sext i8 [[TMP40]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP41:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV58:%.*]] = sext i8 [[TMP41]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP59:%.*]] = icmp eq i32 [[CONV57]], [[CONV58]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP59]], label [[IF_THEN61:%.*]], label [[IF_END62:%.*]]
+// SIMD-ONLY0:       if.then61:
+// SIMD-ONLY0-NEXT:    [[TMP42:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP42]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END62]]
+// SIMD-ONLY0:       if.end62:
+// SIMD-ONLY0-NEXT:    [[TMP43:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP43]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP44:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV63:%.*]] = sext i8 [[TMP44]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP45:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV64:%.*]] = sext i8 [[TMP45]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP65:%.*]] = icmp eq i32 [[CONV63]], [[CONV64]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP65]], label [[IF_THEN67:%.*]], label [[IF_END68:%.*]]
+// SIMD-ONLY0:       if.then67:
+// SIMD-ONLY0-NEXT:    [[TMP46:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP46]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END68]]
+// SIMD-ONLY0:       if.end68:
+// SIMD-ONLY0-NEXT:    [[TMP47:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP47]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP48:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV69:%.*]] = sext i8 [[TMP48]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP49:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV70:%.*]] = sext i8 [[TMP49]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP71:%.*]] = icmp eq i32 [[CONV69]], [[CONV70]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP71]], label [[IF_THEN73:%.*]], label [[IF_ELSE:%.*]]
+// SIMD-ONLY0:       if.then73:
+// SIMD-ONLY0-NEXT:    [[TMP50:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP50]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END74:%.*]]
+// SIMD-ONLY0:       if.else:
+// SIMD-ONLY0-NEXT:    [[TMP51:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP51]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END74]]
+// SIMD-ONLY0:       if.end74:
+// SIMD-ONLY0-NEXT:    [[TMP52:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV75:%.*]] = sext i8 [[TMP52]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP53:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV76:%.*]] = sext i8 [[TMP53]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP77:%.*]] = icmp eq i32 [[CONV75]], [[CONV76]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP77]], label [[IF_THEN79:%.*]], label [[IF_ELSE80:%.*]]
+// SIMD-ONLY0:       if.then79:
+// SIMD-ONLY0-NEXT:    [[TMP54:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP54]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END81:%.*]]
+// SIMD-ONLY0:       if.else80:
+// SIMD-ONLY0-NEXT:    [[TMP55:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP55]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END81]]
+// SIMD-ONLY0:       if.end81:
+// SIMD-ONLY0-NEXT:    [[TMP56:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV82:%.*]] = sext i8 [[TMP56]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP57:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV83:%.*]] = sext i8 [[TMP57]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP84:%.*]] = icmp eq i32 [[CONV82]], [[CONV83]]
+// SIMD-ONLY0-NEXT:    [[CONV85:%.*]] = zext i1 [[CMP84]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV86:%.*]] = trunc i32 [[CONV85]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV86]], ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP58:%.*]] = load i8, ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL:%.*]] = icmp ne i8 [[TMP58]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN87:%.*]], label [[IF_END88:%.*]]
+// SIMD-ONLY0:       if.then87:
+// SIMD-ONLY0-NEXT:    [[TMP59:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP59]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END88]]
+// SIMD-ONLY0:       if.end88:
+// SIMD-ONLY0-NEXT:    [[TMP60:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV89:%.*]] = sext i8 [[TMP60]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP61:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV90:%.*]] = sext i8 [[TMP61]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP91:%.*]] = icmp eq i32 [[CONV89]], [[CONV90]]
+// SIMD-ONLY0-NEXT:    [[CONV92:%.*]] = zext i1 [[CMP91]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV93:%.*]] = trunc i32 [[CONV92]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV93]], ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP62:%.*]] = load i8, ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL94:%.*]] = icmp ne i8 [[TMP62]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL94]], label [[IF_THEN95:%.*]], label [[IF_END96:%.*]]
+// SIMD-ONLY0:       if.then95:
+// SIMD-ONLY0-NEXT:    [[TMP63:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP63]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END96]]
+// SIMD-ONLY0:       if.end96:
+// SIMD-ONLY0-NEXT:    [[TMP64:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV97:%.*]] = sext i8 [[TMP64]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP65:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV98:%.*]] = sext i8 [[TMP65]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP99:%.*]] = icmp eq i32 [[CONV97]], [[CONV98]]
+// SIMD-ONLY0-NEXT:    [[CONV100:%.*]] = zext i1 [[CMP99]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV101:%.*]] = trunc i32 [[CONV100]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV101]], ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP66:%.*]] = load i8, ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL102:%.*]] = icmp ne i8 [[TMP66]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL102]], label [[IF_THEN103:%.*]], label [[IF_ELSE104:%.*]]
+// SIMD-ONLY0:       if.then103:
+// SIMD-ONLY0-NEXT:    [[TMP67:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP67]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END105:%.*]]
+// SIMD-ONLY0:       if.else104:
+// SIMD-ONLY0-NEXT:    [[TMP68:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP68]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END105]]
+// SIMD-ONLY0:       if.end105:
+// SIMD-ONLY0-NEXT:    [[TMP69:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV106:%.*]] = sext i8 [[TMP69]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP70:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV107:%.*]] = sext i8 [[TMP70]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP108:%.*]] = icmp eq i32 [[CONV106]], [[CONV107]]
+// SIMD-ONLY0-NEXT:    [[CONV109:%.*]] = zext i1 [[CMP108]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV110:%.*]] = trunc i32 [[CONV109]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV110]], ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP71:%.*]] = load i8, ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL111:%.*]] = icmp ne i8 [[TMP71]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL111]], label [[IF_THEN112:%.*]], label [[IF_ELSE113:%.*]]
+// SIMD-ONLY0:       if.then112:
+// SIMD-ONLY0-NEXT:    [[TMP72:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP72]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END114:%.*]]
+// SIMD-ONLY0:       if.else113:
+// SIMD-ONLY0-NEXT:    [[TMP73:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP73]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END114]]
+// SIMD-ONLY0:       if.end114:
+// SIMD-ONLY0-NEXT:    [[TMP74:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP74]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP75:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV115:%.*]] = sext i8 [[TMP75]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP76:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV116:%.*]] = sext i8 [[TMP76]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP117:%.*]] = icmp sgt i32 [[CONV115]], [[CONV116]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP117]], label [[IF_THEN119:%.*]], label [[IF_END120:%.*]]
+// SIMD-ONLY0:       if.then119:
+// SIMD-ONLY0-NEXT:    [[TMP77:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP77]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END120]]
+// SIMD-ONLY0:       if.end120:
+// SIMD-ONLY0-NEXT:    [[TMP78:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP78]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP79:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV121:%.*]] = sext i8 [[TMP79]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP80:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV122:%.*]] = sext i8 [[TMP80]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP123:%.*]] = icmp sgt i32 [[CONV121]], [[CONV122]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP123]], label [[IF_THEN125:%.*]], label [[IF_END126:%.*]]
+// SIMD-ONLY0:       if.then125:
+// SIMD-ONLY0-NEXT:    [[TMP81:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP81]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END126]]
+// SIMD-ONLY0:       if.end126:
+// SIMD-ONLY0-NEXT:    [[TMP82:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP82]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP83:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV127:%.*]] = sext i8 [[TMP83]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP84:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV128:%.*]] = sext i8 [[TMP84]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP129:%.*]] = icmp slt i32 [[CONV127]], [[CONV128]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP129]], label [[IF_THEN131:%.*]], label [[IF_END132:%.*]]
+// SIMD-ONLY0:       if.then131:
+// SIMD-ONLY0-NEXT:    [[TMP85:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP85]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END132]]
+// SIMD-ONLY0:       if.end132:
+// SIMD-ONLY0-NEXT:    [[TMP86:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP86]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP87:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV133:%.*]] = sext i8 [[TMP87]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP88:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV134:%.*]] = sext i8 [[TMP88]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP135:%.*]] = icmp slt i32 [[CONV133]], [[CONV134]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP135]], label [[IF_THEN137:%.*]], label [[IF_END138:%.*]]
+// SIMD-ONLY0:       if.then137:
+// SIMD-ONLY0-NEXT:    [[TMP89:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP89]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END138]]
+// SIMD-ONLY0:       if.end138:
+// SIMD-ONLY0-NEXT:    [[TMP90:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP90]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP91:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV139:%.*]] = sext i8 [[TMP91]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP92:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV140:%.*]] = sext i8 [[TMP92]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP141:%.*]] = icmp eq i32 [[CONV139]], [[CONV140]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP141]], label [[IF_THEN143:%.*]], label [[IF_END144:%.*]]
+// SIMD-ONLY0:       if.then143:
+// SIMD-ONLY0-NEXT:    [[TMP93:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP93]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END144]]
+// SIMD-ONLY0:       if.end144:
+// SIMD-ONLY0-NEXT:    [[TMP94:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP94]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP95:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV145:%.*]] = sext i8 [[TMP95]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP96:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV146:%.*]] = sext i8 [[TMP96]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP147:%.*]] = icmp eq i32 [[CONV145]], [[CONV146]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP147]], label [[IF_THEN149:%.*]], label [[IF_END150:%.*]]
+// SIMD-ONLY0:       if.then149:
+// SIMD-ONLY0-NEXT:    [[TMP97:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP97]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END150]]
+// SIMD-ONLY0:       if.end150:
+// SIMD-ONLY0-NEXT:    [[TMP98:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV151:%.*]] = sext i8 [[TMP98]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP99:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV152:%.*]] = sext i8 [[TMP99]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP153:%.*]] = icmp sgt i32 [[CONV151]], [[CONV152]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP153]], label [[IF_THEN155:%.*]], label [[IF_END156:%.*]]
+// SIMD-ONLY0:       if.then155:
+// SIMD-ONLY0-NEXT:    [[TMP100:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP100]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END156]]
+// SIMD-ONLY0:       if.end156:
+// SIMD-ONLY0-NEXT:    [[TMP101:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP101]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP102:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV157:%.*]] = sext i8 [[TMP102]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP103:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV158:%.*]] = sext i8 [[TMP103]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP159:%.*]] = icmp sgt i32 [[CONV157]], [[CONV158]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP159]], label [[IF_THEN161:%.*]], label [[IF_END162:%.*]]
+// SIMD-ONLY0:       if.then161:
+// SIMD-ONLY0-NEXT:    [[TMP104:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP104]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END162]]
+// SIMD-ONLY0:       if.end162:
+// SIMD-ONLY0-NEXT:    [[TMP105:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP105]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP106:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV163:%.*]] = sext i8 [[TMP106]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP107:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV164:%.*]] = sext i8 [[TMP107]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP165:%.*]] = icmp slt i32 [[CONV163]], [[CONV164]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP165]], label [[IF_THEN167:%.*]], label [[IF_END168:%.*]]
+// SIMD-ONLY0:       if.then167:
+// SIMD-ONLY0-NEXT:    [[TMP108:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP108]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END168]]
+// SIMD-ONLY0:       if.end168:
+// SIMD-ONLY0-NEXT:    [[TMP109:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP109]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP110:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV169:%.*]] = sext i8 [[TMP110]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP111:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV170:%.*]] = sext i8 [[TMP111]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP171:%.*]] = icmp slt i32 [[CONV169]], [[CONV170]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP171]], label [[IF_THEN173:%.*]], label [[IF_END174:%.*]]
+// SIMD-ONLY0:       if.then173:
+// SIMD-ONLY0-NEXT:    [[TMP112:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP112]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END174]]
+// SIMD-ONLY0:       if.end174:
+// SIMD-ONLY0-NEXT:    [[TMP113:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP113]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP114:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV175:%.*]] = sext i8 [[TMP114]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP115:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV176:%.*]] = sext i8 [[TMP115]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP177:%.*]] = icmp eq i32 [[CONV175]], [[CONV176]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP177]], label [[IF_THEN179:%.*]], label [[IF_END180:%.*]]
+// SIMD-ONLY0:       if.then179:
+// SIMD-ONLY0-NEXT:    [[TMP116:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP116]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END180]]
+// SIMD-ONLY0:       if.end180:
+// SIMD-ONLY0-NEXT:    [[TMP117:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP117]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP118:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV181:%.*]] = sext i8 [[TMP118]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP119:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV182:%.*]] = sext i8 [[TMP119]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP183:%.*]] = icmp eq i32 [[CONV181]], [[CONV182]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP183]], label [[IF_THEN185:%.*]], label [[IF_END186:%.*]]
+// SIMD-ONLY0:       if.then185:
+// SIMD-ONLY0-NEXT:    [[TMP120:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP120]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END186]]
+// SIMD-ONLY0:       if.end186:
+// SIMD-ONLY0-NEXT:    [[TMP121:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP121]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP122:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV187:%.*]] = sext i8 [[TMP122]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP123:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV188:%.*]] = sext i8 [[TMP123]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP189:%.*]] = icmp eq i32 [[CONV187]], [[CONV188]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP189]], label [[IF_THEN191:%.*]], label [[IF_ELSE192:%.*]]
+// SIMD-ONLY0:       if.then191:
+// SIMD-ONLY0-NEXT:    [[TMP124:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP124]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END193:%.*]]
+// SIMD-ONLY0:       if.else192:
+// SIMD-ONLY0-NEXT:    [[TMP125:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP125]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END193]]
+// SIMD-ONLY0:       if.end193:
+// SIMD-ONLY0-NEXT:    [[TMP126:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV194:%.*]] = sext i8 [[TMP126]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP127:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV195:%.*]] = sext i8 [[TMP127]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP196:%.*]] = icmp eq i32 [[CONV194]], [[CONV195]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP196]], label [[IF_THEN198:%.*]], label [[IF_ELSE199:%.*]]
+// SIMD-ONLY0:       if.then198:
+// SIMD-ONLY0-NEXT:    [[TMP128:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP128]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END200:%.*]]
+// SIMD-ONLY0:       if.else199:
+// SIMD-ONLY0-NEXT:    [[TMP129:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP129]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END200]]
+// SIMD-ONLY0:       if.end200:
+// SIMD-ONLY0-NEXT:    [[TMP130:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV201:%.*]] = sext i8 [[TMP130]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP131:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV202:%.*]] = sext i8 [[TMP131]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP203:%.*]] = icmp eq i32 [[CONV201]], [[CONV202]]
+// SIMD-ONLY0-NEXT:    [[CONV204:%.*]] = zext i1 [[CMP203]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV205:%.*]] = trunc i32 [[CONV204]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV205]], ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP132:%.*]] = load i8, ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL206:%.*]] = icmp ne i8 [[TMP132]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL206]], label [[IF_THEN207:%.*]], label [[IF_END208:%.*]]
+// SIMD-ONLY0:       if.then207:
+// SIMD-ONLY0-NEXT:    [[TMP133:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP133]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END208]]
+// SIMD-ONLY0:       if.end208:
+// SIMD-ONLY0-NEXT:    [[TMP134:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV209:%.*]] = sext i8 [[TMP134]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP135:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV210:%.*]] = sext i8 [[TMP135]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP211:%.*]] = icmp eq i32 [[CONV209]], [[CONV210]]
+// SIMD-ONLY0-NEXT:    [[CONV212:%.*]] = zext i1 [[CMP211]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV213:%.*]] = trunc i32 [[CONV212]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV213]], ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP136:%.*]] = load i8, ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL214:%.*]] = icmp ne i8 [[TMP136]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL214]], label [[IF_THEN215:%.*]], label [[IF_END216:%.*]]
+// SIMD-ONLY0:       if.then215:
+// SIMD-ONLY0-NEXT:    [[TMP137:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP137]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END216]]
+// SIMD-ONLY0:       if.end216:
+// SIMD-ONLY0-NEXT:    [[TMP138:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV217:%.*]] = sext i8 [[TMP138]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP139:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV218:%.*]] = sext i8 [[TMP139]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP219:%.*]] = icmp eq i32 [[CONV217]], [[CONV218]]
+// SIMD-ONLY0-NEXT:    [[CONV220:%.*]] = zext i1 [[CMP219]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV221:%.*]] = trunc i32 [[CONV220]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV221]], ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP140:%.*]] = load i8, ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL222:%.*]] = icmp ne i8 [[TMP140]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL222]], label [[IF_THEN223:%.*]], label [[IF_ELSE224:%.*]]
+// SIMD-ONLY0:       if.then223:
+// SIMD-ONLY0-NEXT:    [[TMP141:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP141]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END225:%.*]]
+// SIMD-ONLY0:       if.else224:
+// SIMD-ONLY0-NEXT:    [[TMP142:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP142]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END225]]
+// SIMD-ONLY0:       if.end225:
+// SIMD-ONLY0-NEXT:    [[TMP143:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV226:%.*]] = sext i8 [[TMP143]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP144:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV227:%.*]] = sext i8 [[TMP144]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP228:%.*]] = icmp eq i32 [[CONV226]], [[CONV227]]
+// SIMD-ONLY0-NEXT:    [[CONV229:%.*]] = zext i1 [[CMP228]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV230:%.*]] = trunc i32 [[CONV229]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV230]], ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP145:%.*]] = load i8, ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL231:%.*]] = icmp ne i8 [[TMP145]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL231]], label [[IF_THEN232:%.*]], label [[IF_ELSE233:%.*]]
+// SIMD-ONLY0:       if.then232:
+// SIMD-ONLY0-NEXT:    [[TMP146:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP146]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END234:%.*]]
+// SIMD-ONLY0:       if.else233:
+// SIMD-ONLY0-NEXT:    [[TMP147:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP147]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END234]]
+// SIMD-ONLY0:       if.end234:
+// SIMD-ONLY0-NEXT:    [[TMP148:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP148]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP149:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV235:%.*]] = sext i8 [[TMP149]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP150:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV236:%.*]] = sext i8 [[TMP150]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP237:%.*]] = icmp sgt i32 [[CONV235]], [[CONV236]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP237]], label [[IF_THEN239:%.*]], label [[IF_END240:%.*]]
+// SIMD-ONLY0:       if.then239:
+// SIMD-ONLY0-NEXT:    [[TMP151:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP151]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END240]]
+// SIMD-ONLY0:       if.end240:
+// SIMD-ONLY0-NEXT:    [[TMP152:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP152]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP153:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV241:%.*]] = sext i8 [[TMP153]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP154:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV242:%.*]] = sext i8 [[TMP154]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP243:%.*]] = icmp sgt i32 [[CONV241]], [[CONV242]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP243]], label [[IF_THEN245:%.*]], label [[IF_END246:%.*]]
+// SIMD-ONLY0:       if.then245:
+// SIMD-ONLY0-NEXT:    [[TMP155:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP155]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END246]]
+// SIMD-ONLY0:       if.end246:
+// SIMD-ONLY0-NEXT:    [[TMP156:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP156]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP157:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV247:%.*]] = sext i8 [[TMP157]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP158:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV248:%.*]] = sext i8 [[TMP158]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP249:%.*]] = icmp slt i32 [[CONV247]], [[CONV248]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP249]], label [[IF_THEN251:%.*]], label [[IF_END252:%.*]]
+// SIMD-ONLY0:       if.then251:
+// SIMD-ONLY0-NEXT:    [[TMP159:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP159]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END252]]
+// SIMD-ONLY0:       if.end252:
+// SIMD-ONLY0-NEXT:    [[TMP160:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP160]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP161:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV253:%.*]] = sext i8 [[TMP161]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP162:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV254:%.*]] = sext i8 [[TMP162]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP255:%.*]] = icmp slt i32 [[CONV253]], [[CONV254]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP255]], label [[IF_THEN257:%.*]], label [[IF_END258:%.*]]
+// SIMD-ONLY0:       if.then257:
+// SIMD-ONLY0-NEXT:    [[TMP163:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP163]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END258]]
+// SIMD-ONLY0:       if.end258:
+// SIMD-ONLY0-NEXT:    [[TMP164:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP164]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP165:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV259:%.*]] = sext i8 [[TMP165]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP166:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV260:%.*]] = sext i8 [[TMP166]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP261:%.*]] = icmp eq i32 [[CONV259]], [[CONV260]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP261]], label [[IF_THEN263:%.*]], label [[IF_END264:%.*]]
+// SIMD-ONLY0:       if.then263:
+// SIMD-ONLY0-NEXT:    [[TMP167:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP167]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END264]]
+// SIMD-ONLY0:       if.end264:
+// SIMD-ONLY0-NEXT:    [[TMP168:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP168]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP169:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV265:%.*]] = sext i8 [[TMP169]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP170:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV266:%.*]] = sext i8 [[TMP170]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP267:%.*]] = icmp eq i32 [[CONV265]], [[CONV266]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP267]], label [[IF_THEN269:%.*]], label [[IF_END270:%.*]]
+// SIMD-ONLY0:       if.then269:
+// SIMD-ONLY0-NEXT:    [[TMP171:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP171]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END270]]
+// SIMD-ONLY0:       if.end270:
+// SIMD-ONLY0-NEXT:    [[TMP172:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV271:%.*]] = sext i8 [[TMP172]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP173:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV272:%.*]] = sext i8 [[TMP173]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP273:%.*]] = icmp sgt i32 [[CONV271]], [[CONV272]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP273]], label [[IF_THEN275:%.*]], label [[IF_END276:%.*]]
+// SIMD-ONLY0:       if.then275:
+// SIMD-ONLY0-NEXT:    [[TMP174:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP174]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END276]]
+// SIMD-ONLY0:       if.end276:
+// SIMD-ONLY0-NEXT:    [[TMP175:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP175]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP176:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV277:%.*]] = sext i8 [[TMP176]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP177:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV278:%.*]] = sext i8 [[TMP177]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP279:%.*]] = icmp sgt i32 [[CONV277]], [[CONV278]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP279]], label [[IF_THEN281:%.*]], label [[IF_END282:%.*]]
+// SIMD-ONLY0:       if.then281:
+// SIMD-ONLY0-NEXT:    [[TMP178:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP178]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END282]]
+// SIMD-ONLY0:       if.end282:
+// SIMD-ONLY0-NEXT:    [[TMP179:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP179]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP180:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV283:%.*]] = sext i8 [[TMP180]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP181:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV284:%.*]] = sext i8 [[TMP181]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP285:%.*]] = icmp slt i32 [[CONV283]], [[CONV284]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP285]], label [[IF_THEN287:%.*]], label [[IF_END288:%.*]]
+// SIMD-ONLY0:       if.then287:
+// SIMD-ONLY0-NEXT:    [[TMP182:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP182]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END288]]
+// SIMD-ONLY0:       if.end288:
+// SIMD-ONLY0-NEXT:    [[TMP183:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP183]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP184:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV289:%.*]] = sext i8 [[TMP184]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP185:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV290:%.*]] = sext i8 [[TMP185]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP291:%.*]] = icmp slt i32 [[CONV289]], [[CONV290]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP291]], label [[IF_THEN293:%.*]], label [[IF_END294:%.*]]
+// SIMD-ONLY0:       if.then293:
+// SIMD-ONLY0-NEXT:    [[TMP186:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP186]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END294]]
+// SIMD-ONLY0:       if.end294:
+// SIMD-ONLY0-NEXT:    [[TMP187:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP187]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP188:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV295:%.*]] = sext i8 [[TMP188]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP189:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV296:%.*]] = sext i8 [[TMP189]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP297:%.*]] = icmp eq i32 [[CONV295]], [[CONV296]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP297]], label [[IF_THEN299:%.*]], label [[IF_END300:%.*]]
+// SIMD-ONLY0:       if.then299:
+// SIMD-ONLY0-NEXT:    [[TMP190:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP190]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END300]]
+// SIMD-ONLY0:       if.end300:
+// SIMD-ONLY0-NEXT:    [[TMP191:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP191]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP192:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV301:%.*]] = sext i8 [[TMP192]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP193:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV302:%.*]] = sext i8 [[TMP193]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP303:%.*]] = icmp eq i32 [[CONV301]], [[CONV302]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP303]], label [[IF_THEN305:%.*]], label [[IF_END306:%.*]]
+// SIMD-ONLY0:       if.then305:
+// SIMD-ONLY0-NEXT:    [[TMP194:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP194]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END306]]
+// SIMD-ONLY0:       if.end306:
+// SIMD-ONLY0-NEXT:    [[TMP195:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP195]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP196:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV307:%.*]] = sext i8 [[TMP196]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP197:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV308:%.*]] = sext i8 [[TMP197]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP309:%.*]] = icmp eq i32 [[CONV307]], [[CONV308]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP309]], label [[IF_THEN311:%.*]], label [[IF_ELSE312:%.*]]
+// SIMD-ONLY0:       if.then311:
+// SIMD-ONLY0-NEXT:    [[TMP198:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP198]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END313:%.*]]
+// SIMD-ONLY0:       if.else312:
+// SIMD-ONLY0-NEXT:    [[TMP199:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP199]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END313]]
+// SIMD-ONLY0:       if.end313:
+// SIMD-ONLY0-NEXT:    [[TMP200:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV314:%.*]] = sext i8 [[TMP200]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP201:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV315:%.*]] = sext i8 [[TMP201]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP316:%.*]] = icmp eq i32 [[CONV314]], [[CONV315]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP316]], label [[IF_THEN318:%.*]], label [[IF_ELSE319:%.*]]
+// SIMD-ONLY0:       if.then318:
+// SIMD-ONLY0-NEXT:    [[TMP202:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP202]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END320:%.*]]
+// SIMD-ONLY0:       if.else319:
+// SIMD-ONLY0-NEXT:    [[TMP203:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP203]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END320]]
+// SIMD-ONLY0:       if.end320:
+// SIMD-ONLY0-NEXT:    [[TMP204:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV321:%.*]] = sext i8 [[TMP204]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP205:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV322:%.*]] = sext i8 [[TMP205]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP323:%.*]] = icmp eq i32 [[CONV321]], [[CONV322]]
+// SIMD-ONLY0-NEXT:    [[CONV324:%.*]] = zext i1 [[CMP323]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV325:%.*]] = trunc i32 [[CONV324]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV325]], ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP206:%.*]] = load i8, ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL326:%.*]] = icmp ne i8 [[TMP206]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL326]], label [[IF_THEN327:%.*]], label [[IF_END328:%.*]]
+// SIMD-ONLY0:       if.then327:
+// SIMD-ONLY0-NEXT:    [[TMP207:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP207]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END328]]
+// SIMD-ONLY0:       if.end328:
+// SIMD-ONLY0-NEXT:    [[TMP208:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV329:%.*]] = sext i8 [[TMP208]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP209:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV330:%.*]] = sext i8 [[TMP209]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP331:%.*]] = icmp eq i32 [[CONV329]], [[CONV330]]
+// SIMD-ONLY0-NEXT:    [[CONV332:%.*]] = zext i1 [[CMP331]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV333:%.*]] = trunc i32 [[CONV332]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV333]], ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP210:%.*]] = load i8, ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL334:%.*]] = icmp ne i8 [[TMP210]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL334]], label [[IF_THEN335:%.*]], label [[IF_END336:%.*]]
+// SIMD-ONLY0:       if.then335:
+// SIMD-ONLY0-NEXT:    [[TMP211:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP211]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END336]]
+// SIMD-ONLY0:       if.end336:
+// SIMD-ONLY0-NEXT:    [[TMP212:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV337:%.*]] = sext i8 [[TMP212]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP213:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV338:%.*]] = sext i8 [[TMP213]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP339:%.*]] = icmp eq i32 [[CONV337]], [[CONV338]]
+// SIMD-ONLY0-NEXT:    [[CONV340:%.*]] = zext i1 [[CMP339]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV341:%.*]] = trunc i32 [[CONV340]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV341]], ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP214:%.*]] = load i8, ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL342:%.*]] = icmp ne i8 [[TMP214]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL342]], label [[IF_THEN343:%.*]], label [[IF_ELSE344:%.*]]
+// SIMD-ONLY0:       if.then343:
+// SIMD-ONLY0-NEXT:    [[TMP215:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP215]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END345:%.*]]
+// SIMD-ONLY0:       if.else344:
+// SIMD-ONLY0-NEXT:    [[TMP216:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP216]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END345]]
+// SIMD-ONLY0:       if.end345:
+// SIMD-ONLY0-NEXT:    [[TMP217:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV346:%.*]] = sext i8 [[TMP217]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP218:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV347:%.*]] = sext i8 [[TMP218]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP348:%.*]] = icmp eq i32 [[CONV346]], [[CONV347]]
+// SIMD-ONLY0-NEXT:    [[CONV349:%.*]] = zext i1 [[CMP348]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV350:%.*]] = trunc i32 [[CONV349]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV350]], ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP219:%.*]] = load i8, ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL351:%.*]] = icmp ne i8 [[TMP219]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL351]], label [[IF_THEN352:%.*]], label [[IF_ELSE353:%.*]]
+// SIMD-ONLY0:       if.then352:
+// SIMD-ONLY0-NEXT:    [[TMP220:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP220]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END354:%.*]]
+// SIMD-ONLY0:       if.else353:
+// SIMD-ONLY0-NEXT:    [[TMP221:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP221]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END354]]
+// SIMD-ONLY0:       if.end354:
+// SIMD-ONLY0-NEXT:    [[TMP222:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP222]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP223:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV355:%.*]] = sext i8 [[TMP223]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP224:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV356:%.*]] = sext i8 [[TMP224]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP357:%.*]] = icmp sgt i32 [[CONV355]], [[CONV356]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP357]], label [[IF_THEN359:%.*]], label [[IF_END360:%.*]]
+// SIMD-ONLY0:       if.then359:
+// SIMD-ONLY0-NEXT:    [[TMP225:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP225]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END360]]
+// SIMD-ONLY0:       if.end360:
+// SIMD-ONLY0-NEXT:    [[TMP226:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP226]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP227:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV361:%.*]] = sext i8 [[TMP227]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP228:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV362:%.*]] = sext i8 [[TMP228]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP363:%.*]] = icmp sgt i32 [[CONV361]], [[CONV362]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP363]], label [[IF_THEN365:%.*]], label [[IF_END366:%.*]]
+// SIMD-ONLY0:       if.then365:
+// SIMD-ONLY0-NEXT:    [[TMP229:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP229]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END366]]
+// SIMD-ONLY0:       if.end366:
+// SIMD-ONLY0-NEXT:    [[TMP230:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP230]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP231:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV367:%.*]] = sext i8 [[TMP231]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP232:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV368:%.*]] = sext i8 [[TMP232]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP369:%.*]] = icmp slt i32 [[CONV367]], [[CONV368]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP369]], label [[IF_THEN371:%.*]], label [[IF_END372:%.*]]
+// SIMD-ONLY0:       if.then371:
+// SIMD-ONLY0-NEXT:    [[TMP233:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP233]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END372]]
+// SIMD-ONLY0:       if.end372:
+// SIMD-ONLY0-NEXT:    [[TMP234:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP234]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP235:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV373:%.*]] = sext i8 [[TMP235]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP236:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV374:%.*]] = sext i8 [[TMP236]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP375:%.*]] = icmp slt i32 [[CONV373]], [[CONV374]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP375]], label [[IF_THEN377:%.*]], label [[IF_END378:%.*]]
+// SIMD-ONLY0:       if.then377:
+// SIMD-ONLY0-NEXT:    [[TMP237:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP237]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END378]]
+// SIMD-ONLY0:       if.end378:
+// SIMD-ONLY0-NEXT:    [[TMP238:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP238]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP239:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV379:%.*]] = sext i8 [[TMP239]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP240:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV380:%.*]] = sext i8 [[TMP240]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP381:%.*]] = icmp eq i32 [[CONV379]], [[CONV380]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP381]], label [[IF_THEN383:%.*]], label [[IF_END384:%.*]]
+// SIMD-ONLY0:       if.then383:
+// SIMD-ONLY0-NEXT:    [[TMP241:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP241]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END384]]
+// SIMD-ONLY0:       if.end384:
+// SIMD-ONLY0-NEXT:    [[TMP242:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP242]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP243:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV385:%.*]] = sext i8 [[TMP243]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP244:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV386:%.*]] = sext i8 [[TMP244]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP387:%.*]] = icmp eq i32 [[CONV385]], [[CONV386]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP387]], label [[IF_THEN389:%.*]], label [[IF_END390:%.*]]
+// SIMD-ONLY0:       if.then389:
+// SIMD-ONLY0-NEXT:    [[TMP245:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP245]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END390]]
+// SIMD-ONLY0:       if.end390:
+// SIMD-ONLY0-NEXT:    [[TMP246:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV391:%.*]] = sext i8 [[TMP246]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP247:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV392:%.*]] = sext i8 [[TMP247]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP393:%.*]] = icmp sgt i32 [[CONV391]], [[CONV392]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP393]], label [[IF_THEN395:%.*]], label [[IF_END396:%.*]]
+// SIMD-ONLY0:       if.then395:
+// SIMD-ONLY0-NEXT:    [[TMP248:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP248]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END396]]
+// SIMD-ONLY0:       if.end396:
+// SIMD-ONLY0-NEXT:    [[TMP249:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP249]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP250:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV397:%.*]] = sext i8 [[TMP250]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP251:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV398:%.*]] = sext i8 [[TMP251]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP399:%.*]] = icmp sgt i32 [[CONV397]], [[CONV398]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP399]], label [[IF_THEN401:%.*]], label [[IF_END402:%.*]]
+// SIMD-ONLY0:       if.then401:
+// SIMD-ONLY0-NEXT:    [[TMP252:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP252]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END402]]
+// SIMD-ONLY0:       if.end402:
+// SIMD-ONLY0-NEXT:    [[TMP253:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP253]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP254:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV403:%.*]] = sext i8 [[TMP254]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP255:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV404:%.*]] = sext i8 [[TMP255]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP405:%.*]] = icmp slt i32 [[CONV403]], [[CONV404]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP405]], label [[IF_THEN407:%.*]], label [[IF_END408:%.*]]
+// SIMD-ONLY0:       if.then407:
+// SIMD-ONLY0-NEXT:    [[TMP256:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP256]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END408]]
+// SIMD-ONLY0:       if.end408:
+// SIMD-ONLY0-NEXT:    [[TMP257:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP257]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP258:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV409:%.*]] = sext i8 [[TMP258]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP259:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV410:%.*]] = sext i8 [[TMP259]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP411:%.*]] = icmp slt i32 [[CONV409]], [[CONV410]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP411]], label [[IF_THEN413:%.*]], label [[IF_END414:%.*]]
+// SIMD-ONLY0:       if.then413:
+// SIMD-ONLY0-NEXT:    [[TMP260:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP260]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END414]]
+// SIMD-ONLY0:       if.end414:
+// SIMD-ONLY0-NEXT:    [[TMP261:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP261]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP262:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV415:%.*]] = sext i8 [[TMP262]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP263:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV416:%.*]] = sext i8 [[TMP263]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP417:%.*]] = icmp eq i32 [[CONV415]], [[CONV416]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP417]], label [[IF_THEN419:%.*]], label [[IF_END420:%.*]]
+// SIMD-ONLY0:       if.then419:
+// SIMD-ONLY0-NEXT:    [[TMP264:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP264]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END420]]
+// SIMD-ONLY0:       if.end420:
+// SIMD-ONLY0-NEXT:    [[TMP265:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP265]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP266:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV421:%.*]] = sext i8 [[TMP266]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP267:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV422:%.*]] = sext i8 [[TMP267]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP423:%.*]] = icmp eq i32 [[CONV421]], [[CONV422]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP423]], label [[IF_THEN425:%.*]], label [[IF_END426:%.*]]
+// SIMD-ONLY0:       if.then425:
+// SIMD-ONLY0-NEXT:    [[TMP268:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP268]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END426]]
+// SIMD-ONLY0:       if.end426:
+// SIMD-ONLY0-NEXT:    [[TMP269:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP269]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP270:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV427:%.*]] = sext i8 [[TMP270]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP271:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV428:%.*]] = sext i8 [[TMP271]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP429:%.*]] = icmp eq i32 [[CONV427]], [[CONV428]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP429]], label [[IF_THEN431:%.*]], label [[IF_ELSE432:%.*]]
+// SIMD-ONLY0:       if.then431:
+// SIMD-ONLY0-NEXT:    [[TMP272:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP272]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END433:%.*]]
+// SIMD-ONLY0:       if.else432:
+// SIMD-ONLY0-NEXT:    [[TMP273:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP273]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END433]]
+// SIMD-ONLY0:       if.end433:
+// SIMD-ONLY0-NEXT:    [[TMP274:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV434:%.*]] = sext i8 [[TMP274]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP275:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV435:%.*]] = sext i8 [[TMP275]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP436:%.*]] = icmp eq i32 [[CONV434]], [[CONV435]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP436]], label [[IF_THEN438:%.*]], label [[IF_ELSE439:%.*]]
+// SIMD-ONLY0:       if.then438:
+// SIMD-ONLY0-NEXT:    [[TMP276:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP276]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END440:%.*]]
+// SIMD-ONLY0:       if.else439:
+// SIMD-ONLY0-NEXT:    [[TMP277:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP277]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END440]]
+// SIMD-ONLY0:       if.end440:
+// SIMD-ONLY0-NEXT:    [[TMP278:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV441:%.*]] = sext i8 [[TMP278]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP279:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV442:%.*]] = sext i8 [[TMP279]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP443:%.*]] = icmp eq i32 [[CONV441]], [[CONV442]]
+// SIMD-ONLY0-NEXT:    [[CONV444:%.*]] = zext i1 [[CMP443]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV445:%.*]] = trunc i32 [[CONV444]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV445]], ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP280:%.*]] = load i8, ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL446:%.*]] = icmp ne i8 [[TMP280]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL446]], label [[IF_THEN447:%.*]], label [[IF_END448:%.*]]
+// SIMD-ONLY0:       if.then447:
+// SIMD-ONLY0-NEXT:    [[TMP281:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP281]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END448]]
+// SIMD-ONLY0:       if.end448:
+// SIMD-ONLY0-NEXT:    [[TMP282:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV449:%.*]] = sext i8 [[TMP282]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP283:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV450:%.*]] = sext i8 [[TMP283]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP451:%.*]] = icmp eq i32 [[CONV449]], [[CONV450]]
+// SIMD-ONLY0-NEXT:    [[CONV452:%.*]] = zext i1 [[CMP451]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV453:%.*]] = trunc i32 [[CONV452]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV453]], ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP284:%.*]] = load i8, ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL454:%.*]] = icmp ne i8 [[TMP284]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL454]], label [[IF_THEN455:%.*]], label [[IF_END456:%.*]]
+// SIMD-ONLY0:       if.then455:
+// SIMD-ONLY0-NEXT:    [[TMP285:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP285]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END456]]
+// SIMD-ONLY0:       if.end456:
+// SIMD-ONLY0-NEXT:    [[TMP286:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV457:%.*]] = sext i8 [[TMP286]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP287:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV458:%.*]] = sext i8 [[TMP287]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP459:%.*]] = icmp eq i32 [[CONV457]], [[CONV458]]
+// SIMD-ONLY0-NEXT:    [[CONV460:%.*]] = zext i1 [[CMP459]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV461:%.*]] = trunc i32 [[CONV460]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV461]], ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP288:%.*]] = load i8, ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL462:%.*]] = icmp ne i8 [[TMP288]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL462]], label [[IF_THEN463:%.*]], label [[IF_ELSE464:%.*]]
+// SIMD-ONLY0:       if.then463:
+// SIMD-ONLY0-NEXT:    [[TMP289:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP289]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END465:%.*]]
+// SIMD-ONLY0:       if.else464:
+// SIMD-ONLY0-NEXT:    [[TMP290:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP290]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END465]]
+// SIMD-ONLY0:       if.end465:
+// SIMD-ONLY0-NEXT:    [[TMP291:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV466:%.*]] = sext i8 [[TMP291]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP292:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV467:%.*]] = sext i8 [[TMP292]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP468:%.*]] = icmp eq i32 [[CONV466]], [[CONV467]]
+// SIMD-ONLY0-NEXT:    [[CONV469:%.*]] = zext i1 [[CMP468]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV470:%.*]] = trunc i32 [[CONV469]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV470]], ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP293:%.*]] = load i8, ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL471:%.*]] = icmp ne i8 [[TMP293]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL471]], label [[IF_THEN472:%.*]], label [[IF_ELSE473:%.*]]
+// SIMD-ONLY0:       if.then472:
+// SIMD-ONLY0-NEXT:    [[TMP294:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP294]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END474:%.*]]
+// SIMD-ONLY0:       if.else473:
+// SIMD-ONLY0-NEXT:    [[TMP295:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP295]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END474]]
+// SIMD-ONLY0:       if.end474:
+// SIMD-ONLY0-NEXT:    [[TMP296:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP296]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP297:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV475:%.*]] = sext i8 [[TMP297]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP298:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV476:%.*]] = sext i8 [[TMP298]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP477:%.*]] = icmp sgt i32 [[CONV475]], [[CONV476]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP477]], label [[IF_THEN479:%.*]], label [[IF_END480:%.*]]
+// SIMD-ONLY0:       if.then479:
+// SIMD-ONLY0-NEXT:    [[TMP299:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP299]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END480]]
+// SIMD-ONLY0:       if.end480:
+// SIMD-ONLY0-NEXT:    [[TMP300:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP300]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP301:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV481:%.*]] = sext i8 [[TMP301]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP302:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV482:%.*]] = sext i8 [[TMP302]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP483:%.*]] = icmp sgt i32 [[CONV481]], [[CONV482]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP483]], label [[IF_THEN485:%.*]], label [[IF_END486:%.*]]
+// SIMD-ONLY0:       if.then485:
+// SIMD-ONLY0-NEXT:    [[TMP303:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP303]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END486]]
+// SIMD-ONLY0:       if.end486:
+// SIMD-ONLY0-NEXT:    [[TMP304:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP304]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP305:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV487:%.*]] = sext i8 [[TMP305]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP306:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV488:%.*]] = sext i8 [[TMP306]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP489:%.*]] = icmp slt i32 [[CONV487]], [[CONV488]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP489]], label [[IF_THEN491:%.*]], label [[IF_END492:%.*]]
+// SIMD-ONLY0:       if.then491:
+// SIMD-ONLY0-NEXT:    [[TMP307:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP307]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END492]]
+// SIMD-ONLY0:       if.end492:
+// SIMD-ONLY0-NEXT:    [[TMP308:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP308]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP309:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV493:%.*]] = sext i8 [[TMP309]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP310:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV494:%.*]] = sext i8 [[TMP310]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP495:%.*]] = icmp slt i32 [[CONV493]], [[CONV494]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP495]], label [[IF_THEN497:%.*]], label [[IF_END498:%.*]]
+// SIMD-ONLY0:       if.then497:
+// SIMD-ONLY0-NEXT:    [[TMP311:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP311]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END498]]
+// SIMD-ONLY0:       if.end498:
+// SIMD-ONLY0-NEXT:    [[TMP312:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP312]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP313:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV499:%.*]] = sext i8 [[TMP313]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP314:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV500:%.*]] = sext i8 [[TMP314]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP501:%.*]] = icmp eq i32 [[CONV499]], [[CONV500]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP501]], label [[IF_THEN503:%.*]], label [[IF_END504:%.*]]
+// SIMD-ONLY0:       if.then503:
+// SIMD-ONLY0-NEXT:    [[TMP315:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP315]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END504]]
+// SIMD-ONLY0:       if.end504:
+// SIMD-ONLY0-NEXT:    [[TMP316:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP316]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP317:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV505:%.*]] = sext i8 [[TMP317]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP318:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV506:%.*]] = sext i8 [[TMP318]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP507:%.*]] = icmp eq i32 [[CONV505]], [[CONV506]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP507]], label [[IF_THEN509:%.*]], label [[IF_END510:%.*]]
+// SIMD-ONLY0:       if.then509:
+// SIMD-ONLY0-NEXT:    [[TMP319:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP319]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END510]]
+// SIMD-ONLY0:       if.end510:
+// SIMD-ONLY0-NEXT:    [[TMP320:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV511:%.*]] = sext i8 [[TMP320]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP321:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV512:%.*]] = sext i8 [[TMP321]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP513:%.*]] = icmp sgt i32 [[CONV511]], [[CONV512]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP513]], label [[IF_THEN515:%.*]], label [[IF_END516:%.*]]
+// SIMD-ONLY0:       if.then515:
+// SIMD-ONLY0-NEXT:    [[TMP322:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP322]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END516]]
+// SIMD-ONLY0:       if.end516:
+// SIMD-ONLY0-NEXT:    [[TMP323:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP323]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP324:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV517:%.*]] = sext i8 [[TMP324]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP325:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV518:%.*]] = sext i8 [[TMP325]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP519:%.*]] = icmp sgt i32 [[CONV517]], [[CONV518]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP519]], label [[IF_THEN521:%.*]], label [[IF_END522:%.*]]
+// SIMD-ONLY0:       if.then521:
+// SIMD-ONLY0-NEXT:    [[TMP326:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP326]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END522]]
+// SIMD-ONLY0:       if.end522:
+// SIMD-ONLY0-NEXT:    [[TMP327:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP327]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP328:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV523:%.*]] = sext i8 [[TMP328]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP329:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV524:%.*]] = sext i8 [[TMP329]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP525:%.*]] = icmp slt i32 [[CONV523]], [[CONV524]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP525]], label [[IF_THEN527:%.*]], label [[IF_END528:%.*]]
+// SIMD-ONLY0:       if.then527:
+// SIMD-ONLY0-NEXT:    [[TMP330:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP330]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END528]]
+// SIMD-ONLY0:       if.end528:
+// SIMD-ONLY0-NEXT:    [[TMP331:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP331]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP332:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV529:%.*]] = sext i8 [[TMP332]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP333:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV530:%.*]] = sext i8 [[TMP333]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP531:%.*]] = icmp slt i32 [[CONV529]], [[CONV530]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP531]], label [[IF_THEN533:%.*]], label [[IF_END534:%.*]]
+// SIMD-ONLY0:       if.then533:
+// SIMD-ONLY0-NEXT:    [[TMP334:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP334]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END534]]
+// SIMD-ONLY0:       if.end534:
+// SIMD-ONLY0-NEXT:    [[TMP335:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP335]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP336:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV535:%.*]] = sext i8 [[TMP336]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP337:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV536:%.*]] = sext i8 [[TMP337]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP537:%.*]] = icmp eq i32 [[CONV535]], [[CONV536]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP537]], label [[IF_THEN539:%.*]], label [[IF_END540:%.*]]
+// SIMD-ONLY0:       if.then539:
+// SIMD-ONLY0-NEXT:    [[TMP338:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP338]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END540]]
+// SIMD-ONLY0:       if.end540:
+// SIMD-ONLY0-NEXT:    [[TMP339:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP339]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP340:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV541:%.*]] = sext i8 [[TMP340]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP341:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV542:%.*]] = sext i8 [[TMP341]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP543:%.*]] = icmp eq i32 [[CONV541]], [[CONV542]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP543]], label [[IF_THEN545:%.*]], label [[IF_END546:%.*]]
+// SIMD-ONLY0:       if.then545:
+// SIMD-ONLY0-NEXT:    [[TMP342:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP342]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END546]]
+// SIMD-ONLY0:       if.end546:
+// SIMD-ONLY0-NEXT:    [[TMP343:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP343]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP344:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV547:%.*]] = sext i8 [[TMP344]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP345:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV548:%.*]] = sext i8 [[TMP345]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP549:%.*]] = icmp eq i32 [[CONV547]], [[CONV548]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP549]], label [[IF_THEN551:%.*]], label [[IF_ELSE552:%.*]]
+// SIMD-ONLY0:       if.then551:
+// SIMD-ONLY0-NEXT:    [[TMP346:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP346]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END553:%.*]]
+// SIMD-ONLY0:       if.else552:
+// SIMD-ONLY0-NEXT:    [[TMP347:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP347]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END553]]
+// SIMD-ONLY0:       if.end553:
+// SIMD-ONLY0-NEXT:    [[TMP348:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV554:%.*]] = sext i8 [[TMP348]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP349:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV555:%.*]] = sext i8 [[TMP349]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP556:%.*]] = icmp eq i32 [[CONV554]], [[CONV555]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP556]], label [[IF_THEN558:%.*]], label [[IF_ELSE559:%.*]]
+// SIMD-ONLY0:       if.then558:
+// SIMD-ONLY0-NEXT:    [[TMP350:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP350]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END560:%.*]]
+// SIMD-ONLY0:       if.else559:
+// SIMD-ONLY0-NEXT:    [[TMP351:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP351]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END560]]
+// SIMD-ONLY0:       if.end560:
+// SIMD-ONLY0-NEXT:    [[TMP352:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV561:%.*]] = sext i8 [[TMP352]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP353:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV562:%.*]] = sext i8 [[TMP353]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP563:%.*]] = icmp eq i32 [[CONV561]], [[CONV562]]
+// SIMD-ONLY0-NEXT:    [[CONV564:%.*]] = zext i1 [[CMP563]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV565:%.*]] = trunc i32 [[CONV564]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV565]], ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP354:%.*]] = load i8, ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL566:%.*]] = icmp ne i8 [[TMP354]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL566]], label [[IF_THEN567:%.*]], label [[IF_END568:%.*]]
+// SIMD-ONLY0:       if.then567:
+// SIMD-ONLY0-NEXT:    [[TMP355:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP355]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END568]]
+// SIMD-ONLY0:       if.end568:
+// SIMD-ONLY0-NEXT:    [[TMP356:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV569:%.*]] = sext i8 [[TMP356]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP357:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV570:%.*]] = sext i8 [[TMP357]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP571:%.*]] = icmp eq i32 [[CONV569]], [[CONV570]]
+// SIMD-ONLY0-NEXT:    [[CONV572:%.*]] = zext i1 [[CMP571]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV573:%.*]] = trunc i32 [[CONV572]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV573]], ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP358:%.*]] = load i8, ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL574:%.*]] = icmp ne i8 [[TMP358]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL574]], label [[IF_THEN575:%.*]], label [[IF_END576:%.*]]
+// SIMD-ONLY0:       if.then575:
+// SIMD-ONLY0-NEXT:    [[TMP359:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP359]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END576]]
+// SIMD-ONLY0:       if.end576:
+// SIMD-ONLY0-NEXT:    [[TMP360:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV577:%.*]] = sext i8 [[TMP360]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP361:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV578:%.*]] = sext i8 [[TMP361]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP579:%.*]] = icmp eq i32 [[CONV577]], [[CONV578]]
+// SIMD-ONLY0-NEXT:    [[CONV580:%.*]] = zext i1 [[CMP579]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV581:%.*]] = trunc i32 [[CONV580]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV581]], ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP362:%.*]] = load i8, ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL582:%.*]] = icmp ne i8 [[TMP362]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL582]], label [[IF_THEN583:%.*]], label [[IF_ELSE584:%.*]]
+// SIMD-ONLY0:       if.then583:
+// SIMD-ONLY0-NEXT:    [[TMP363:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP363]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END585:%.*]]
+// SIMD-ONLY0:       if.else584:
+// SIMD-ONLY0-NEXT:    [[TMP364:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP364]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END585]]
+// SIMD-ONLY0:       if.end585:
+// SIMD-ONLY0-NEXT:    [[TMP365:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV586:%.*]] = sext i8 [[TMP365]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP366:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV587:%.*]] = sext i8 [[TMP366]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP588:%.*]] = icmp eq i32 [[CONV586]], [[CONV587]]
+// SIMD-ONLY0-NEXT:    [[CONV589:%.*]] = zext i1 [[CMP588]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV590:%.*]] = trunc i32 [[CONV589]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV590]], ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP367:%.*]] = load i8, ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL591:%.*]] = icmp ne i8 [[TMP367]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL591]], label [[IF_THEN592:%.*]], label [[IF_ELSE593:%.*]]
+// SIMD-ONLY0:       if.then592:
+// SIMD-ONLY0-NEXT:    [[TMP368:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP368]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END594:%.*]]
+// SIMD-ONLY0:       if.else593:
+// SIMD-ONLY0-NEXT:    [[TMP369:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP369]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END594]]
+// SIMD-ONLY0:       if.end594:
+// SIMD-ONLY0-NEXT:    [[TMP370:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP370]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP371:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV595:%.*]] = sext i8 [[TMP371]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP372:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV596:%.*]] = sext i8 [[TMP372]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP597:%.*]] = icmp sgt i32 [[CONV595]], [[CONV596]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP597]], label [[IF_THEN599:%.*]], label [[IF_END600:%.*]]
+// SIMD-ONLY0:       if.then599:
+// SIMD-ONLY0-NEXT:    [[TMP373:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP373]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END600]]
+// SIMD-ONLY0:       if.end600:
+// SIMD-ONLY0-NEXT:    [[TMP374:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP374]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP375:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV601:%.*]] = sext i8 [[TMP375]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP376:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV602:%.*]] = sext i8 [[TMP376]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP603:%.*]] = icmp sgt i32 [[CONV601]], [[CONV602]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP603]], label [[IF_THEN605:%.*]], label [[IF_END606:%.*]]
+// SIMD-ONLY0:       if.then605:
+// SIMD-ONLY0-NEXT:    [[TMP377:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP377]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END606]]
+// SIMD-ONLY0:       if.end606:
+// SIMD-ONLY0-NEXT:    [[TMP378:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP378]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP379:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV607:%.*]] = sext i8 [[TMP379]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP380:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV608:%.*]] = sext i8 [[TMP380]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP609:%.*]] = icmp slt i32 [[CONV607]], [[CONV608]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP609]], label [[IF_THEN611:%.*]], label [[IF_END612:%.*]]
+// SIMD-ONLY0:       if.then611:
+// SIMD-ONLY0-NEXT:    [[TMP381:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP381]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END612]]
+// SIMD-ONLY0:       if.end612:
+// SIMD-ONLY0-NEXT:    [[TMP382:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP382]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP383:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV613:%.*]] = sext i8 [[TMP383]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP384:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV614:%.*]] = sext i8 [[TMP384]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP615:%.*]] = icmp slt i32 [[CONV613]], [[CONV614]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP615]], label [[IF_THEN617:%.*]], label [[IF_END618:%.*]]
+// SIMD-ONLY0:       if.then617:
+// SIMD-ONLY0-NEXT:    [[TMP385:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP385]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END618]]
+// SIMD-ONLY0:       if.end618:
+// SIMD-ONLY0-NEXT:    [[TMP386:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP386]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP387:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV619:%.*]] = sext i8 [[TMP387]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP388:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV620:%.*]] = sext i8 [[TMP388]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP621:%.*]] = icmp eq i32 [[CONV619]], [[CONV620]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP621]], label [[IF_THEN623:%.*]], label [[IF_END624:%.*]]
+// SIMD-ONLY0:       if.then623:
+// SIMD-ONLY0-NEXT:    [[TMP389:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP389]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END624]]
+// SIMD-ONLY0:       if.end624:
+// SIMD-ONLY0-NEXT:    [[TMP390:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP390]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP391:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV625:%.*]] = sext i8 [[TMP391]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP392:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV626:%.*]] = sext i8 [[TMP392]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP627:%.*]] = icmp eq i32 [[CONV625]], [[CONV626]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP627]], label [[IF_THEN629:%.*]], label [[IF_END630:%.*]]
+// SIMD-ONLY0:       if.then629:
+// SIMD-ONLY0-NEXT:    [[TMP393:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP393]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END630]]
+// SIMD-ONLY0:       if.end630:
+// SIMD-ONLY0-NEXT:    [[TMP394:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV631:%.*]] = sext i8 [[TMP394]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP395:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV632:%.*]] = sext i8 [[TMP395]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP633:%.*]] = icmp sgt i32 [[CONV631]], [[CONV632]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP633]], label [[IF_THEN635:%.*]], label [[IF_END636:%.*]]
+// SIMD-ONLY0:       if.then635:
+// SIMD-ONLY0-NEXT:    [[TMP396:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP396]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END636]]
+// SIMD-ONLY0:       if.end636:
+// SIMD-ONLY0-NEXT:    [[TMP397:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP397]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP398:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV637:%.*]] = sext i8 [[TMP398]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP399:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV638:%.*]] = sext i8 [[TMP399]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP639:%.*]] = icmp sgt i32 [[CONV637]], [[CONV638]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP639]], label [[IF_THEN641:%.*]], label [[IF_END642:%.*]]
+// SIMD-ONLY0:       if.then641:
+// SIMD-ONLY0-NEXT:    [[TMP400:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP400]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END642]]
+// SIMD-ONLY0:       if.end642:
+// SIMD-ONLY0-NEXT:    [[TMP401:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP401]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP402:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV643:%.*]] = sext i8 [[TMP402]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP403:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV644:%.*]] = sext i8 [[TMP403]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP645:%.*]] = icmp slt i32 [[CONV643]], [[CONV644]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP645]], label [[IF_THEN647:%.*]], label [[IF_END648:%.*]]
+// SIMD-ONLY0:       if.then647:
+// SIMD-ONLY0-NEXT:    [[TMP404:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP404]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END648]]
+// SIMD-ONLY0:       if.end648:
+// SIMD-ONLY0-NEXT:    [[TMP405:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP405]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP406:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV649:%.*]] = sext i8 [[TMP406]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP407:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV650:%.*]] = sext i8 [[TMP407]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP651:%.*]] = icmp slt i32 [[CONV649]], [[CONV650]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP651]], label [[IF_THEN653:%.*]], label [[IF_END654:%.*]]
+// SIMD-ONLY0:       if.then653:
+// SIMD-ONLY0-NEXT:    [[TMP408:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP408]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END654]]
+// SIMD-ONLY0:       if.end654:
+// SIMD-ONLY0-NEXT:    [[TMP409:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP409]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP410:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV655:%.*]] = sext i8 [[TMP410]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP411:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV656:%.*]] = sext i8 [[TMP411]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP657:%.*]] = icmp eq i32 [[CONV655]], [[CONV656]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP657]], label [[IF_THEN659:%.*]], label [[IF_END660:%.*]]
+// SIMD-ONLY0:       if.then659:
+// SIMD-ONLY0-NEXT:    [[TMP412:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP412]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END660]]
+// SIMD-ONLY0:       if.end660:
+// SIMD-ONLY0-NEXT:    [[TMP413:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP413]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP414:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV661:%.*]] = sext i8 [[TMP414]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP415:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV662:%.*]] = sext i8 [[TMP415]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP663:%.*]] = icmp eq i32 [[CONV661]], [[CONV662]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP663]], label [[IF_THEN665:%.*]], label [[IF_END666:%.*]]
+// SIMD-ONLY0:       if.then665:
+// SIMD-ONLY0-NEXT:    [[TMP416:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP416]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END666]]
+// SIMD-ONLY0:       if.end666:
+// SIMD-ONLY0-NEXT:    [[TMP417:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP417]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP418:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV667:%.*]] = sext i8 [[TMP418]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP419:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV668:%.*]] = sext i8 [[TMP419]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP669:%.*]] = icmp eq i32 [[CONV667]], [[CONV668]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP669]], label [[IF_THEN671:%.*]], label [[IF_ELSE672:%.*]]
+// SIMD-ONLY0:       if.then671:
+// SIMD-ONLY0-NEXT:    [[TMP420:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP420]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END673:%.*]]
+// SIMD-ONLY0:       if.else672:
+// SIMD-ONLY0-NEXT:    [[TMP421:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP421]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END673]]
+// SIMD-ONLY0:       if.end673:
+// SIMD-ONLY0-NEXT:    [[TMP422:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV674:%.*]] = sext i8 [[TMP422]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP423:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV675:%.*]] = sext i8 [[TMP423]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP676:%.*]] = icmp eq i32 [[CONV674]], [[CONV675]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP676]], label [[IF_THEN678:%.*]], label [[IF_ELSE679:%.*]]
+// SIMD-ONLY0:       if.then678:
+// SIMD-ONLY0-NEXT:    [[TMP424:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP424]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END680:%.*]]
+// SIMD-ONLY0:       if.else679:
+// SIMD-ONLY0-NEXT:    [[TMP425:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP425]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END680]]
+// SIMD-ONLY0:       if.end680:
+// SIMD-ONLY0-NEXT:    [[TMP426:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV681:%.*]] = sext i8 [[TMP426]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP427:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV682:%.*]] = sext i8 [[TMP427]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP683:%.*]] = icmp eq i32 [[CONV681]], [[CONV682]]
+// SIMD-ONLY0-NEXT:    [[CONV684:%.*]] = zext i1 [[CMP683]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV685:%.*]] = trunc i32 [[CONV684]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV685]], ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP428:%.*]] = load i8, ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL686:%.*]] = icmp ne i8 [[TMP428]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL686]], label [[IF_THEN687:%.*]], label [[IF_END688:%.*]]
+// SIMD-ONLY0:       if.then687:
+// SIMD-ONLY0-NEXT:    [[TMP429:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP429]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END688]]
+// SIMD-ONLY0:       if.end688:
+// SIMD-ONLY0-NEXT:    [[TMP430:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV689:%.*]] = sext i8 [[TMP430]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP431:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV690:%.*]] = sext i8 [[TMP431]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP691:%.*]] = icmp eq i32 [[CONV689]], [[CONV690]]
+// SIMD-ONLY0-NEXT:    [[CONV692:%.*]] = zext i1 [[CMP691]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV693:%.*]] = trunc i32 [[CONV692]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV693]], ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP432:%.*]] = load i8, ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL694:%.*]] = icmp ne i8 [[TMP432]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL694]], label [[IF_THEN695:%.*]], label [[IF_END696:%.*]]
+// SIMD-ONLY0:       if.then695:
+// SIMD-ONLY0-NEXT:    [[TMP433:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP433]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END696]]
+// SIMD-ONLY0:       if.end696:
+// SIMD-ONLY0-NEXT:    [[TMP434:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV697:%.*]] = sext i8 [[TMP434]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP435:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV698:%.*]] = sext i8 [[TMP435]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP699:%.*]] = icmp eq i32 [[CONV697]], [[CONV698]]
+// SIMD-ONLY0-NEXT:    [[CONV700:%.*]] = zext i1 [[CMP699]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV701:%.*]] = trunc i32 [[CONV700]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV701]], ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP436:%.*]] = load i8, ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL702:%.*]] = icmp ne i8 [[TMP436]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL702]], label [[IF_THEN703:%.*]], label [[IF_ELSE704:%.*]]
+// SIMD-ONLY0:       if.then703:
+// SIMD-ONLY0-NEXT:    [[TMP437:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP437]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END705:%.*]]
+// SIMD-ONLY0:       if.else704:
+// SIMD-ONLY0-NEXT:    [[TMP438:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP438]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END705]]
+// SIMD-ONLY0:       if.end705:
+// SIMD-ONLY0-NEXT:    [[TMP439:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV706:%.*]] = sext i8 [[TMP439]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP440:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV707:%.*]] = sext i8 [[TMP440]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP708:%.*]] = icmp eq i32 [[CONV706]], [[CONV707]]
+// SIMD-ONLY0-NEXT:    [[CONV709:%.*]] = zext i1 [[CMP708]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV710:%.*]] = trunc i32 [[CONV709]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV710]], ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP441:%.*]] = load i8, ptr [[CR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL711:%.*]] = icmp ne i8 [[TMP441]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL711]], label [[IF_THEN712:%.*]], label [[IF_ELSE713:%.*]]
+// SIMD-ONLY0:       if.then712:
+// SIMD-ONLY0-NEXT:    [[TMP442:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP442]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END714:%.*]]
+// SIMD-ONLY0:       if.else713:
+// SIMD-ONLY0-NEXT:    [[TMP443:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP443]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END714]]
+// SIMD-ONLY0:       if.end714:
+// SIMD-ONLY0-NEXT:    [[TMP444:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP444]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP445:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV715:%.*]] = zext i8 [[TMP445]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP446:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV716:%.*]] = zext i8 [[TMP446]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP717:%.*]] = icmp sgt i32 [[CONV715]], [[CONV716]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP717]], label [[IF_THEN719:%.*]], label [[IF_END720:%.*]]
+// SIMD-ONLY0:       if.then719:
+// SIMD-ONLY0-NEXT:    [[TMP447:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP447]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END720]]
+// SIMD-ONLY0:       if.end720:
+// SIMD-ONLY0-NEXT:    [[TMP448:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP448]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP449:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV721:%.*]] = zext i8 [[TMP449]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP450:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV722:%.*]] = zext i8 [[TMP450]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP723:%.*]] = icmp sgt i32 [[CONV721]], [[CONV722]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP723]], label [[IF_THEN725:%.*]], label [[IF_END726:%.*]]
+// SIMD-ONLY0:       if.then725:
+// SIMD-ONLY0-NEXT:    [[TMP451:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP451]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END726]]
+// SIMD-ONLY0:       if.end726:
+// SIMD-ONLY0-NEXT:    [[TMP452:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP452]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP453:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV727:%.*]] = zext i8 [[TMP453]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP454:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV728:%.*]] = zext i8 [[TMP454]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP729:%.*]] = icmp slt i32 [[CONV727]], [[CONV728]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP729]], label [[IF_THEN731:%.*]], label [[IF_END732:%.*]]
+// SIMD-ONLY0:       if.then731:
+// SIMD-ONLY0-NEXT:    [[TMP455:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP455]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END732]]
+// SIMD-ONLY0:       if.end732:
+// SIMD-ONLY0-NEXT:    [[TMP456:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP456]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP457:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV733:%.*]] = zext i8 [[TMP457]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP458:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV734:%.*]] = zext i8 [[TMP458]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP735:%.*]] = icmp slt i32 [[CONV733]], [[CONV734]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP735]], label [[IF_THEN737:%.*]], label [[IF_END738:%.*]]
+// SIMD-ONLY0:       if.then737:
+// SIMD-ONLY0-NEXT:    [[TMP459:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP459]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END738]]
+// SIMD-ONLY0:       if.end738:
+// SIMD-ONLY0-NEXT:    [[TMP460:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP460]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP461:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV739:%.*]] = zext i8 [[TMP461]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP462:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV740:%.*]] = zext i8 [[TMP462]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP741:%.*]] = icmp eq i32 [[CONV739]], [[CONV740]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP741]], label [[IF_THEN743:%.*]], label [[IF_END744:%.*]]
+// SIMD-ONLY0:       if.then743:
+// SIMD-ONLY0-NEXT:    [[TMP463:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP463]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END744]]
+// SIMD-ONLY0:       if.end744:
+// SIMD-ONLY0-NEXT:    [[TMP464:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP464]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP465:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV745:%.*]] = zext i8 [[TMP465]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP466:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV746:%.*]] = zext i8 [[TMP466]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP747:%.*]] = icmp eq i32 [[CONV745]], [[CONV746]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP747]], label [[IF_THEN749:%.*]], label [[IF_END750:%.*]]
+// SIMD-ONLY0:       if.then749:
+// SIMD-ONLY0-NEXT:    [[TMP467:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP467]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END750]]
+// SIMD-ONLY0:       if.end750:
+// SIMD-ONLY0-NEXT:    [[TMP468:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV751:%.*]] = zext i8 [[TMP468]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP469:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV752:%.*]] = zext i8 [[TMP469]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP753:%.*]] = icmp sgt i32 [[CONV751]], [[CONV752]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP753]], label [[IF_THEN755:%.*]], label [[IF_END756:%.*]]
+// SIMD-ONLY0:       if.then755:
+// SIMD-ONLY0-NEXT:    [[TMP470:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP470]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END756]]
+// SIMD-ONLY0:       if.end756:
+// SIMD-ONLY0-NEXT:    [[TMP471:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP471]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP472:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV757:%.*]] = zext i8 [[TMP472]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP473:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV758:%.*]] = zext i8 [[TMP473]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP759:%.*]] = icmp sgt i32 [[CONV757]], [[CONV758]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP759]], label [[IF_THEN761:%.*]], label [[IF_END762:%.*]]
+// SIMD-ONLY0:       if.then761:
+// SIMD-ONLY0-NEXT:    [[TMP474:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP474]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END762]]
+// SIMD-ONLY0:       if.end762:
+// SIMD-ONLY0-NEXT:    [[TMP475:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP475]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP476:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV763:%.*]] = zext i8 [[TMP476]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP477:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV764:%.*]] = zext i8 [[TMP477]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP765:%.*]] = icmp slt i32 [[CONV763]], [[CONV764]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP765]], label [[IF_THEN767:%.*]], label [[IF_END768:%.*]]
+// SIMD-ONLY0:       if.then767:
+// SIMD-ONLY0-NEXT:    [[TMP478:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP478]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END768]]
+// SIMD-ONLY0:       if.end768:
+// SIMD-ONLY0-NEXT:    [[TMP479:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP479]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP480:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV769:%.*]] = zext i8 [[TMP480]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP481:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV770:%.*]] = zext i8 [[TMP481]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP771:%.*]] = icmp slt i32 [[CONV769]], [[CONV770]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP771]], label [[IF_THEN773:%.*]], label [[IF_END774:%.*]]
+// SIMD-ONLY0:       if.then773:
+// SIMD-ONLY0-NEXT:    [[TMP482:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP482]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END774]]
+// SIMD-ONLY0:       if.end774:
+// SIMD-ONLY0-NEXT:    [[TMP483:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP483]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP484:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV775:%.*]] = zext i8 [[TMP484]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP485:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV776:%.*]] = zext i8 [[TMP485]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP777:%.*]] = icmp eq i32 [[CONV775]], [[CONV776]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP777]], label [[IF_THEN779:%.*]], label [[IF_END780:%.*]]
+// SIMD-ONLY0:       if.then779:
+// SIMD-ONLY0-NEXT:    [[TMP486:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP486]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END780]]
+// SIMD-ONLY0:       if.end780:
+// SIMD-ONLY0-NEXT:    [[TMP487:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP487]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP488:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV781:%.*]] = zext i8 [[TMP488]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP489:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV782:%.*]] = zext i8 [[TMP489]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP783:%.*]] = icmp eq i32 [[CONV781]], [[CONV782]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP783]], label [[IF_THEN785:%.*]], label [[IF_END786:%.*]]
+// SIMD-ONLY0:       if.then785:
+// SIMD-ONLY0-NEXT:    [[TMP490:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP490]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END786]]
+// SIMD-ONLY0:       if.end786:
+// SIMD-ONLY0-NEXT:    [[TMP491:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP491]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP492:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV787:%.*]] = zext i8 [[TMP492]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP493:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV788:%.*]] = zext i8 [[TMP493]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP789:%.*]] = icmp eq i32 [[CONV787]], [[CONV788]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP789]], label [[IF_THEN791:%.*]], label [[IF_ELSE792:%.*]]
+// SIMD-ONLY0:       if.then791:
+// SIMD-ONLY0-NEXT:    [[TMP494:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP494]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END793:%.*]]
+// SIMD-ONLY0:       if.else792:
+// SIMD-ONLY0-NEXT:    [[TMP495:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP495]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END793]]
+// SIMD-ONLY0:       if.end793:
+// SIMD-ONLY0-NEXT:    [[TMP496:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV794:%.*]] = zext i8 [[TMP496]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP497:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV795:%.*]] = zext i8 [[TMP497]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP796:%.*]] = icmp eq i32 [[CONV794]], [[CONV795]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP796]], label [[IF_THEN798:%.*]], label [[IF_ELSE799:%.*]]
+// SIMD-ONLY0:       if.then798:
+// SIMD-ONLY0-NEXT:    [[TMP498:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP498]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END800:%.*]]
+// SIMD-ONLY0:       if.else799:
+// SIMD-ONLY0-NEXT:    [[TMP499:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP499]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END800]]
+// SIMD-ONLY0:       if.end800:
+// SIMD-ONLY0-NEXT:    [[TMP500:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV801:%.*]] = zext i8 [[TMP500]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP501:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV802:%.*]] = zext i8 [[TMP501]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP803:%.*]] = icmp eq i32 [[CONV801]], [[CONV802]]
+// SIMD-ONLY0-NEXT:    [[CONV804:%.*]] = zext i1 [[CMP803]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV805:%.*]] = trunc i32 [[CONV804]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV805]], ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP502:%.*]] = load i8, ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL806:%.*]] = icmp ne i8 [[TMP502]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL806]], label [[IF_THEN807:%.*]], label [[IF_END808:%.*]]
+// SIMD-ONLY0:       if.then807:
+// SIMD-ONLY0-NEXT:    [[TMP503:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP503]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END808]]
+// SIMD-ONLY0:       if.end808:
+// SIMD-ONLY0-NEXT:    [[TMP504:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV809:%.*]] = zext i8 [[TMP504]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP505:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV810:%.*]] = zext i8 [[TMP505]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP811:%.*]] = icmp eq i32 [[CONV809]], [[CONV810]]
+// SIMD-ONLY0-NEXT:    [[CONV812:%.*]] = zext i1 [[CMP811]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV813:%.*]] = trunc i32 [[CONV812]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV813]], ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP506:%.*]] = load i8, ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL814:%.*]] = icmp ne i8 [[TMP506]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL814]], label [[IF_THEN815:%.*]], label [[IF_END816:%.*]]
+// SIMD-ONLY0:       if.then815:
+// SIMD-ONLY0-NEXT:    [[TMP507:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP507]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END816]]
+// SIMD-ONLY0:       if.end816:
+// SIMD-ONLY0-NEXT:    [[TMP508:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV817:%.*]] = zext i8 [[TMP508]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP509:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV818:%.*]] = zext i8 [[TMP509]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP819:%.*]] = icmp eq i32 [[CONV817]], [[CONV818]]
+// SIMD-ONLY0-NEXT:    [[CONV820:%.*]] = zext i1 [[CMP819]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV821:%.*]] = trunc i32 [[CONV820]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV821]], ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP510:%.*]] = load i8, ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL822:%.*]] = icmp ne i8 [[TMP510]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL822]], label [[IF_THEN823:%.*]], label [[IF_ELSE824:%.*]]
+// SIMD-ONLY0:       if.then823:
+// SIMD-ONLY0-NEXT:    [[TMP511:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP511]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END825:%.*]]
+// SIMD-ONLY0:       if.else824:
+// SIMD-ONLY0-NEXT:    [[TMP512:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP512]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END825]]
+// SIMD-ONLY0:       if.end825:
+// SIMD-ONLY0-NEXT:    [[TMP513:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV826:%.*]] = zext i8 [[TMP513]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP514:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV827:%.*]] = zext i8 [[TMP514]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP828:%.*]] = icmp eq i32 [[CONV826]], [[CONV827]]
+// SIMD-ONLY0-NEXT:    [[CONV829:%.*]] = zext i1 [[CMP828]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV830:%.*]] = trunc i32 [[CONV829]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV830]], ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP515:%.*]] = load i8, ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL831:%.*]] = icmp ne i8 [[TMP515]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL831]], label [[IF_THEN832:%.*]], label [[IF_ELSE833:%.*]]
+// SIMD-ONLY0:       if.then832:
+// SIMD-ONLY0-NEXT:    [[TMP516:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP516]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END834:%.*]]
+// SIMD-ONLY0:       if.else833:
+// SIMD-ONLY0-NEXT:    [[TMP517:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP517]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END834]]
+// SIMD-ONLY0:       if.end834:
+// SIMD-ONLY0-NEXT:    [[TMP518:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP518]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP519:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV835:%.*]] = zext i8 [[TMP519]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP520:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV836:%.*]] = zext i8 [[TMP520]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP837:%.*]] = icmp sgt i32 [[CONV835]], [[CONV836]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP837]], label [[IF_THEN839:%.*]], label [[IF_END840:%.*]]
+// SIMD-ONLY0:       if.then839:
+// SIMD-ONLY0-NEXT:    [[TMP521:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP521]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END840]]
+// SIMD-ONLY0:       if.end840:
+// SIMD-ONLY0-NEXT:    [[TMP522:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP522]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP523:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV841:%.*]] = zext i8 [[TMP523]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP524:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV842:%.*]] = zext i8 [[TMP524]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP843:%.*]] = icmp sgt i32 [[CONV841]], [[CONV842]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP843]], label [[IF_THEN845:%.*]], label [[IF_END846:%.*]]
+// SIMD-ONLY0:       if.then845:
+// SIMD-ONLY0-NEXT:    [[TMP525:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP525]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END846]]
+// SIMD-ONLY0:       if.end846:
+// SIMD-ONLY0-NEXT:    [[TMP526:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP526]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP527:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV847:%.*]] = zext i8 [[TMP527]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP528:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV848:%.*]] = zext i8 [[TMP528]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP849:%.*]] = icmp slt i32 [[CONV847]], [[CONV848]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP849]], label [[IF_THEN851:%.*]], label [[IF_END852:%.*]]
+// SIMD-ONLY0:       if.then851:
+// SIMD-ONLY0-NEXT:    [[TMP529:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP529]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END852]]
+// SIMD-ONLY0:       if.end852:
+// SIMD-ONLY0-NEXT:    [[TMP530:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP530]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP531:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV853:%.*]] = zext i8 [[TMP531]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP532:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV854:%.*]] = zext i8 [[TMP532]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP855:%.*]] = icmp slt i32 [[CONV853]], [[CONV854]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP855]], label [[IF_THEN857:%.*]], label [[IF_END858:%.*]]
+// SIMD-ONLY0:       if.then857:
+// SIMD-ONLY0-NEXT:    [[TMP533:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP533]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END858]]
+// SIMD-ONLY0:       if.end858:
+// SIMD-ONLY0-NEXT:    [[TMP534:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP534]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP535:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV859:%.*]] = zext i8 [[TMP535]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP536:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV860:%.*]] = zext i8 [[TMP536]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP861:%.*]] = icmp eq i32 [[CONV859]], [[CONV860]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP861]], label [[IF_THEN863:%.*]], label [[IF_END864:%.*]]
+// SIMD-ONLY0:       if.then863:
+// SIMD-ONLY0-NEXT:    [[TMP537:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP537]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END864]]
+// SIMD-ONLY0:       if.end864:
+// SIMD-ONLY0-NEXT:    [[TMP538:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP538]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP539:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV865:%.*]] = zext i8 [[TMP539]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP540:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV866:%.*]] = zext i8 [[TMP540]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP867:%.*]] = icmp eq i32 [[CONV865]], [[CONV866]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP867]], label [[IF_THEN869:%.*]], label [[IF_END870:%.*]]
+// SIMD-ONLY0:       if.then869:
+// SIMD-ONLY0-NEXT:    [[TMP541:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP541]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END870]]
+// SIMD-ONLY0:       if.end870:
+// SIMD-ONLY0-NEXT:    [[TMP542:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV871:%.*]] = zext i8 [[TMP542]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP543:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV872:%.*]] = zext i8 [[TMP543]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP873:%.*]] = icmp sgt i32 [[CONV871]], [[CONV872]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP873]], label [[IF_THEN875:%.*]], label [[IF_END876:%.*]]
+// SIMD-ONLY0:       if.then875:
+// SIMD-ONLY0-NEXT:    [[TMP544:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP544]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END876]]
+// SIMD-ONLY0:       if.end876:
+// SIMD-ONLY0-NEXT:    [[TMP545:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP545]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP546:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV877:%.*]] = zext i8 [[TMP546]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP547:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV878:%.*]] = zext i8 [[TMP547]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP879:%.*]] = icmp sgt i32 [[CONV877]], [[CONV878]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP879]], label [[IF_THEN881:%.*]], label [[IF_END882:%.*]]
+// SIMD-ONLY0:       if.then881:
+// SIMD-ONLY0-NEXT:    [[TMP548:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP548]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END882]]
+// SIMD-ONLY0:       if.end882:
+// SIMD-ONLY0-NEXT:    [[TMP549:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP549]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP550:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV883:%.*]] = zext i8 [[TMP550]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP551:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV884:%.*]] = zext i8 [[TMP551]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP885:%.*]] = icmp slt i32 [[CONV883]], [[CONV884]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP885]], label [[IF_THEN887:%.*]], label [[IF_END888:%.*]]
+// SIMD-ONLY0:       if.then887:
+// SIMD-ONLY0-NEXT:    [[TMP552:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP552]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END888]]
+// SIMD-ONLY0:       if.end888:
+// SIMD-ONLY0-NEXT:    [[TMP553:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP553]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP554:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV889:%.*]] = zext i8 [[TMP554]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP555:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV890:%.*]] = zext i8 [[TMP555]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP891:%.*]] = icmp slt i32 [[CONV889]], [[CONV890]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP891]], label [[IF_THEN893:%.*]], label [[IF_END894:%.*]]
+// SIMD-ONLY0:       if.then893:
+// SIMD-ONLY0-NEXT:    [[TMP556:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP556]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END894]]
+// SIMD-ONLY0:       if.end894:
+// SIMD-ONLY0-NEXT:    [[TMP557:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP557]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP558:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV895:%.*]] = zext i8 [[TMP558]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP559:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV896:%.*]] = zext i8 [[TMP559]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP897:%.*]] = icmp eq i32 [[CONV895]], [[CONV896]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP897]], label [[IF_THEN899:%.*]], label [[IF_END900:%.*]]
+// SIMD-ONLY0:       if.then899:
+// SIMD-ONLY0-NEXT:    [[TMP560:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP560]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END900]]
+// SIMD-ONLY0:       if.end900:
+// SIMD-ONLY0-NEXT:    [[TMP561:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP561]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP562:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV901:%.*]] = zext i8 [[TMP562]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP563:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV902:%.*]] = zext i8 [[TMP563]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP903:%.*]] = icmp eq i32 [[CONV901]], [[CONV902]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP903]], label [[IF_THEN905:%.*]], label [[IF_END906:%.*]]
+// SIMD-ONLY0:       if.then905:
+// SIMD-ONLY0-NEXT:    [[TMP564:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP564]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END906]]
+// SIMD-ONLY0:       if.end906:
+// SIMD-ONLY0-NEXT:    [[TMP565:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP565]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP566:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV907:%.*]] = zext i8 [[TMP566]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP567:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV908:%.*]] = zext i8 [[TMP567]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP909:%.*]] = icmp eq i32 [[CONV907]], [[CONV908]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP909]], label [[IF_THEN911:%.*]], label [[IF_ELSE912:%.*]]
+// SIMD-ONLY0:       if.then911:
+// SIMD-ONLY0-NEXT:    [[TMP568:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP568]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END913:%.*]]
+// SIMD-ONLY0:       if.else912:
+// SIMD-ONLY0-NEXT:    [[TMP569:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP569]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END913]]
+// SIMD-ONLY0:       if.end913:
+// SIMD-ONLY0-NEXT:    [[TMP570:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV914:%.*]] = zext i8 [[TMP570]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP571:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV915:%.*]] = zext i8 [[TMP571]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP916:%.*]] = icmp eq i32 [[CONV914]], [[CONV915]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP916]], label [[IF_THEN918:%.*]], label [[IF_ELSE919:%.*]]
+// SIMD-ONLY0:       if.then918:
+// SIMD-ONLY0-NEXT:    [[TMP572:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP572]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END920:%.*]]
+// SIMD-ONLY0:       if.else919:
+// SIMD-ONLY0-NEXT:    [[TMP573:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP573]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END920]]
+// SIMD-ONLY0:       if.end920:
+// SIMD-ONLY0-NEXT:    [[TMP574:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV921:%.*]] = zext i8 [[TMP574]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP575:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV922:%.*]] = zext i8 [[TMP575]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP923:%.*]] = icmp eq i32 [[CONV921]], [[CONV922]]
+// SIMD-ONLY0-NEXT:    [[CONV924:%.*]] = zext i1 [[CMP923]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV925:%.*]] = trunc i32 [[CONV924]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV925]], ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP576:%.*]] = load i8, ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL926:%.*]] = icmp ne i8 [[TMP576]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL926]], label [[IF_THEN927:%.*]], label [[IF_END928:%.*]]
+// SIMD-ONLY0:       if.then927:
+// SIMD-ONLY0-NEXT:    [[TMP577:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP577]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END928]]
+// SIMD-ONLY0:       if.end928:
+// SIMD-ONLY0-NEXT:    [[TMP578:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV929:%.*]] = zext i8 [[TMP578]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP579:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV930:%.*]] = zext i8 [[TMP579]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP931:%.*]] = icmp eq i32 [[CONV929]], [[CONV930]]
+// SIMD-ONLY0-NEXT:    [[CONV932:%.*]] = zext i1 [[CMP931]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV933:%.*]] = trunc i32 [[CONV932]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV933]], ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP580:%.*]] = load i8, ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL934:%.*]] = icmp ne i8 [[TMP580]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL934]], label [[IF_THEN935:%.*]], label [[IF_END936:%.*]]
+// SIMD-ONLY0:       if.then935:
+// SIMD-ONLY0-NEXT:    [[TMP581:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP581]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END936]]
+// SIMD-ONLY0:       if.end936:
+// SIMD-ONLY0-NEXT:    [[TMP582:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV937:%.*]] = zext i8 [[TMP582]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP583:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV938:%.*]] = zext i8 [[TMP583]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP939:%.*]] = icmp eq i32 [[CONV937]], [[CONV938]]
+// SIMD-ONLY0-NEXT:    [[CONV940:%.*]] = zext i1 [[CMP939]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV941:%.*]] = trunc i32 [[CONV940]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV941]], ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP584:%.*]] = load i8, ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL942:%.*]] = icmp ne i8 [[TMP584]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL942]], label [[IF_THEN943:%.*]], label [[IF_ELSE944:%.*]]
+// SIMD-ONLY0:       if.then943:
+// SIMD-ONLY0-NEXT:    [[TMP585:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP585]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END945:%.*]]
+// SIMD-ONLY0:       if.else944:
+// SIMD-ONLY0-NEXT:    [[TMP586:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP586]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END945]]
+// SIMD-ONLY0:       if.end945:
+// SIMD-ONLY0-NEXT:    [[TMP587:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV946:%.*]] = zext i8 [[TMP587]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP588:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV947:%.*]] = zext i8 [[TMP588]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP948:%.*]] = icmp eq i32 [[CONV946]], [[CONV947]]
+// SIMD-ONLY0-NEXT:    [[CONV949:%.*]] = zext i1 [[CMP948]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV950:%.*]] = trunc i32 [[CONV949]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV950]], ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP589:%.*]] = load i8, ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL951:%.*]] = icmp ne i8 [[TMP589]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL951]], label [[IF_THEN952:%.*]], label [[IF_ELSE953:%.*]]
+// SIMD-ONLY0:       if.then952:
+// SIMD-ONLY0-NEXT:    [[TMP590:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP590]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END954:%.*]]
+// SIMD-ONLY0:       if.else953:
+// SIMD-ONLY0-NEXT:    [[TMP591:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP591]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END954]]
+// SIMD-ONLY0:       if.end954:
+// SIMD-ONLY0-NEXT:    [[TMP592:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP592]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP593:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV955:%.*]] = zext i8 [[TMP593]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP594:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV956:%.*]] = zext i8 [[TMP594]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP957:%.*]] = icmp sgt i32 [[CONV955]], [[CONV956]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP957]], label [[IF_THEN959:%.*]], label [[IF_END960:%.*]]
+// SIMD-ONLY0:       if.then959:
+// SIMD-ONLY0-NEXT:    [[TMP595:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP595]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END960]]
+// SIMD-ONLY0:       if.end960:
+// SIMD-ONLY0-NEXT:    [[TMP596:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP596]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP597:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV961:%.*]] = zext i8 [[TMP597]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP598:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV962:%.*]] = zext i8 [[TMP598]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP963:%.*]] = icmp sgt i32 [[CONV961]], [[CONV962]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP963]], label [[IF_THEN965:%.*]], label [[IF_END966:%.*]]
+// SIMD-ONLY0:       if.then965:
+// SIMD-ONLY0-NEXT:    [[TMP599:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP599]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END966]]
+// SIMD-ONLY0:       if.end966:
+// SIMD-ONLY0-NEXT:    [[TMP600:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP600]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP601:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV967:%.*]] = zext i8 [[TMP601]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP602:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV968:%.*]] = zext i8 [[TMP602]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP969:%.*]] = icmp slt i32 [[CONV967]], [[CONV968]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP969]], label [[IF_THEN971:%.*]], label [[IF_END972:%.*]]
+// SIMD-ONLY0:       if.then971:
+// SIMD-ONLY0-NEXT:    [[TMP603:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP603]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END972]]
+// SIMD-ONLY0:       if.end972:
+// SIMD-ONLY0-NEXT:    [[TMP604:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP604]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP605:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV973:%.*]] = zext i8 [[TMP605]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP606:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV974:%.*]] = zext i8 [[TMP606]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP975:%.*]] = icmp slt i32 [[CONV973]], [[CONV974]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP975]], label [[IF_THEN977:%.*]], label [[IF_END978:%.*]]
+// SIMD-ONLY0:       if.then977:
+// SIMD-ONLY0-NEXT:    [[TMP607:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP607]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END978]]
+// SIMD-ONLY0:       if.end978:
+// SIMD-ONLY0-NEXT:    [[TMP608:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP608]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP609:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV979:%.*]] = zext i8 [[TMP609]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP610:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV980:%.*]] = zext i8 [[TMP610]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP981:%.*]] = icmp eq i32 [[CONV979]], [[CONV980]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP981]], label [[IF_THEN983:%.*]], label [[IF_END984:%.*]]
+// SIMD-ONLY0:       if.then983:
+// SIMD-ONLY0-NEXT:    [[TMP611:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP611]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END984]]
+// SIMD-ONLY0:       if.end984:
+// SIMD-ONLY0-NEXT:    [[TMP612:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP612]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP613:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV985:%.*]] = zext i8 [[TMP613]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP614:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV986:%.*]] = zext i8 [[TMP614]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP987:%.*]] = icmp eq i32 [[CONV985]], [[CONV986]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP987]], label [[IF_THEN989:%.*]], label [[IF_END990:%.*]]
+// SIMD-ONLY0:       if.then989:
+// SIMD-ONLY0-NEXT:    [[TMP615:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP615]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END990]]
+// SIMD-ONLY0:       if.end990:
+// SIMD-ONLY0-NEXT:    [[TMP616:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV991:%.*]] = zext i8 [[TMP616]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP617:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV992:%.*]] = zext i8 [[TMP617]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP993:%.*]] = icmp sgt i32 [[CONV991]], [[CONV992]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP993]], label [[IF_THEN995:%.*]], label [[IF_END996:%.*]]
+// SIMD-ONLY0:       if.then995:
+// SIMD-ONLY0-NEXT:    [[TMP618:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP618]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END996]]
+// SIMD-ONLY0:       if.end996:
+// SIMD-ONLY0-NEXT:    [[TMP619:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP619]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP620:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV997:%.*]] = zext i8 [[TMP620]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP621:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV998:%.*]] = zext i8 [[TMP621]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP999:%.*]] = icmp sgt i32 [[CONV997]], [[CONV998]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP999]], label [[IF_THEN1001:%.*]], label [[IF_END1002:%.*]]
+// SIMD-ONLY0:       if.then1001:
+// SIMD-ONLY0-NEXT:    [[TMP622:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP622]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1002]]
+// SIMD-ONLY0:       if.end1002:
+// SIMD-ONLY0-NEXT:    [[TMP623:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP623]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP624:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1003:%.*]] = zext i8 [[TMP624]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP625:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1004:%.*]] = zext i8 [[TMP625]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1005:%.*]] = icmp slt i32 [[CONV1003]], [[CONV1004]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1005]], label [[IF_THEN1007:%.*]], label [[IF_END1008:%.*]]
+// SIMD-ONLY0:       if.then1007:
+// SIMD-ONLY0-NEXT:    [[TMP626:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP626]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1008]]
+// SIMD-ONLY0:       if.end1008:
+// SIMD-ONLY0-NEXT:    [[TMP627:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP627]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP628:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1009:%.*]] = zext i8 [[TMP628]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP629:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1010:%.*]] = zext i8 [[TMP629]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1011:%.*]] = icmp slt i32 [[CONV1009]], [[CONV1010]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1011]], label [[IF_THEN1013:%.*]], label [[IF_END1014:%.*]]
+// SIMD-ONLY0:       if.then1013:
+// SIMD-ONLY0-NEXT:    [[TMP630:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP630]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1014]]
+// SIMD-ONLY0:       if.end1014:
+// SIMD-ONLY0-NEXT:    [[TMP631:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP631]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP632:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1015:%.*]] = zext i8 [[TMP632]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP633:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1016:%.*]] = zext i8 [[TMP633]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1017:%.*]] = icmp eq i32 [[CONV1015]], [[CONV1016]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1017]], label [[IF_THEN1019:%.*]], label [[IF_END1020:%.*]]
+// SIMD-ONLY0:       if.then1019:
+// SIMD-ONLY0-NEXT:    [[TMP634:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP634]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1020]]
+// SIMD-ONLY0:       if.end1020:
+// SIMD-ONLY0-NEXT:    [[TMP635:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP635]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP636:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1021:%.*]] = zext i8 [[TMP636]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP637:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1022:%.*]] = zext i8 [[TMP637]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1023:%.*]] = icmp eq i32 [[CONV1021]], [[CONV1022]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1023]], label [[IF_THEN1025:%.*]], label [[IF_END1026:%.*]]
+// SIMD-ONLY0:       if.then1025:
+// SIMD-ONLY0-NEXT:    [[TMP638:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP638]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1026]]
+// SIMD-ONLY0:       if.end1026:
+// SIMD-ONLY0-NEXT:    [[TMP639:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP639]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP640:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1027:%.*]] = zext i8 [[TMP640]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP641:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1028:%.*]] = zext i8 [[TMP641]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1029:%.*]] = icmp eq i32 [[CONV1027]], [[CONV1028]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1029]], label [[IF_THEN1031:%.*]], label [[IF_ELSE1032:%.*]]
+// SIMD-ONLY0:       if.then1031:
+// SIMD-ONLY0-NEXT:    [[TMP642:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP642]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1033:%.*]]
+// SIMD-ONLY0:       if.else1032:
+// SIMD-ONLY0-NEXT:    [[TMP643:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP643]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1033]]
+// SIMD-ONLY0:       if.end1033:
+// SIMD-ONLY0-NEXT:    [[TMP644:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1034:%.*]] = zext i8 [[TMP644]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP645:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1035:%.*]] = zext i8 [[TMP645]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1036:%.*]] = icmp eq i32 [[CONV1034]], [[CONV1035]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1036]], label [[IF_THEN1038:%.*]], label [[IF_ELSE1039:%.*]]
+// SIMD-ONLY0:       if.then1038:
+// SIMD-ONLY0-NEXT:    [[TMP646:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP646]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1040:%.*]]
+// SIMD-ONLY0:       if.else1039:
+// SIMD-ONLY0-NEXT:    [[TMP647:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP647]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1040]]
+// SIMD-ONLY0:       if.end1040:
+// SIMD-ONLY0-NEXT:    [[TMP648:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1041:%.*]] = zext i8 [[TMP648]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP649:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1042:%.*]] = zext i8 [[TMP649]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1043:%.*]] = icmp eq i32 [[CONV1041]], [[CONV1042]]
+// SIMD-ONLY0-NEXT:    [[CONV1044:%.*]] = zext i1 [[CMP1043]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV1045:%.*]] = trunc i32 [[CONV1044]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV1045]], ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP650:%.*]] = load i8, ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL1046:%.*]] = icmp ne i8 [[TMP650]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL1046]], label [[IF_THEN1047:%.*]], label [[IF_END1048:%.*]]
+// SIMD-ONLY0:       if.then1047:
+// SIMD-ONLY0-NEXT:    [[TMP651:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP651]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1048]]
+// SIMD-ONLY0:       if.end1048:
+// SIMD-ONLY0-NEXT:    [[TMP652:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1049:%.*]] = zext i8 [[TMP652]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP653:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1050:%.*]] = zext i8 [[TMP653]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1051:%.*]] = icmp eq i32 [[CONV1049]], [[CONV1050]]
+// SIMD-ONLY0-NEXT:    [[CONV1052:%.*]] = zext i1 [[CMP1051]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV1053:%.*]] = trunc i32 [[CONV1052]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV1053]], ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP654:%.*]] = load i8, ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL1054:%.*]] = icmp ne i8 [[TMP654]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL1054]], label [[IF_THEN1055:%.*]], label [[IF_END1056:%.*]]
+// SIMD-ONLY0:       if.then1055:
+// SIMD-ONLY0-NEXT:    [[TMP655:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP655]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1056]]
+// SIMD-ONLY0:       if.end1056:
+// SIMD-ONLY0-NEXT:    [[TMP656:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1057:%.*]] = zext i8 [[TMP656]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP657:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1058:%.*]] = zext i8 [[TMP657]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1059:%.*]] = icmp eq i32 [[CONV1057]], [[CONV1058]]
+// SIMD-ONLY0-NEXT:    [[CONV1060:%.*]] = zext i1 [[CMP1059]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV1061:%.*]] = trunc i32 [[CONV1060]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV1061]], ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP658:%.*]] = load i8, ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL1062:%.*]] = icmp ne i8 [[TMP658]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL1062]], label [[IF_THEN1063:%.*]], label [[IF_ELSE1064:%.*]]
+// SIMD-ONLY0:       if.then1063:
+// SIMD-ONLY0-NEXT:    [[TMP659:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP659]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1065:%.*]]
+// SIMD-ONLY0:       if.else1064:
+// SIMD-ONLY0-NEXT:    [[TMP660:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP660]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1065]]
+// SIMD-ONLY0:       if.end1065:
+// SIMD-ONLY0-NEXT:    [[TMP661:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1066:%.*]] = zext i8 [[TMP661]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP662:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1067:%.*]] = zext i8 [[TMP662]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1068:%.*]] = icmp eq i32 [[CONV1066]], [[CONV1067]]
+// SIMD-ONLY0-NEXT:    [[CONV1069:%.*]] = zext i1 [[CMP1068]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV1070:%.*]] = trunc i32 [[CONV1069]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV1070]], ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP663:%.*]] = load i8, ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL1071:%.*]] = icmp ne i8 [[TMP663]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL1071]], label [[IF_THEN1072:%.*]], label [[IF_ELSE1073:%.*]]
+// SIMD-ONLY0:       if.then1072:
+// SIMD-ONLY0-NEXT:    [[TMP664:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP664]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1074:%.*]]
+// SIMD-ONLY0:       if.else1073:
+// SIMD-ONLY0-NEXT:    [[TMP665:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP665]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1074]]
+// SIMD-ONLY0:       if.end1074:
+// SIMD-ONLY0-NEXT:    [[TMP666:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP666]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP667:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1075:%.*]] = zext i8 [[TMP667]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP668:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1076:%.*]] = zext i8 [[TMP668]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1077:%.*]] = icmp sgt i32 [[CONV1075]], [[CONV1076]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1077]], label [[IF_THEN1079:%.*]], label [[IF_END1080:%.*]]
+// SIMD-ONLY0:       if.then1079:
+// SIMD-ONLY0-NEXT:    [[TMP669:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP669]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1080]]
+// SIMD-ONLY0:       if.end1080:
+// SIMD-ONLY0-NEXT:    [[TMP670:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP670]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP671:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1081:%.*]] = zext i8 [[TMP671]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP672:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1082:%.*]] = zext i8 [[TMP672]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1083:%.*]] = icmp sgt i32 [[CONV1081]], [[CONV1082]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1083]], label [[IF_THEN1085:%.*]], label [[IF_END1086:%.*]]
+// SIMD-ONLY0:       if.then1085:
+// SIMD-ONLY0-NEXT:    [[TMP673:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP673]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1086]]
+// SIMD-ONLY0:       if.end1086:
+// SIMD-ONLY0-NEXT:    [[TMP674:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP674]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP675:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1087:%.*]] = zext i8 [[TMP675]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP676:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1088:%.*]] = zext i8 [[TMP676]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1089:%.*]] = icmp slt i32 [[CONV1087]], [[CONV1088]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1089]], label [[IF_THEN1091:%.*]], label [[IF_END1092:%.*]]
+// SIMD-ONLY0:       if.then1091:
+// SIMD-ONLY0-NEXT:    [[TMP677:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP677]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1092]]
+// SIMD-ONLY0:       if.end1092:
+// SIMD-ONLY0-NEXT:    [[TMP678:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP678]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP679:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1093:%.*]] = zext i8 [[TMP679]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP680:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1094:%.*]] = zext i8 [[TMP680]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1095:%.*]] = icmp slt i32 [[CONV1093]], [[CONV1094]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1095]], label [[IF_THEN1097:%.*]], label [[IF_END1098:%.*]]
+// SIMD-ONLY0:       if.then1097:
+// SIMD-ONLY0-NEXT:    [[TMP681:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP681]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1098]]
+// SIMD-ONLY0:       if.end1098:
+// SIMD-ONLY0-NEXT:    [[TMP682:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP682]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP683:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1099:%.*]] = zext i8 [[TMP683]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP684:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1100:%.*]] = zext i8 [[TMP684]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1101:%.*]] = icmp eq i32 [[CONV1099]], [[CONV1100]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1101]], label [[IF_THEN1103:%.*]], label [[IF_END1104:%.*]]
+// SIMD-ONLY0:       if.then1103:
+// SIMD-ONLY0-NEXT:    [[TMP685:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP685]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1104]]
+// SIMD-ONLY0:       if.end1104:
+// SIMD-ONLY0-NEXT:    [[TMP686:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP686]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP687:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1105:%.*]] = zext i8 [[TMP687]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP688:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1106:%.*]] = zext i8 [[TMP688]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1107:%.*]] = icmp eq i32 [[CONV1105]], [[CONV1106]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1107]], label [[IF_THEN1109:%.*]], label [[IF_END1110:%.*]]
+// SIMD-ONLY0:       if.then1109:
+// SIMD-ONLY0-NEXT:    [[TMP689:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP689]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1110]]
+// SIMD-ONLY0:       if.end1110:
+// SIMD-ONLY0-NEXT:    [[TMP690:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1111:%.*]] = zext i8 [[TMP690]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP691:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1112:%.*]] = zext i8 [[TMP691]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1113:%.*]] = icmp sgt i32 [[CONV1111]], [[CONV1112]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1113]], label [[IF_THEN1115:%.*]], label [[IF_END1116:%.*]]
+// SIMD-ONLY0:       if.then1115:
+// SIMD-ONLY0-NEXT:    [[TMP692:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP692]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1116]]
+// SIMD-ONLY0:       if.end1116:
+// SIMD-ONLY0-NEXT:    [[TMP693:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP693]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP694:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1117:%.*]] = zext i8 [[TMP694]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP695:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1118:%.*]] = zext i8 [[TMP695]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1119:%.*]] = icmp sgt i32 [[CONV1117]], [[CONV1118]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1119]], label [[IF_THEN1121:%.*]], label [[IF_END1122:%.*]]
+// SIMD-ONLY0:       if.then1121:
+// SIMD-ONLY0-NEXT:    [[TMP696:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP696]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1122]]
+// SIMD-ONLY0:       if.end1122:
+// SIMD-ONLY0-NEXT:    [[TMP697:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP697]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP698:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1123:%.*]] = zext i8 [[TMP698]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP699:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1124:%.*]] = zext i8 [[TMP699]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1125:%.*]] = icmp slt i32 [[CONV1123]], [[CONV1124]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1125]], label [[IF_THEN1127:%.*]], label [[IF_END1128:%.*]]
+// SIMD-ONLY0:       if.then1127:
+// SIMD-ONLY0-NEXT:    [[TMP700:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP700]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1128]]
+// SIMD-ONLY0:       if.end1128:
+// SIMD-ONLY0-NEXT:    [[TMP701:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP701]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP702:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1129:%.*]] = zext i8 [[TMP702]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP703:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1130:%.*]] = zext i8 [[TMP703]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1131:%.*]] = icmp slt i32 [[CONV1129]], [[CONV1130]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1131]], label [[IF_THEN1133:%.*]], label [[IF_END1134:%.*]]
+// SIMD-ONLY0:       if.then1133:
+// SIMD-ONLY0-NEXT:    [[TMP704:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP704]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1134]]
+// SIMD-ONLY0:       if.end1134:
+// SIMD-ONLY0-NEXT:    [[TMP705:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP705]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP706:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1135:%.*]] = zext i8 [[TMP706]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP707:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1136:%.*]] = zext i8 [[TMP707]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1137:%.*]] = icmp eq i32 [[CONV1135]], [[CONV1136]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1137]], label [[IF_THEN1139:%.*]], label [[IF_END1140:%.*]]
+// SIMD-ONLY0:       if.then1139:
+// SIMD-ONLY0-NEXT:    [[TMP708:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP708]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1140]]
+// SIMD-ONLY0:       if.end1140:
+// SIMD-ONLY0-NEXT:    [[TMP709:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP709]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP710:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1141:%.*]] = zext i8 [[TMP710]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP711:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1142:%.*]] = zext i8 [[TMP711]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1143:%.*]] = icmp eq i32 [[CONV1141]], [[CONV1142]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1143]], label [[IF_THEN1145:%.*]], label [[IF_END1146:%.*]]
+// SIMD-ONLY0:       if.then1145:
+// SIMD-ONLY0-NEXT:    [[TMP712:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP712]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1146]]
+// SIMD-ONLY0:       if.end1146:
+// SIMD-ONLY0-NEXT:    [[TMP713:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP713]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP714:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1147:%.*]] = zext i8 [[TMP714]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP715:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1148:%.*]] = zext i8 [[TMP715]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1149:%.*]] = icmp eq i32 [[CONV1147]], [[CONV1148]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1149]], label [[IF_THEN1151:%.*]], label [[IF_ELSE1152:%.*]]
+// SIMD-ONLY0:       if.then1151:
+// SIMD-ONLY0-NEXT:    [[TMP716:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP716]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1153:%.*]]
+// SIMD-ONLY0:       if.else1152:
+// SIMD-ONLY0-NEXT:    [[TMP717:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP717]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1153]]
+// SIMD-ONLY0:       if.end1153:
+// SIMD-ONLY0-NEXT:    [[TMP718:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1154:%.*]] = zext i8 [[TMP718]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP719:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1155:%.*]] = zext i8 [[TMP719]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1156:%.*]] = icmp eq i32 [[CONV1154]], [[CONV1155]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1156]], label [[IF_THEN1158:%.*]], label [[IF_ELSE1159:%.*]]
+// SIMD-ONLY0:       if.then1158:
+// SIMD-ONLY0-NEXT:    [[TMP720:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP720]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1160:%.*]]
+// SIMD-ONLY0:       if.else1159:
+// SIMD-ONLY0-NEXT:    [[TMP721:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP721]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1160]]
+// SIMD-ONLY0:       if.end1160:
+// SIMD-ONLY0-NEXT:    [[TMP722:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1161:%.*]] = zext i8 [[TMP722]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP723:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1162:%.*]] = zext i8 [[TMP723]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1163:%.*]] = icmp eq i32 [[CONV1161]], [[CONV1162]]
+// SIMD-ONLY0-NEXT:    [[CONV1164:%.*]] = zext i1 [[CMP1163]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV1165:%.*]] = trunc i32 [[CONV1164]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV1165]], ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP724:%.*]] = load i8, ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL1166:%.*]] = icmp ne i8 [[TMP724]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL1166]], label [[IF_THEN1167:%.*]], label [[IF_END1168:%.*]]
+// SIMD-ONLY0:       if.then1167:
+// SIMD-ONLY0-NEXT:    [[TMP725:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP725]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1168]]
+// SIMD-ONLY0:       if.end1168:
+// SIMD-ONLY0-NEXT:    [[TMP726:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1169:%.*]] = zext i8 [[TMP726]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP727:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1170:%.*]] = zext i8 [[TMP727]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1171:%.*]] = icmp eq i32 [[CONV1169]], [[CONV1170]]
+// SIMD-ONLY0-NEXT:    [[CONV1172:%.*]] = zext i1 [[CMP1171]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV1173:%.*]] = trunc i32 [[CONV1172]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV1173]], ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP728:%.*]] = load i8, ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL1174:%.*]] = icmp ne i8 [[TMP728]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL1174]], label [[IF_THEN1175:%.*]], label [[IF_END1176:%.*]]
+// SIMD-ONLY0:       if.then1175:
+// SIMD-ONLY0-NEXT:    [[TMP729:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP729]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1176]]
+// SIMD-ONLY0:       if.end1176:
+// SIMD-ONLY0-NEXT:    [[TMP730:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1177:%.*]] = zext i8 [[TMP730]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP731:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1178:%.*]] = zext i8 [[TMP731]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1179:%.*]] = icmp eq i32 [[CONV1177]], [[CONV1178]]
+// SIMD-ONLY0-NEXT:    [[CONV1180:%.*]] = zext i1 [[CMP1179]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV1181:%.*]] = trunc i32 [[CONV1180]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV1181]], ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP732:%.*]] = load i8, ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL1182:%.*]] = icmp ne i8 [[TMP732]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL1182]], label [[IF_THEN1183:%.*]], label [[IF_ELSE1184:%.*]]
+// SIMD-ONLY0:       if.then1183:
+// SIMD-ONLY0-NEXT:    [[TMP733:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP733]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1185:%.*]]
+// SIMD-ONLY0:       if.else1184:
+// SIMD-ONLY0-NEXT:    [[TMP734:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP734]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1185]]
+// SIMD-ONLY0:       if.end1185:
+// SIMD-ONLY0-NEXT:    [[TMP735:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1186:%.*]] = zext i8 [[TMP735]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP736:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1187:%.*]] = zext i8 [[TMP736]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1188:%.*]] = icmp eq i32 [[CONV1186]], [[CONV1187]]
+// SIMD-ONLY0-NEXT:    [[CONV1189:%.*]] = zext i1 [[CMP1188]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV1190:%.*]] = trunc i32 [[CONV1189]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV1190]], ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP737:%.*]] = load i8, ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL1191:%.*]] = icmp ne i8 [[TMP737]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL1191]], label [[IF_THEN1192:%.*]], label [[IF_ELSE1193:%.*]]
+// SIMD-ONLY0:       if.then1192:
+// SIMD-ONLY0-NEXT:    [[TMP738:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP738]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1194:%.*]]
+// SIMD-ONLY0:       if.else1193:
+// SIMD-ONLY0-NEXT:    [[TMP739:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP739]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1194]]
+// SIMD-ONLY0:       if.end1194:
+// SIMD-ONLY0-NEXT:    [[TMP740:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP740]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP741:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1195:%.*]] = zext i8 [[TMP741]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP742:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1196:%.*]] = zext i8 [[TMP742]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1197:%.*]] = icmp sgt i32 [[CONV1195]], [[CONV1196]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1197]], label [[IF_THEN1199:%.*]], label [[IF_END1200:%.*]]
+// SIMD-ONLY0:       if.then1199:
+// SIMD-ONLY0-NEXT:    [[TMP743:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP743]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1200]]
+// SIMD-ONLY0:       if.end1200:
+// SIMD-ONLY0-NEXT:    [[TMP744:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP744]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP745:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1201:%.*]] = zext i8 [[TMP745]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP746:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1202:%.*]] = zext i8 [[TMP746]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1203:%.*]] = icmp sgt i32 [[CONV1201]], [[CONV1202]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1203]], label [[IF_THEN1205:%.*]], label [[IF_END1206:%.*]]
+// SIMD-ONLY0:       if.then1205:
+// SIMD-ONLY0-NEXT:    [[TMP747:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP747]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1206]]
+// SIMD-ONLY0:       if.end1206:
+// SIMD-ONLY0-NEXT:    [[TMP748:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP748]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP749:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1207:%.*]] = zext i8 [[TMP749]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP750:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1208:%.*]] = zext i8 [[TMP750]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1209:%.*]] = icmp slt i32 [[CONV1207]], [[CONV1208]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1209]], label [[IF_THEN1211:%.*]], label [[IF_END1212:%.*]]
+// SIMD-ONLY0:       if.then1211:
+// SIMD-ONLY0-NEXT:    [[TMP751:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP751]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1212]]
+// SIMD-ONLY0:       if.end1212:
+// SIMD-ONLY0-NEXT:    [[TMP752:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP752]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP753:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1213:%.*]] = zext i8 [[TMP753]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP754:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1214:%.*]] = zext i8 [[TMP754]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1215:%.*]] = icmp slt i32 [[CONV1213]], [[CONV1214]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1215]], label [[IF_THEN1217:%.*]], label [[IF_END1218:%.*]]
+// SIMD-ONLY0:       if.then1217:
+// SIMD-ONLY0-NEXT:    [[TMP755:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP755]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1218]]
+// SIMD-ONLY0:       if.end1218:
+// SIMD-ONLY0-NEXT:    [[TMP756:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP756]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP757:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1219:%.*]] = zext i8 [[TMP757]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP758:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1220:%.*]] = zext i8 [[TMP758]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1221:%.*]] = icmp eq i32 [[CONV1219]], [[CONV1220]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1221]], label [[IF_THEN1223:%.*]], label [[IF_END1224:%.*]]
+// SIMD-ONLY0:       if.then1223:
+// SIMD-ONLY0-NEXT:    [[TMP759:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP759]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1224]]
+// SIMD-ONLY0:       if.end1224:
+// SIMD-ONLY0-NEXT:    [[TMP760:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP760]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP761:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1225:%.*]] = zext i8 [[TMP761]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP762:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1226:%.*]] = zext i8 [[TMP762]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1227:%.*]] = icmp eq i32 [[CONV1225]], [[CONV1226]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1227]], label [[IF_THEN1229:%.*]], label [[IF_END1230:%.*]]
+// SIMD-ONLY0:       if.then1229:
+// SIMD-ONLY0-NEXT:    [[TMP763:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP763]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1230]]
+// SIMD-ONLY0:       if.end1230:
+// SIMD-ONLY0-NEXT:    [[TMP764:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1231:%.*]] = zext i8 [[TMP764]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP765:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1232:%.*]] = zext i8 [[TMP765]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1233:%.*]] = icmp sgt i32 [[CONV1231]], [[CONV1232]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1233]], label [[IF_THEN1235:%.*]], label [[IF_END1236:%.*]]
+// SIMD-ONLY0:       if.then1235:
+// SIMD-ONLY0-NEXT:    [[TMP766:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP766]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1236]]
+// SIMD-ONLY0:       if.end1236:
+// SIMD-ONLY0-NEXT:    [[TMP767:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP767]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP768:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1237:%.*]] = zext i8 [[TMP768]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP769:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1238:%.*]] = zext i8 [[TMP769]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1239:%.*]] = icmp sgt i32 [[CONV1237]], [[CONV1238]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1239]], label [[IF_THEN1241:%.*]], label [[IF_END1242:%.*]]
+// SIMD-ONLY0:       if.then1241:
+// SIMD-ONLY0-NEXT:    [[TMP770:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP770]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1242]]
+// SIMD-ONLY0:       if.end1242:
+// SIMD-ONLY0-NEXT:    [[TMP771:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP771]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP772:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1243:%.*]] = zext i8 [[TMP772]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP773:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1244:%.*]] = zext i8 [[TMP773]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1245:%.*]] = icmp slt i32 [[CONV1243]], [[CONV1244]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1245]], label [[IF_THEN1247:%.*]], label [[IF_END1248:%.*]]
+// SIMD-ONLY0:       if.then1247:
+// SIMD-ONLY0-NEXT:    [[TMP774:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP774]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1248]]
+// SIMD-ONLY0:       if.end1248:
+// SIMD-ONLY0-NEXT:    [[TMP775:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP775]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP776:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1249:%.*]] = zext i8 [[TMP776]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP777:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1250:%.*]] = zext i8 [[TMP777]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1251:%.*]] = icmp slt i32 [[CONV1249]], [[CONV1250]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1251]], label [[IF_THEN1253:%.*]], label [[IF_END1254:%.*]]
+// SIMD-ONLY0:       if.then1253:
+// SIMD-ONLY0-NEXT:    [[TMP778:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP778]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1254]]
+// SIMD-ONLY0:       if.end1254:
+// SIMD-ONLY0-NEXT:    [[TMP779:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP779]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP780:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1255:%.*]] = zext i8 [[TMP780]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP781:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1256:%.*]] = zext i8 [[TMP781]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1257:%.*]] = icmp eq i32 [[CONV1255]], [[CONV1256]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1257]], label [[IF_THEN1259:%.*]], label [[IF_END1260:%.*]]
+// SIMD-ONLY0:       if.then1259:
+// SIMD-ONLY0-NEXT:    [[TMP782:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP782]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1260]]
+// SIMD-ONLY0:       if.end1260:
+// SIMD-ONLY0-NEXT:    [[TMP783:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP783]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP784:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1261:%.*]] = zext i8 [[TMP784]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP785:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1262:%.*]] = zext i8 [[TMP785]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1263:%.*]] = icmp eq i32 [[CONV1261]], [[CONV1262]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1263]], label [[IF_THEN1265:%.*]], label [[IF_END1266:%.*]]
+// SIMD-ONLY0:       if.then1265:
+// SIMD-ONLY0-NEXT:    [[TMP786:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP786]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1266]]
+// SIMD-ONLY0:       if.end1266:
+// SIMD-ONLY0-NEXT:    [[TMP787:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP787]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP788:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1267:%.*]] = zext i8 [[TMP788]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP789:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1268:%.*]] = zext i8 [[TMP789]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1269:%.*]] = icmp eq i32 [[CONV1267]], [[CONV1268]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1269]], label [[IF_THEN1271:%.*]], label [[IF_ELSE1272:%.*]]
+// SIMD-ONLY0:       if.then1271:
+// SIMD-ONLY0-NEXT:    [[TMP790:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP790]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1273:%.*]]
+// SIMD-ONLY0:       if.else1272:
+// SIMD-ONLY0-NEXT:    [[TMP791:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP791]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1273]]
+// SIMD-ONLY0:       if.end1273:
+// SIMD-ONLY0-NEXT:    [[TMP792:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1274:%.*]] = zext i8 [[TMP792]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP793:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1275:%.*]] = zext i8 [[TMP793]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1276:%.*]] = icmp eq i32 [[CONV1274]], [[CONV1275]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1276]], label [[IF_THEN1278:%.*]], label [[IF_ELSE1279:%.*]]
+// SIMD-ONLY0:       if.then1278:
+// SIMD-ONLY0-NEXT:    [[TMP794:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP794]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1280:%.*]]
+// SIMD-ONLY0:       if.else1279:
+// SIMD-ONLY0-NEXT:    [[TMP795:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP795]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1280]]
+// SIMD-ONLY0:       if.end1280:
+// SIMD-ONLY0-NEXT:    [[TMP796:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1281:%.*]] = zext i8 [[TMP796]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP797:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1282:%.*]] = zext i8 [[TMP797]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1283:%.*]] = icmp eq i32 [[CONV1281]], [[CONV1282]]
+// SIMD-ONLY0-NEXT:    [[CONV1284:%.*]] = zext i1 [[CMP1283]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV1285:%.*]] = trunc i32 [[CONV1284]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV1285]], ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP798:%.*]] = load i8, ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL1286:%.*]] = icmp ne i8 [[TMP798]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL1286]], label [[IF_THEN1287:%.*]], label [[IF_END1288:%.*]]
+// SIMD-ONLY0:       if.then1287:
+// SIMD-ONLY0-NEXT:    [[TMP799:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP799]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1288]]
+// SIMD-ONLY0:       if.end1288:
+// SIMD-ONLY0-NEXT:    [[TMP800:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1289:%.*]] = zext i8 [[TMP800]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP801:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1290:%.*]] = zext i8 [[TMP801]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1291:%.*]] = icmp eq i32 [[CONV1289]], [[CONV1290]]
+// SIMD-ONLY0-NEXT:    [[CONV1292:%.*]] = zext i1 [[CMP1291]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV1293:%.*]] = trunc i32 [[CONV1292]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV1293]], ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP802:%.*]] = load i8, ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL1294:%.*]] = icmp ne i8 [[TMP802]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL1294]], label [[IF_THEN1295:%.*]], label [[IF_END1296:%.*]]
+// SIMD-ONLY0:       if.then1295:
+// SIMD-ONLY0-NEXT:    [[TMP803:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP803]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1296]]
+// SIMD-ONLY0:       if.end1296:
+// SIMD-ONLY0-NEXT:    [[TMP804:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1297:%.*]] = zext i8 [[TMP804]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP805:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1298:%.*]] = zext i8 [[TMP805]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1299:%.*]] = icmp eq i32 [[CONV1297]], [[CONV1298]]
+// SIMD-ONLY0-NEXT:    [[CONV1300:%.*]] = zext i1 [[CMP1299]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV1301:%.*]] = trunc i32 [[CONV1300]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV1301]], ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP806:%.*]] = load i8, ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL1302:%.*]] = icmp ne i8 [[TMP806]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL1302]], label [[IF_THEN1303:%.*]], label [[IF_ELSE1304:%.*]]
+// SIMD-ONLY0:       if.then1303:
+// SIMD-ONLY0-NEXT:    [[TMP807:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP807]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1305:%.*]]
+// SIMD-ONLY0:       if.else1304:
+// SIMD-ONLY0-NEXT:    [[TMP808:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP808]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1305]]
+// SIMD-ONLY0:       if.end1305:
+// SIMD-ONLY0-NEXT:    [[TMP809:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1306:%.*]] = zext i8 [[TMP809]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP810:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1307:%.*]] = zext i8 [[TMP810]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1308:%.*]] = icmp eq i32 [[CONV1306]], [[CONV1307]]
+// SIMD-ONLY0-NEXT:    [[CONV1309:%.*]] = zext i1 [[CMP1308]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV1310:%.*]] = trunc i32 [[CONV1309]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV1310]], ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP811:%.*]] = load i8, ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL1311:%.*]] = icmp ne i8 [[TMP811]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL1311]], label [[IF_THEN1312:%.*]], label [[IF_ELSE1313:%.*]]
+// SIMD-ONLY0:       if.then1312:
+// SIMD-ONLY0-NEXT:    [[TMP812:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP812]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1314:%.*]]
+// SIMD-ONLY0:       if.else1313:
+// SIMD-ONLY0-NEXT:    [[TMP813:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP813]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1314]]
+// SIMD-ONLY0:       if.end1314:
+// SIMD-ONLY0-NEXT:    [[TMP814:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP814]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP815:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1315:%.*]] = zext i8 [[TMP815]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP816:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1316:%.*]] = zext i8 [[TMP816]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1317:%.*]] = icmp sgt i32 [[CONV1315]], [[CONV1316]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1317]], label [[IF_THEN1319:%.*]], label [[IF_END1320:%.*]]
+// SIMD-ONLY0:       if.then1319:
+// SIMD-ONLY0-NEXT:    [[TMP817:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP817]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1320]]
+// SIMD-ONLY0:       if.end1320:
+// SIMD-ONLY0-NEXT:    [[TMP818:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP818]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP819:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1321:%.*]] = zext i8 [[TMP819]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP820:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1322:%.*]] = zext i8 [[TMP820]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1323:%.*]] = icmp sgt i32 [[CONV1321]], [[CONV1322]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1323]], label [[IF_THEN1325:%.*]], label [[IF_END1326:%.*]]
+// SIMD-ONLY0:       if.then1325:
+// SIMD-ONLY0-NEXT:    [[TMP821:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP821]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1326]]
+// SIMD-ONLY0:       if.end1326:
+// SIMD-ONLY0-NEXT:    [[TMP822:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP822]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP823:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1327:%.*]] = zext i8 [[TMP823]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP824:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1328:%.*]] = zext i8 [[TMP824]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1329:%.*]] = icmp slt i32 [[CONV1327]], [[CONV1328]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1329]], label [[IF_THEN1331:%.*]], label [[IF_END1332:%.*]]
+// SIMD-ONLY0:       if.then1331:
+// SIMD-ONLY0-NEXT:    [[TMP825:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP825]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1332]]
+// SIMD-ONLY0:       if.end1332:
+// SIMD-ONLY0-NEXT:    [[TMP826:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP826]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP827:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1333:%.*]] = zext i8 [[TMP827]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP828:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1334:%.*]] = zext i8 [[TMP828]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1335:%.*]] = icmp slt i32 [[CONV1333]], [[CONV1334]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1335]], label [[IF_THEN1337:%.*]], label [[IF_END1338:%.*]]
+// SIMD-ONLY0:       if.then1337:
+// SIMD-ONLY0-NEXT:    [[TMP829:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP829]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1338]]
+// SIMD-ONLY0:       if.end1338:
+// SIMD-ONLY0-NEXT:    [[TMP830:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP830]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP831:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1339:%.*]] = zext i8 [[TMP831]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP832:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1340:%.*]] = zext i8 [[TMP832]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1341:%.*]] = icmp eq i32 [[CONV1339]], [[CONV1340]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1341]], label [[IF_THEN1343:%.*]], label [[IF_END1344:%.*]]
+// SIMD-ONLY0:       if.then1343:
+// SIMD-ONLY0-NEXT:    [[TMP833:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP833]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1344]]
+// SIMD-ONLY0:       if.end1344:
+// SIMD-ONLY0-NEXT:    [[TMP834:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP834]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP835:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1345:%.*]] = zext i8 [[TMP835]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP836:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1346:%.*]] = zext i8 [[TMP836]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1347:%.*]] = icmp eq i32 [[CONV1345]], [[CONV1346]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1347]], label [[IF_THEN1349:%.*]], label [[IF_END1350:%.*]]
+// SIMD-ONLY0:       if.then1349:
+// SIMD-ONLY0-NEXT:    [[TMP837:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP837]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1350]]
+// SIMD-ONLY0:       if.end1350:
+// SIMD-ONLY0-NEXT:    [[TMP838:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1351:%.*]] = zext i8 [[TMP838]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP839:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1352:%.*]] = zext i8 [[TMP839]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1353:%.*]] = icmp sgt i32 [[CONV1351]], [[CONV1352]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1353]], label [[IF_THEN1355:%.*]], label [[IF_END1356:%.*]]
+// SIMD-ONLY0:       if.then1355:
+// SIMD-ONLY0-NEXT:    [[TMP840:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP840]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1356]]
+// SIMD-ONLY0:       if.end1356:
+// SIMD-ONLY0-NEXT:    [[TMP841:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP841]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP842:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1357:%.*]] = zext i8 [[TMP842]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP843:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1358:%.*]] = zext i8 [[TMP843]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1359:%.*]] = icmp sgt i32 [[CONV1357]], [[CONV1358]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1359]], label [[IF_THEN1361:%.*]], label [[IF_END1362:%.*]]
+// SIMD-ONLY0:       if.then1361:
+// SIMD-ONLY0-NEXT:    [[TMP844:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP844]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1362]]
+// SIMD-ONLY0:       if.end1362:
+// SIMD-ONLY0-NEXT:    [[TMP845:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP845]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP846:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1363:%.*]] = zext i8 [[TMP846]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP847:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1364:%.*]] = zext i8 [[TMP847]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1365:%.*]] = icmp slt i32 [[CONV1363]], [[CONV1364]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1365]], label [[IF_THEN1367:%.*]], label [[IF_END1368:%.*]]
+// SIMD-ONLY0:       if.then1367:
+// SIMD-ONLY0-NEXT:    [[TMP848:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP848]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1368]]
+// SIMD-ONLY0:       if.end1368:
+// SIMD-ONLY0-NEXT:    [[TMP849:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP849]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP850:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1369:%.*]] = zext i8 [[TMP850]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP851:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1370:%.*]] = zext i8 [[TMP851]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1371:%.*]] = icmp slt i32 [[CONV1369]], [[CONV1370]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1371]], label [[IF_THEN1373:%.*]], label [[IF_END1374:%.*]]
+// SIMD-ONLY0:       if.then1373:
+// SIMD-ONLY0-NEXT:    [[TMP852:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP852]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1374]]
+// SIMD-ONLY0:       if.end1374:
+// SIMD-ONLY0-NEXT:    [[TMP853:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP853]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP854:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1375:%.*]] = zext i8 [[TMP854]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP855:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1376:%.*]] = zext i8 [[TMP855]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1377:%.*]] = icmp eq i32 [[CONV1375]], [[CONV1376]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1377]], label [[IF_THEN1379:%.*]], label [[IF_END1380:%.*]]
+// SIMD-ONLY0:       if.then1379:
+// SIMD-ONLY0-NEXT:    [[TMP856:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP856]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1380]]
+// SIMD-ONLY0:       if.end1380:
+// SIMD-ONLY0-NEXT:    [[TMP857:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP857]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP858:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1381:%.*]] = zext i8 [[TMP858]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP859:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1382:%.*]] = zext i8 [[TMP859]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1383:%.*]] = icmp eq i32 [[CONV1381]], [[CONV1382]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1383]], label [[IF_THEN1385:%.*]], label [[IF_END1386:%.*]]
+// SIMD-ONLY0:       if.then1385:
+// SIMD-ONLY0-NEXT:    [[TMP860:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP860]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1386]]
+// SIMD-ONLY0:       if.end1386:
+// SIMD-ONLY0-NEXT:    [[TMP861:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP861]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP862:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1387:%.*]] = zext i8 [[TMP862]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP863:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1388:%.*]] = zext i8 [[TMP863]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1389:%.*]] = icmp eq i32 [[CONV1387]], [[CONV1388]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1389]], label [[IF_THEN1391:%.*]], label [[IF_ELSE1392:%.*]]
+// SIMD-ONLY0:       if.then1391:
+// SIMD-ONLY0-NEXT:    [[TMP864:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP864]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1393:%.*]]
+// SIMD-ONLY0:       if.else1392:
+// SIMD-ONLY0-NEXT:    [[TMP865:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP865]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1393]]
+// SIMD-ONLY0:       if.end1393:
+// SIMD-ONLY0-NEXT:    [[TMP866:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1394:%.*]] = zext i8 [[TMP866]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP867:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1395:%.*]] = zext i8 [[TMP867]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1396:%.*]] = icmp eq i32 [[CONV1394]], [[CONV1395]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1396]], label [[IF_THEN1398:%.*]], label [[IF_ELSE1399:%.*]]
+// SIMD-ONLY0:       if.then1398:
+// SIMD-ONLY0-NEXT:    [[TMP868:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP868]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1400:%.*]]
+// SIMD-ONLY0:       if.else1399:
+// SIMD-ONLY0-NEXT:    [[TMP869:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP869]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1400]]
+// SIMD-ONLY0:       if.end1400:
+// SIMD-ONLY0-NEXT:    [[TMP870:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1401:%.*]] = zext i8 [[TMP870]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP871:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1402:%.*]] = zext i8 [[TMP871]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1403:%.*]] = icmp eq i32 [[CONV1401]], [[CONV1402]]
+// SIMD-ONLY0-NEXT:    [[CONV1404:%.*]] = zext i1 [[CMP1403]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV1405:%.*]] = trunc i32 [[CONV1404]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV1405]], ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP872:%.*]] = load i8, ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL1406:%.*]] = icmp ne i8 [[TMP872]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL1406]], label [[IF_THEN1407:%.*]], label [[IF_END1408:%.*]]
+// SIMD-ONLY0:       if.then1407:
+// SIMD-ONLY0-NEXT:    [[TMP873:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP873]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1408]]
+// SIMD-ONLY0:       if.end1408:
+// SIMD-ONLY0-NEXT:    [[TMP874:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1409:%.*]] = zext i8 [[TMP874]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP875:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1410:%.*]] = zext i8 [[TMP875]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1411:%.*]] = icmp eq i32 [[CONV1409]], [[CONV1410]]
+// SIMD-ONLY0-NEXT:    [[CONV1412:%.*]] = zext i1 [[CMP1411]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV1413:%.*]] = trunc i32 [[CONV1412]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV1413]], ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP876:%.*]] = load i8, ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL1414:%.*]] = icmp ne i8 [[TMP876]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL1414]], label [[IF_THEN1415:%.*]], label [[IF_END1416:%.*]]
+// SIMD-ONLY0:       if.then1415:
+// SIMD-ONLY0-NEXT:    [[TMP877:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP877]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1416]]
+// SIMD-ONLY0:       if.end1416:
+// SIMD-ONLY0-NEXT:    [[TMP878:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1417:%.*]] = zext i8 [[TMP878]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP879:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1418:%.*]] = zext i8 [[TMP879]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1419:%.*]] = icmp eq i32 [[CONV1417]], [[CONV1418]]
+// SIMD-ONLY0-NEXT:    [[CONV1420:%.*]] = zext i1 [[CMP1419]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV1421:%.*]] = trunc i32 [[CONV1420]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV1421]], ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP880:%.*]] = load i8, ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL1422:%.*]] = icmp ne i8 [[TMP880]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL1422]], label [[IF_THEN1423:%.*]], label [[IF_ELSE1424:%.*]]
+// SIMD-ONLY0:       if.then1423:
+// SIMD-ONLY0-NEXT:    [[TMP881:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP881]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1425:%.*]]
+// SIMD-ONLY0:       if.else1424:
+// SIMD-ONLY0-NEXT:    [[TMP882:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP882]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1425]]
+// SIMD-ONLY0:       if.end1425:
+// SIMD-ONLY0-NEXT:    [[TMP883:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1426:%.*]] = zext i8 [[TMP883]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP884:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1427:%.*]] = zext i8 [[TMP884]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1428:%.*]] = icmp eq i32 [[CONV1426]], [[CONV1427]]
+// SIMD-ONLY0-NEXT:    [[CONV1429:%.*]] = zext i1 [[CMP1428]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV1430:%.*]] = trunc i32 [[CONV1429]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV1430]], ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP885:%.*]] = load i8, ptr [[UCR]], align 1
+// SIMD-ONLY0-NEXT:    [[TOBOOL1431:%.*]] = icmp ne i8 [[TMP885]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL1431]], label [[IF_THEN1432:%.*]], label [[IF_ELSE1433:%.*]]
+// SIMD-ONLY0:       if.then1432:
+// SIMD-ONLY0-NEXT:    [[TMP886:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP886]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1434:%.*]]
+// SIMD-ONLY0:       if.else1433:
+// SIMD-ONLY0-NEXT:    [[TMP887:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP887]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    br label [[IF_END1434]]
+// SIMD-ONLY0:       if.end1434:
+// SIMD-ONLY0-NEXT:    [[TMP888:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP888]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP889:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1435:%.*]] = sext i16 [[TMP889]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP890:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1436:%.*]] = sext i16 [[TMP890]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1437:%.*]] = icmp sgt i32 [[CONV1435]], [[CONV1436]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1437]], label [[IF_THEN1439:%.*]], label [[IF_END1440:%.*]]
+// SIMD-ONLY0:       if.then1439:
+// SIMD-ONLY0-NEXT:    [[TMP891:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP891]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1440]]
+// SIMD-ONLY0:       if.end1440:
+// SIMD-ONLY0-NEXT:    [[TMP892:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP892]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP893:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1441:%.*]] = sext i16 [[TMP893]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP894:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1442:%.*]] = sext i16 [[TMP894]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1443:%.*]] = icmp sgt i32 [[CONV1441]], [[CONV1442]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1443]], label [[IF_THEN1445:%.*]], label [[IF_END1446:%.*]]
+// SIMD-ONLY0:       if.then1445:
+// SIMD-ONLY0-NEXT:    [[TMP895:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP895]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1446]]
+// SIMD-ONLY0:       if.end1446:
+// SIMD-ONLY0-NEXT:    [[TMP896:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP896]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP897:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1447:%.*]] = sext i16 [[TMP897]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP898:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1448:%.*]] = sext i16 [[TMP898]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1449:%.*]] = icmp slt i32 [[CONV1447]], [[CONV1448]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1449]], label [[IF_THEN1451:%.*]], label [[IF_END1452:%.*]]
+// SIMD-ONLY0:       if.then1451:
+// SIMD-ONLY0-NEXT:    [[TMP899:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP899]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1452]]
+// SIMD-ONLY0:       if.end1452:
+// SIMD-ONLY0-NEXT:    [[TMP900:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP900]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP901:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1453:%.*]] = sext i16 [[TMP901]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP902:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1454:%.*]] = sext i16 [[TMP902]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1455:%.*]] = icmp slt i32 [[CONV1453]], [[CONV1454]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1455]], label [[IF_THEN1457:%.*]], label [[IF_END1458:%.*]]
+// SIMD-ONLY0:       if.then1457:
+// SIMD-ONLY0-NEXT:    [[TMP903:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP903]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1458]]
+// SIMD-ONLY0:       if.end1458:
+// SIMD-ONLY0-NEXT:    [[TMP904:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP904]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP905:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1459:%.*]] = sext i16 [[TMP905]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP906:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1460:%.*]] = sext i16 [[TMP906]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1461:%.*]] = icmp eq i32 [[CONV1459]], [[CONV1460]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1461]], label [[IF_THEN1463:%.*]], label [[IF_END1464:%.*]]
+// SIMD-ONLY0:       if.then1463:
+// SIMD-ONLY0-NEXT:    [[TMP907:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP907]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1464]]
+// SIMD-ONLY0:       if.end1464:
+// SIMD-ONLY0-NEXT:    [[TMP908:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP908]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP909:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1465:%.*]] = sext i16 [[TMP909]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP910:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1466:%.*]] = sext i16 [[TMP910]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1467:%.*]] = icmp eq i32 [[CONV1465]], [[CONV1466]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1467]], label [[IF_THEN1469:%.*]], label [[IF_END1470:%.*]]
+// SIMD-ONLY0:       if.then1469:
+// SIMD-ONLY0-NEXT:    [[TMP911:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP911]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1470]]
+// SIMD-ONLY0:       if.end1470:
+// SIMD-ONLY0-NEXT:    [[TMP912:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1471:%.*]] = sext i16 [[TMP912]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP913:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1472:%.*]] = sext i16 [[TMP913]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1473:%.*]] = icmp sgt i32 [[CONV1471]], [[CONV1472]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1473]], label [[IF_THEN1475:%.*]], label [[IF_END1476:%.*]]
+// SIMD-ONLY0:       if.then1475:
+// SIMD-ONLY0-NEXT:    [[TMP914:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP914]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1476]]
+// SIMD-ONLY0:       if.end1476:
+// SIMD-ONLY0-NEXT:    [[TMP915:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP915]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP916:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1477:%.*]] = sext i16 [[TMP916]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP917:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1478:%.*]] = sext i16 [[TMP917]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1479:%.*]] = icmp sgt i32 [[CONV1477]], [[CONV1478]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1479]], label [[IF_THEN1481:%.*]], label [[IF_END1482:%.*]]
+// SIMD-ONLY0:       if.then1481:
+// SIMD-ONLY0-NEXT:    [[TMP918:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP918]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1482]]
+// SIMD-ONLY0:       if.end1482:
+// SIMD-ONLY0-NEXT:    [[TMP919:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP919]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP920:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1483:%.*]] = sext i16 [[TMP920]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP921:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1484:%.*]] = sext i16 [[TMP921]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1485:%.*]] = icmp slt i32 [[CONV1483]], [[CONV1484]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1485]], label [[IF_THEN1487:%.*]], label [[IF_END1488:%.*]]
+// SIMD-ONLY0:       if.then1487:
+// SIMD-ONLY0-NEXT:    [[TMP922:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP922]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1488]]
+// SIMD-ONLY0:       if.end1488:
+// SIMD-ONLY0-NEXT:    [[TMP923:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP923]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP924:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1489:%.*]] = sext i16 [[TMP924]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP925:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1490:%.*]] = sext i16 [[TMP925]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1491:%.*]] = icmp slt i32 [[CONV1489]], [[CONV1490]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1491]], label [[IF_THEN1493:%.*]], label [[IF_END1494:%.*]]
+// SIMD-ONLY0:       if.then1493:
+// SIMD-ONLY0-NEXT:    [[TMP926:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP926]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1494]]
+// SIMD-ONLY0:       if.end1494:
+// SIMD-ONLY0-NEXT:    [[TMP927:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP927]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP928:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1495:%.*]] = sext i16 [[TMP928]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP929:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1496:%.*]] = sext i16 [[TMP929]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1497:%.*]] = icmp eq i32 [[CONV1495]], [[CONV1496]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1497]], label [[IF_THEN1499:%.*]], label [[IF_END1500:%.*]]
+// SIMD-ONLY0:       if.then1499:
+// SIMD-ONLY0-NEXT:    [[TMP930:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP930]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1500]]
+// SIMD-ONLY0:       if.end1500:
+// SIMD-ONLY0-NEXT:    [[TMP931:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP931]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP932:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1501:%.*]] = sext i16 [[TMP932]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP933:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1502:%.*]] = sext i16 [[TMP933]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1503:%.*]] = icmp eq i32 [[CONV1501]], [[CONV1502]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1503]], label [[IF_THEN1505:%.*]], label [[IF_END1506:%.*]]
+// SIMD-ONLY0:       if.then1505:
+// SIMD-ONLY0-NEXT:    [[TMP934:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP934]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1506]]
+// SIMD-ONLY0:       if.end1506:
+// SIMD-ONLY0-NEXT:    [[TMP935:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP935]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP936:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1507:%.*]] = sext i16 [[TMP936]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP937:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1508:%.*]] = sext i16 [[TMP937]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1509:%.*]] = icmp eq i32 [[CONV1507]], [[CONV1508]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1509]], label [[IF_THEN1511:%.*]], label [[IF_ELSE1512:%.*]]
+// SIMD-ONLY0:       if.then1511:
+// SIMD-ONLY0-NEXT:    [[TMP938:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP938]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1513:%.*]]
+// SIMD-ONLY0:       if.else1512:
+// SIMD-ONLY0-NEXT:    [[TMP939:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP939]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1513]]
+// SIMD-ONLY0:       if.end1513:
+// SIMD-ONLY0-NEXT:    [[TMP940:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1514:%.*]] = sext i16 [[TMP940]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP941:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1515:%.*]] = sext i16 [[TMP941]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1516:%.*]] = icmp eq i32 [[CONV1514]], [[CONV1515]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1516]], label [[IF_THEN1518:%.*]], label [[IF_ELSE1519:%.*]]
+// SIMD-ONLY0:       if.then1518:
+// SIMD-ONLY0-NEXT:    [[TMP942:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP942]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1520:%.*]]
+// SIMD-ONLY0:       if.else1519:
+// SIMD-ONLY0-NEXT:    [[TMP943:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP943]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1520]]
+// SIMD-ONLY0:       if.end1520:
+// SIMD-ONLY0-NEXT:    [[TMP944:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1521:%.*]] = sext i16 [[TMP944]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP945:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1522:%.*]] = sext i16 [[TMP945]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1523:%.*]] = icmp eq i32 [[CONV1521]], [[CONV1522]]
+// SIMD-ONLY0-NEXT:    [[CONV1524:%.*]] = zext i1 [[CMP1523]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV1525:%.*]] = trunc i32 [[CONV1524]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1525]], ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP946:%.*]] = load i16, ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL1526:%.*]] = icmp ne i16 [[TMP946]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL1526]], label [[IF_THEN1527:%.*]], label [[IF_END1528:%.*]]
+// SIMD-ONLY0:       if.then1527:
+// SIMD-ONLY0-NEXT:    [[TMP947:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP947]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1528]]
+// SIMD-ONLY0:       if.end1528:
+// SIMD-ONLY0-NEXT:    [[TMP948:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1529:%.*]] = sext i16 [[TMP948]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP949:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1530:%.*]] = sext i16 [[TMP949]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1531:%.*]] = icmp eq i32 [[CONV1529]], [[CONV1530]]
+// SIMD-ONLY0-NEXT:    [[CONV1532:%.*]] = zext i1 [[CMP1531]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV1533:%.*]] = trunc i32 [[CONV1532]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1533]], ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP950:%.*]] = load i16, ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL1534:%.*]] = icmp ne i16 [[TMP950]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL1534]], label [[IF_THEN1535:%.*]], label [[IF_END1536:%.*]]
+// SIMD-ONLY0:       if.then1535:
+// SIMD-ONLY0-NEXT:    [[TMP951:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP951]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1536]]
+// SIMD-ONLY0:       if.end1536:
+// SIMD-ONLY0-NEXT:    [[TMP952:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1537:%.*]] = sext i16 [[TMP952]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP953:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1538:%.*]] = sext i16 [[TMP953]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1539:%.*]] = icmp eq i32 [[CONV1537]], [[CONV1538]]
+// SIMD-ONLY0-NEXT:    [[CONV1540:%.*]] = zext i1 [[CMP1539]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV1541:%.*]] = trunc i32 [[CONV1540]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1541]], ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP954:%.*]] = load i16, ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL1542:%.*]] = icmp ne i16 [[TMP954]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL1542]], label [[IF_THEN1543:%.*]], label [[IF_ELSE1544:%.*]]
+// SIMD-ONLY0:       if.then1543:
+// SIMD-ONLY0-NEXT:    [[TMP955:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP955]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1545:%.*]]
+// SIMD-ONLY0:       if.else1544:
+// SIMD-ONLY0-NEXT:    [[TMP956:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP956]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1545]]
+// SIMD-ONLY0:       if.end1545:
+// SIMD-ONLY0-NEXT:    [[TMP957:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1546:%.*]] = sext i16 [[TMP957]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP958:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1547:%.*]] = sext i16 [[TMP958]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1548:%.*]] = icmp eq i32 [[CONV1546]], [[CONV1547]]
+// SIMD-ONLY0-NEXT:    [[CONV1549:%.*]] = zext i1 [[CMP1548]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV1550:%.*]] = trunc i32 [[CONV1549]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1550]], ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP959:%.*]] = load i16, ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL1551:%.*]] = icmp ne i16 [[TMP959]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL1551]], label [[IF_THEN1552:%.*]], label [[IF_ELSE1553:%.*]]
+// SIMD-ONLY0:       if.then1552:
+// SIMD-ONLY0-NEXT:    [[TMP960:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP960]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1554:%.*]]
+// SIMD-ONLY0:       if.else1553:
+// SIMD-ONLY0-NEXT:    [[TMP961:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP961]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1554]]
+// SIMD-ONLY0:       if.end1554:
+// SIMD-ONLY0-NEXT:    [[TMP962:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP962]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP963:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1555:%.*]] = sext i16 [[TMP963]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP964:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1556:%.*]] = sext i16 [[TMP964]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1557:%.*]] = icmp sgt i32 [[CONV1555]], [[CONV1556]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1557]], label [[IF_THEN1559:%.*]], label [[IF_END1560:%.*]]
+// SIMD-ONLY0:       if.then1559:
+// SIMD-ONLY0-NEXT:    [[TMP965:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP965]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1560]]
+// SIMD-ONLY0:       if.end1560:
+// SIMD-ONLY0-NEXT:    [[TMP966:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP966]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP967:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1561:%.*]] = sext i16 [[TMP967]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP968:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1562:%.*]] = sext i16 [[TMP968]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1563:%.*]] = icmp sgt i32 [[CONV1561]], [[CONV1562]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1563]], label [[IF_THEN1565:%.*]], label [[IF_END1566:%.*]]
+// SIMD-ONLY0:       if.then1565:
+// SIMD-ONLY0-NEXT:    [[TMP969:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP969]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1566]]
+// SIMD-ONLY0:       if.end1566:
+// SIMD-ONLY0-NEXT:    [[TMP970:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP970]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP971:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1567:%.*]] = sext i16 [[TMP971]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP972:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1568:%.*]] = sext i16 [[TMP972]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1569:%.*]] = icmp slt i32 [[CONV1567]], [[CONV1568]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1569]], label [[IF_THEN1571:%.*]], label [[IF_END1572:%.*]]
+// SIMD-ONLY0:       if.then1571:
+// SIMD-ONLY0-NEXT:    [[TMP973:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP973]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1572]]
+// SIMD-ONLY0:       if.end1572:
+// SIMD-ONLY0-NEXT:    [[TMP974:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP974]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP975:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1573:%.*]] = sext i16 [[TMP975]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP976:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1574:%.*]] = sext i16 [[TMP976]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1575:%.*]] = icmp slt i32 [[CONV1573]], [[CONV1574]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1575]], label [[IF_THEN1577:%.*]], label [[IF_END1578:%.*]]
+// SIMD-ONLY0:       if.then1577:
+// SIMD-ONLY0-NEXT:    [[TMP977:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP977]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1578]]
+// SIMD-ONLY0:       if.end1578:
+// SIMD-ONLY0-NEXT:    [[TMP978:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP978]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP979:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1579:%.*]] = sext i16 [[TMP979]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP980:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1580:%.*]] = sext i16 [[TMP980]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1581:%.*]] = icmp eq i32 [[CONV1579]], [[CONV1580]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1581]], label [[IF_THEN1583:%.*]], label [[IF_END1584:%.*]]
+// SIMD-ONLY0:       if.then1583:
+// SIMD-ONLY0-NEXT:    [[TMP981:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP981]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1584]]
+// SIMD-ONLY0:       if.end1584:
+// SIMD-ONLY0-NEXT:    [[TMP982:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP982]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP983:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1585:%.*]] = sext i16 [[TMP983]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP984:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1586:%.*]] = sext i16 [[TMP984]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1587:%.*]] = icmp eq i32 [[CONV1585]], [[CONV1586]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1587]], label [[IF_THEN1589:%.*]], label [[IF_END1590:%.*]]
+// SIMD-ONLY0:       if.then1589:
+// SIMD-ONLY0-NEXT:    [[TMP985:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP985]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1590]]
+// SIMD-ONLY0:       if.end1590:
+// SIMD-ONLY0-NEXT:    [[TMP986:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1591:%.*]] = sext i16 [[TMP986]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP987:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1592:%.*]] = sext i16 [[TMP987]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1593:%.*]] = icmp sgt i32 [[CONV1591]], [[CONV1592]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1593]], label [[IF_THEN1595:%.*]], label [[IF_END1596:%.*]]
+// SIMD-ONLY0:       if.then1595:
+// SIMD-ONLY0-NEXT:    [[TMP988:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP988]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1596]]
+// SIMD-ONLY0:       if.end1596:
+// SIMD-ONLY0-NEXT:    [[TMP989:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP989]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP990:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1597:%.*]] = sext i16 [[TMP990]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP991:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1598:%.*]] = sext i16 [[TMP991]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1599:%.*]] = icmp sgt i32 [[CONV1597]], [[CONV1598]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1599]], label [[IF_THEN1601:%.*]], label [[IF_END1602:%.*]]
+// SIMD-ONLY0:       if.then1601:
+// SIMD-ONLY0-NEXT:    [[TMP992:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP992]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1602]]
+// SIMD-ONLY0:       if.end1602:
+// SIMD-ONLY0-NEXT:    [[TMP993:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP993]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP994:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1603:%.*]] = sext i16 [[TMP994]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP995:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1604:%.*]] = sext i16 [[TMP995]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1605:%.*]] = icmp slt i32 [[CONV1603]], [[CONV1604]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1605]], label [[IF_THEN1607:%.*]], label [[IF_END1608:%.*]]
+// SIMD-ONLY0:       if.then1607:
+// SIMD-ONLY0-NEXT:    [[TMP996:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP996]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1608]]
+// SIMD-ONLY0:       if.end1608:
+// SIMD-ONLY0-NEXT:    [[TMP997:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP997]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP998:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1609:%.*]] = sext i16 [[TMP998]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP999:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1610:%.*]] = sext i16 [[TMP999]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1611:%.*]] = icmp slt i32 [[CONV1609]], [[CONV1610]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1611]], label [[IF_THEN1613:%.*]], label [[IF_END1614:%.*]]
+// SIMD-ONLY0:       if.then1613:
+// SIMD-ONLY0-NEXT:    [[TMP1000:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1000]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1614]]
+// SIMD-ONLY0:       if.end1614:
+// SIMD-ONLY0-NEXT:    [[TMP1001:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1001]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1002:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1615:%.*]] = sext i16 [[TMP1002]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1003:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1616:%.*]] = sext i16 [[TMP1003]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1617:%.*]] = icmp eq i32 [[CONV1615]], [[CONV1616]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1617]], label [[IF_THEN1619:%.*]], label [[IF_END1620:%.*]]
+// SIMD-ONLY0:       if.then1619:
+// SIMD-ONLY0-NEXT:    [[TMP1004:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1004]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1620]]
+// SIMD-ONLY0:       if.end1620:
+// SIMD-ONLY0-NEXT:    [[TMP1005:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1005]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1006:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1621:%.*]] = sext i16 [[TMP1006]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1007:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1622:%.*]] = sext i16 [[TMP1007]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1623:%.*]] = icmp eq i32 [[CONV1621]], [[CONV1622]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1623]], label [[IF_THEN1625:%.*]], label [[IF_END1626:%.*]]
+// SIMD-ONLY0:       if.then1625:
+// SIMD-ONLY0-NEXT:    [[TMP1008:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1008]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1626]]
+// SIMD-ONLY0:       if.end1626:
+// SIMD-ONLY0-NEXT:    [[TMP1009:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1009]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1010:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1627:%.*]] = sext i16 [[TMP1010]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1011:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1628:%.*]] = sext i16 [[TMP1011]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1629:%.*]] = icmp eq i32 [[CONV1627]], [[CONV1628]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1629]], label [[IF_THEN1631:%.*]], label [[IF_ELSE1632:%.*]]
+// SIMD-ONLY0:       if.then1631:
+// SIMD-ONLY0-NEXT:    [[TMP1012:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1012]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1633:%.*]]
+// SIMD-ONLY0:       if.else1632:
+// SIMD-ONLY0-NEXT:    [[TMP1013:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1013]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1633]]
+// SIMD-ONLY0:       if.end1633:
+// SIMD-ONLY0-NEXT:    [[TMP1014:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1634:%.*]] = sext i16 [[TMP1014]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1015:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1635:%.*]] = sext i16 [[TMP1015]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1636:%.*]] = icmp eq i32 [[CONV1634]], [[CONV1635]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1636]], label [[IF_THEN1638:%.*]], label [[IF_ELSE1639:%.*]]
+// SIMD-ONLY0:       if.then1638:
+// SIMD-ONLY0-NEXT:    [[TMP1016:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1016]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1640:%.*]]
+// SIMD-ONLY0:       if.else1639:
+// SIMD-ONLY0-NEXT:    [[TMP1017:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1017]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1640]]
+// SIMD-ONLY0:       if.end1640:
+// SIMD-ONLY0-NEXT:    [[TMP1018:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1641:%.*]] = sext i16 [[TMP1018]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1019:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1642:%.*]] = sext i16 [[TMP1019]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1643:%.*]] = icmp eq i32 [[CONV1641]], [[CONV1642]]
+// SIMD-ONLY0-NEXT:    [[CONV1644:%.*]] = zext i1 [[CMP1643]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV1645:%.*]] = trunc i32 [[CONV1644]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1645]], ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1020:%.*]] = load i16, ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL1646:%.*]] = icmp ne i16 [[TMP1020]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL1646]], label [[IF_THEN1647:%.*]], label [[IF_END1648:%.*]]
+// SIMD-ONLY0:       if.then1647:
+// SIMD-ONLY0-NEXT:    [[TMP1021:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1021]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1648]]
+// SIMD-ONLY0:       if.end1648:
+// SIMD-ONLY0-NEXT:    [[TMP1022:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1649:%.*]] = sext i16 [[TMP1022]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1023:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1650:%.*]] = sext i16 [[TMP1023]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1651:%.*]] = icmp eq i32 [[CONV1649]], [[CONV1650]]
+// SIMD-ONLY0-NEXT:    [[CONV1652:%.*]] = zext i1 [[CMP1651]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV1653:%.*]] = trunc i32 [[CONV1652]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1653]], ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1024:%.*]] = load i16, ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL1654:%.*]] = icmp ne i16 [[TMP1024]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL1654]], label [[IF_THEN1655:%.*]], label [[IF_END1656:%.*]]
+// SIMD-ONLY0:       if.then1655:
+// SIMD-ONLY0-NEXT:    [[TMP1025:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1025]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1656]]
+// SIMD-ONLY0:       if.end1656:
+// SIMD-ONLY0-NEXT:    [[TMP1026:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1657:%.*]] = sext i16 [[TMP1026]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1027:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1658:%.*]] = sext i16 [[TMP1027]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1659:%.*]] = icmp eq i32 [[CONV1657]], [[CONV1658]]
+// SIMD-ONLY0-NEXT:    [[CONV1660:%.*]] = zext i1 [[CMP1659]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV1661:%.*]] = trunc i32 [[CONV1660]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1661]], ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1028:%.*]] = load i16, ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL1662:%.*]] = icmp ne i16 [[TMP1028]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL1662]], label [[IF_THEN1663:%.*]], label [[IF_ELSE1664:%.*]]
+// SIMD-ONLY0:       if.then1663:
+// SIMD-ONLY0-NEXT:    [[TMP1029:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1029]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1665:%.*]]
+// SIMD-ONLY0:       if.else1664:
+// SIMD-ONLY0-NEXT:    [[TMP1030:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1030]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1665]]
+// SIMD-ONLY0:       if.end1665:
+// SIMD-ONLY0-NEXT:    [[TMP1031:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1666:%.*]] = sext i16 [[TMP1031]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1032:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1667:%.*]] = sext i16 [[TMP1032]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1668:%.*]] = icmp eq i32 [[CONV1666]], [[CONV1667]]
+// SIMD-ONLY0-NEXT:    [[CONV1669:%.*]] = zext i1 [[CMP1668]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV1670:%.*]] = trunc i32 [[CONV1669]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1670]], ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1033:%.*]] = load i16, ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL1671:%.*]] = icmp ne i16 [[TMP1033]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL1671]], label [[IF_THEN1672:%.*]], label [[IF_ELSE1673:%.*]]
+// SIMD-ONLY0:       if.then1672:
+// SIMD-ONLY0-NEXT:    [[TMP1034:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1034]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1674:%.*]]
+// SIMD-ONLY0:       if.else1673:
+// SIMD-ONLY0-NEXT:    [[TMP1035:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1035]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1674]]
+// SIMD-ONLY0:       if.end1674:
+// SIMD-ONLY0-NEXT:    [[TMP1036:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1036]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1037:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1675:%.*]] = sext i16 [[TMP1037]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1038:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1676:%.*]] = sext i16 [[TMP1038]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1677:%.*]] = icmp sgt i32 [[CONV1675]], [[CONV1676]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1677]], label [[IF_THEN1679:%.*]], label [[IF_END1680:%.*]]
+// SIMD-ONLY0:       if.then1679:
+// SIMD-ONLY0-NEXT:    [[TMP1039:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1039]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1680]]
+// SIMD-ONLY0:       if.end1680:
+// SIMD-ONLY0-NEXT:    [[TMP1040:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1040]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1041:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1681:%.*]] = sext i16 [[TMP1041]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1042:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1682:%.*]] = sext i16 [[TMP1042]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1683:%.*]] = icmp sgt i32 [[CONV1681]], [[CONV1682]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1683]], label [[IF_THEN1685:%.*]], label [[IF_END1686:%.*]]
+// SIMD-ONLY0:       if.then1685:
+// SIMD-ONLY0-NEXT:    [[TMP1043:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1043]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1686]]
+// SIMD-ONLY0:       if.end1686:
+// SIMD-ONLY0-NEXT:    [[TMP1044:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1044]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1045:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1687:%.*]] = sext i16 [[TMP1045]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1046:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1688:%.*]] = sext i16 [[TMP1046]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1689:%.*]] = icmp slt i32 [[CONV1687]], [[CONV1688]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1689]], label [[IF_THEN1691:%.*]], label [[IF_END1692:%.*]]
+// SIMD-ONLY0:       if.then1691:
+// SIMD-ONLY0-NEXT:    [[TMP1047:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1047]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1692]]
+// SIMD-ONLY0:       if.end1692:
+// SIMD-ONLY0-NEXT:    [[TMP1048:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1048]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1049:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1693:%.*]] = sext i16 [[TMP1049]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1050:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1694:%.*]] = sext i16 [[TMP1050]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1695:%.*]] = icmp slt i32 [[CONV1693]], [[CONV1694]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1695]], label [[IF_THEN1697:%.*]], label [[IF_END1698:%.*]]
+// SIMD-ONLY0:       if.then1697:
+// SIMD-ONLY0-NEXT:    [[TMP1051:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1051]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1698]]
+// SIMD-ONLY0:       if.end1698:
+// SIMD-ONLY0-NEXT:    [[TMP1052:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1052]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1053:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1699:%.*]] = sext i16 [[TMP1053]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1054:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1700:%.*]] = sext i16 [[TMP1054]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1701:%.*]] = icmp eq i32 [[CONV1699]], [[CONV1700]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1701]], label [[IF_THEN1703:%.*]], label [[IF_END1704:%.*]]
+// SIMD-ONLY0:       if.then1703:
+// SIMD-ONLY0-NEXT:    [[TMP1055:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1055]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1704]]
+// SIMD-ONLY0:       if.end1704:
+// SIMD-ONLY0-NEXT:    [[TMP1056:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1056]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1057:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1705:%.*]] = sext i16 [[TMP1057]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1058:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1706:%.*]] = sext i16 [[TMP1058]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1707:%.*]] = icmp eq i32 [[CONV1705]], [[CONV1706]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1707]], label [[IF_THEN1709:%.*]], label [[IF_END1710:%.*]]
+// SIMD-ONLY0:       if.then1709:
+// SIMD-ONLY0-NEXT:    [[TMP1059:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1059]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1710]]
+// SIMD-ONLY0:       if.end1710:
+// SIMD-ONLY0-NEXT:    [[TMP1060:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1711:%.*]] = sext i16 [[TMP1060]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1061:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1712:%.*]] = sext i16 [[TMP1061]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1713:%.*]] = icmp sgt i32 [[CONV1711]], [[CONV1712]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1713]], label [[IF_THEN1715:%.*]], label [[IF_END1716:%.*]]
+// SIMD-ONLY0:       if.then1715:
+// SIMD-ONLY0-NEXT:    [[TMP1062:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1062]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1716]]
+// SIMD-ONLY0:       if.end1716:
+// SIMD-ONLY0-NEXT:    [[TMP1063:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1063]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1064:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1717:%.*]] = sext i16 [[TMP1064]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1065:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1718:%.*]] = sext i16 [[TMP1065]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1719:%.*]] = icmp sgt i32 [[CONV1717]], [[CONV1718]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1719]], label [[IF_THEN1721:%.*]], label [[IF_END1722:%.*]]
+// SIMD-ONLY0:       if.then1721:
+// SIMD-ONLY0-NEXT:    [[TMP1066:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1066]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1722]]
+// SIMD-ONLY0:       if.end1722:
+// SIMD-ONLY0-NEXT:    [[TMP1067:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1067]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1068:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1723:%.*]] = sext i16 [[TMP1068]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1069:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1724:%.*]] = sext i16 [[TMP1069]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1725:%.*]] = icmp slt i32 [[CONV1723]], [[CONV1724]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1725]], label [[IF_THEN1727:%.*]], label [[IF_END1728:%.*]]
+// SIMD-ONLY0:       if.then1727:
+// SIMD-ONLY0-NEXT:    [[TMP1070:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1070]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1728]]
+// SIMD-ONLY0:       if.end1728:
+// SIMD-ONLY0-NEXT:    [[TMP1071:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1071]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1072:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1729:%.*]] = sext i16 [[TMP1072]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1073:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1730:%.*]] = sext i16 [[TMP1073]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1731:%.*]] = icmp slt i32 [[CONV1729]], [[CONV1730]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1731]], label [[IF_THEN1733:%.*]], label [[IF_END1734:%.*]]
+// SIMD-ONLY0:       if.then1733:
+// SIMD-ONLY0-NEXT:    [[TMP1074:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1074]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1734]]
+// SIMD-ONLY0:       if.end1734:
+// SIMD-ONLY0-NEXT:    [[TMP1075:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1075]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1076:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1735:%.*]] = sext i16 [[TMP1076]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1077:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1736:%.*]] = sext i16 [[TMP1077]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1737:%.*]] = icmp eq i32 [[CONV1735]], [[CONV1736]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1737]], label [[IF_THEN1739:%.*]], label [[IF_END1740:%.*]]
+// SIMD-ONLY0:       if.then1739:
+// SIMD-ONLY0-NEXT:    [[TMP1078:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1078]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1740]]
+// SIMD-ONLY0:       if.end1740:
+// SIMD-ONLY0-NEXT:    [[TMP1079:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1079]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1080:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1741:%.*]] = sext i16 [[TMP1080]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1081:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1742:%.*]] = sext i16 [[TMP1081]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1743:%.*]] = icmp eq i32 [[CONV1741]], [[CONV1742]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1743]], label [[IF_THEN1745:%.*]], label [[IF_END1746:%.*]]
+// SIMD-ONLY0:       if.then1745:
+// SIMD-ONLY0-NEXT:    [[TMP1082:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1082]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1746]]
+// SIMD-ONLY0:       if.end1746:
+// SIMD-ONLY0-NEXT:    [[TMP1083:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1083]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1084:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1747:%.*]] = sext i16 [[TMP1084]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1085:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1748:%.*]] = sext i16 [[TMP1085]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1749:%.*]] = icmp eq i32 [[CONV1747]], [[CONV1748]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1749]], label [[IF_THEN1751:%.*]], label [[IF_ELSE1752:%.*]]
+// SIMD-ONLY0:       if.then1751:
+// SIMD-ONLY0-NEXT:    [[TMP1086:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1086]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1753:%.*]]
+// SIMD-ONLY0:       if.else1752:
+// SIMD-ONLY0-NEXT:    [[TMP1087:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1087]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1753]]
+// SIMD-ONLY0:       if.end1753:
+// SIMD-ONLY0-NEXT:    [[TMP1088:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1754:%.*]] = sext i16 [[TMP1088]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1089:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1755:%.*]] = sext i16 [[TMP1089]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1756:%.*]] = icmp eq i32 [[CONV1754]], [[CONV1755]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1756]], label [[IF_THEN1758:%.*]], label [[IF_ELSE1759:%.*]]
+// SIMD-ONLY0:       if.then1758:
+// SIMD-ONLY0-NEXT:    [[TMP1090:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1090]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1760:%.*]]
+// SIMD-ONLY0:       if.else1759:
+// SIMD-ONLY0-NEXT:    [[TMP1091:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1091]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1760]]
+// SIMD-ONLY0:       if.end1760:
+// SIMD-ONLY0-NEXT:    [[TMP1092:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1761:%.*]] = sext i16 [[TMP1092]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1093:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1762:%.*]] = sext i16 [[TMP1093]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1763:%.*]] = icmp eq i32 [[CONV1761]], [[CONV1762]]
+// SIMD-ONLY0-NEXT:    [[CONV1764:%.*]] = zext i1 [[CMP1763]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV1765:%.*]] = trunc i32 [[CONV1764]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1765]], ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1094:%.*]] = load i16, ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL1766:%.*]] = icmp ne i16 [[TMP1094]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL1766]], label [[IF_THEN1767:%.*]], label [[IF_END1768:%.*]]
+// SIMD-ONLY0:       if.then1767:
+// SIMD-ONLY0-NEXT:    [[TMP1095:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1095]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1768]]
+// SIMD-ONLY0:       if.end1768:
+// SIMD-ONLY0-NEXT:    [[TMP1096:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1769:%.*]] = sext i16 [[TMP1096]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1097:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1770:%.*]] = sext i16 [[TMP1097]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1771:%.*]] = icmp eq i32 [[CONV1769]], [[CONV1770]]
+// SIMD-ONLY0-NEXT:    [[CONV1772:%.*]] = zext i1 [[CMP1771]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV1773:%.*]] = trunc i32 [[CONV1772]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1773]], ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1098:%.*]] = load i16, ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL1774:%.*]] = icmp ne i16 [[TMP1098]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL1774]], label [[IF_THEN1775:%.*]], label [[IF_END1776:%.*]]
+// SIMD-ONLY0:       if.then1775:
+// SIMD-ONLY0-NEXT:    [[TMP1099:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1099]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1776]]
+// SIMD-ONLY0:       if.end1776:
+// SIMD-ONLY0-NEXT:    [[TMP1100:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1777:%.*]] = sext i16 [[TMP1100]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1101:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1778:%.*]] = sext i16 [[TMP1101]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1779:%.*]] = icmp eq i32 [[CONV1777]], [[CONV1778]]
+// SIMD-ONLY0-NEXT:    [[CONV1780:%.*]] = zext i1 [[CMP1779]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV1781:%.*]] = trunc i32 [[CONV1780]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1781]], ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1102:%.*]] = load i16, ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL1782:%.*]] = icmp ne i16 [[TMP1102]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL1782]], label [[IF_THEN1783:%.*]], label [[IF_ELSE1784:%.*]]
+// SIMD-ONLY0:       if.then1783:
+// SIMD-ONLY0-NEXT:    [[TMP1103:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1103]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1785:%.*]]
+// SIMD-ONLY0:       if.else1784:
+// SIMD-ONLY0-NEXT:    [[TMP1104:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1104]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1785]]
+// SIMD-ONLY0:       if.end1785:
+// SIMD-ONLY0-NEXT:    [[TMP1105:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1786:%.*]] = sext i16 [[TMP1105]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1106:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1787:%.*]] = sext i16 [[TMP1106]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1788:%.*]] = icmp eq i32 [[CONV1786]], [[CONV1787]]
+// SIMD-ONLY0-NEXT:    [[CONV1789:%.*]] = zext i1 [[CMP1788]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV1790:%.*]] = trunc i32 [[CONV1789]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1790]], ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1107:%.*]] = load i16, ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL1791:%.*]] = icmp ne i16 [[TMP1107]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL1791]], label [[IF_THEN1792:%.*]], label [[IF_ELSE1793:%.*]]
+// SIMD-ONLY0:       if.then1792:
+// SIMD-ONLY0-NEXT:    [[TMP1108:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1108]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1794:%.*]]
+// SIMD-ONLY0:       if.else1793:
+// SIMD-ONLY0-NEXT:    [[TMP1109:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1109]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1794]]
+// SIMD-ONLY0:       if.end1794:
+// SIMD-ONLY0-NEXT:    [[TMP1110:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1110]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1111:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1795:%.*]] = sext i16 [[TMP1111]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1112:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1796:%.*]] = sext i16 [[TMP1112]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1797:%.*]] = icmp sgt i32 [[CONV1795]], [[CONV1796]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1797]], label [[IF_THEN1799:%.*]], label [[IF_END1800:%.*]]
+// SIMD-ONLY0:       if.then1799:
+// SIMD-ONLY0-NEXT:    [[TMP1113:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1113]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1800]]
+// SIMD-ONLY0:       if.end1800:
+// SIMD-ONLY0-NEXT:    [[TMP1114:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1114]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1115:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1801:%.*]] = sext i16 [[TMP1115]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1116:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1802:%.*]] = sext i16 [[TMP1116]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1803:%.*]] = icmp sgt i32 [[CONV1801]], [[CONV1802]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1803]], label [[IF_THEN1805:%.*]], label [[IF_END1806:%.*]]
+// SIMD-ONLY0:       if.then1805:
+// SIMD-ONLY0-NEXT:    [[TMP1117:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1117]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1806]]
+// SIMD-ONLY0:       if.end1806:
+// SIMD-ONLY0-NEXT:    [[TMP1118:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1118]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1119:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1807:%.*]] = sext i16 [[TMP1119]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1120:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1808:%.*]] = sext i16 [[TMP1120]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1809:%.*]] = icmp slt i32 [[CONV1807]], [[CONV1808]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1809]], label [[IF_THEN1811:%.*]], label [[IF_END1812:%.*]]
+// SIMD-ONLY0:       if.then1811:
+// SIMD-ONLY0-NEXT:    [[TMP1121:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1121]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1812]]
+// SIMD-ONLY0:       if.end1812:
+// SIMD-ONLY0-NEXT:    [[TMP1122:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1122]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1123:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1813:%.*]] = sext i16 [[TMP1123]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1124:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1814:%.*]] = sext i16 [[TMP1124]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1815:%.*]] = icmp slt i32 [[CONV1813]], [[CONV1814]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1815]], label [[IF_THEN1817:%.*]], label [[IF_END1818:%.*]]
+// SIMD-ONLY0:       if.then1817:
+// SIMD-ONLY0-NEXT:    [[TMP1125:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1125]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1818]]
+// SIMD-ONLY0:       if.end1818:
+// SIMD-ONLY0-NEXT:    [[TMP1126:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1126]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1127:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1819:%.*]] = sext i16 [[TMP1127]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1128:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1820:%.*]] = sext i16 [[TMP1128]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1821:%.*]] = icmp eq i32 [[CONV1819]], [[CONV1820]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1821]], label [[IF_THEN1823:%.*]], label [[IF_END1824:%.*]]
+// SIMD-ONLY0:       if.then1823:
+// SIMD-ONLY0-NEXT:    [[TMP1129:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1129]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1824]]
+// SIMD-ONLY0:       if.end1824:
+// SIMD-ONLY0-NEXT:    [[TMP1130:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1130]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1131:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1825:%.*]] = sext i16 [[TMP1131]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1132:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1826:%.*]] = sext i16 [[TMP1132]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1827:%.*]] = icmp eq i32 [[CONV1825]], [[CONV1826]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1827]], label [[IF_THEN1829:%.*]], label [[IF_END1830:%.*]]
+// SIMD-ONLY0:       if.then1829:
+// SIMD-ONLY0-NEXT:    [[TMP1133:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1133]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1830]]
+// SIMD-ONLY0:       if.end1830:
+// SIMD-ONLY0-NEXT:    [[TMP1134:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1831:%.*]] = sext i16 [[TMP1134]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1135:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1832:%.*]] = sext i16 [[TMP1135]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1833:%.*]] = icmp sgt i32 [[CONV1831]], [[CONV1832]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1833]], label [[IF_THEN1835:%.*]], label [[IF_END1836:%.*]]
+// SIMD-ONLY0:       if.then1835:
+// SIMD-ONLY0-NEXT:    [[TMP1136:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1136]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1836]]
+// SIMD-ONLY0:       if.end1836:
+// SIMD-ONLY0-NEXT:    [[TMP1137:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1137]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1138:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1837:%.*]] = sext i16 [[TMP1138]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1139:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1838:%.*]] = sext i16 [[TMP1139]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1839:%.*]] = icmp sgt i32 [[CONV1837]], [[CONV1838]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1839]], label [[IF_THEN1841:%.*]], label [[IF_END1842:%.*]]
+// SIMD-ONLY0:       if.then1841:
+// SIMD-ONLY0-NEXT:    [[TMP1140:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1140]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1842]]
+// SIMD-ONLY0:       if.end1842:
+// SIMD-ONLY0-NEXT:    [[TMP1141:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1141]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1142:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1843:%.*]] = sext i16 [[TMP1142]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1143:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1844:%.*]] = sext i16 [[TMP1143]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1845:%.*]] = icmp slt i32 [[CONV1843]], [[CONV1844]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1845]], label [[IF_THEN1847:%.*]], label [[IF_END1848:%.*]]
+// SIMD-ONLY0:       if.then1847:
+// SIMD-ONLY0-NEXT:    [[TMP1144:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1144]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1848]]
+// SIMD-ONLY0:       if.end1848:
+// SIMD-ONLY0-NEXT:    [[TMP1145:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1145]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1146:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1849:%.*]] = sext i16 [[TMP1146]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1147:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1850:%.*]] = sext i16 [[TMP1147]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1851:%.*]] = icmp slt i32 [[CONV1849]], [[CONV1850]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1851]], label [[IF_THEN1853:%.*]], label [[IF_END1854:%.*]]
+// SIMD-ONLY0:       if.then1853:
+// SIMD-ONLY0-NEXT:    [[TMP1148:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1148]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1854]]
+// SIMD-ONLY0:       if.end1854:
+// SIMD-ONLY0-NEXT:    [[TMP1149:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1149]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1150:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1855:%.*]] = sext i16 [[TMP1150]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1151:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1856:%.*]] = sext i16 [[TMP1151]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1857:%.*]] = icmp eq i32 [[CONV1855]], [[CONV1856]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1857]], label [[IF_THEN1859:%.*]], label [[IF_END1860:%.*]]
+// SIMD-ONLY0:       if.then1859:
+// SIMD-ONLY0-NEXT:    [[TMP1152:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1152]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1860]]
+// SIMD-ONLY0:       if.end1860:
+// SIMD-ONLY0-NEXT:    [[TMP1153:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1153]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1154:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1861:%.*]] = sext i16 [[TMP1154]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1155:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1862:%.*]] = sext i16 [[TMP1155]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1863:%.*]] = icmp eq i32 [[CONV1861]], [[CONV1862]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1863]], label [[IF_THEN1865:%.*]], label [[IF_END1866:%.*]]
+// SIMD-ONLY0:       if.then1865:
+// SIMD-ONLY0-NEXT:    [[TMP1156:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1156]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1866]]
+// SIMD-ONLY0:       if.end1866:
+// SIMD-ONLY0-NEXT:    [[TMP1157:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1157]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1158:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1867:%.*]] = sext i16 [[TMP1158]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1159:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1868:%.*]] = sext i16 [[TMP1159]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1869:%.*]] = icmp eq i32 [[CONV1867]], [[CONV1868]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1869]], label [[IF_THEN1871:%.*]], label [[IF_ELSE1872:%.*]]
+// SIMD-ONLY0:       if.then1871:
+// SIMD-ONLY0-NEXT:    [[TMP1160:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1160]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1873:%.*]]
+// SIMD-ONLY0:       if.else1872:
+// SIMD-ONLY0-NEXT:    [[TMP1161:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1161]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1873]]
+// SIMD-ONLY0:       if.end1873:
+// SIMD-ONLY0-NEXT:    [[TMP1162:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1874:%.*]] = sext i16 [[TMP1162]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1163:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1875:%.*]] = sext i16 [[TMP1163]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1876:%.*]] = icmp eq i32 [[CONV1874]], [[CONV1875]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1876]], label [[IF_THEN1878:%.*]], label [[IF_ELSE1879:%.*]]
+// SIMD-ONLY0:       if.then1878:
+// SIMD-ONLY0-NEXT:    [[TMP1164:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1164]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1880:%.*]]
+// SIMD-ONLY0:       if.else1879:
+// SIMD-ONLY0-NEXT:    [[TMP1165:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1165]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1880]]
+// SIMD-ONLY0:       if.end1880:
+// SIMD-ONLY0-NEXT:    [[TMP1166:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1881:%.*]] = sext i16 [[TMP1166]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1167:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1882:%.*]] = sext i16 [[TMP1167]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1883:%.*]] = icmp eq i32 [[CONV1881]], [[CONV1882]]
+// SIMD-ONLY0-NEXT:    [[CONV1884:%.*]] = zext i1 [[CMP1883]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV1885:%.*]] = trunc i32 [[CONV1884]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1885]], ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1168:%.*]] = load i16, ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL1886:%.*]] = icmp ne i16 [[TMP1168]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL1886]], label [[IF_THEN1887:%.*]], label [[IF_END1888:%.*]]
+// SIMD-ONLY0:       if.then1887:
+// SIMD-ONLY0-NEXT:    [[TMP1169:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1169]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1888]]
+// SIMD-ONLY0:       if.end1888:
+// SIMD-ONLY0-NEXT:    [[TMP1170:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1889:%.*]] = sext i16 [[TMP1170]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1171:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1890:%.*]] = sext i16 [[TMP1171]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1891:%.*]] = icmp eq i32 [[CONV1889]], [[CONV1890]]
+// SIMD-ONLY0-NEXT:    [[CONV1892:%.*]] = zext i1 [[CMP1891]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV1893:%.*]] = trunc i32 [[CONV1892]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1893]], ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1172:%.*]] = load i16, ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL1894:%.*]] = icmp ne i16 [[TMP1172]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL1894]], label [[IF_THEN1895:%.*]], label [[IF_END1896:%.*]]
+// SIMD-ONLY0:       if.then1895:
+// SIMD-ONLY0-NEXT:    [[TMP1173:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1173]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1896]]
+// SIMD-ONLY0:       if.end1896:
+// SIMD-ONLY0-NEXT:    [[TMP1174:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1897:%.*]] = sext i16 [[TMP1174]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1175:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1898:%.*]] = sext i16 [[TMP1175]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1899:%.*]] = icmp eq i32 [[CONV1897]], [[CONV1898]]
+// SIMD-ONLY0-NEXT:    [[CONV1900:%.*]] = zext i1 [[CMP1899]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV1901:%.*]] = trunc i32 [[CONV1900]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1901]], ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1176:%.*]] = load i16, ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL1902:%.*]] = icmp ne i16 [[TMP1176]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL1902]], label [[IF_THEN1903:%.*]], label [[IF_ELSE1904:%.*]]
+// SIMD-ONLY0:       if.then1903:
+// SIMD-ONLY0-NEXT:    [[TMP1177:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1177]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1905:%.*]]
+// SIMD-ONLY0:       if.else1904:
+// SIMD-ONLY0-NEXT:    [[TMP1178:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1178]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1905]]
+// SIMD-ONLY0:       if.end1905:
+// SIMD-ONLY0-NEXT:    [[TMP1179:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1906:%.*]] = sext i16 [[TMP1179]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1180:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1907:%.*]] = sext i16 [[TMP1180]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1908:%.*]] = icmp eq i32 [[CONV1906]], [[CONV1907]]
+// SIMD-ONLY0-NEXT:    [[CONV1909:%.*]] = zext i1 [[CMP1908]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV1910:%.*]] = trunc i32 [[CONV1909]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV1910]], ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1181:%.*]] = load i16, ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL1911:%.*]] = icmp ne i16 [[TMP1181]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL1911]], label [[IF_THEN1912:%.*]], label [[IF_ELSE1913:%.*]]
+// SIMD-ONLY0:       if.then1912:
+// SIMD-ONLY0-NEXT:    [[TMP1182:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1182]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1914:%.*]]
+// SIMD-ONLY0:       if.else1913:
+// SIMD-ONLY0-NEXT:    [[TMP1183:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1183]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1914]]
+// SIMD-ONLY0:       if.end1914:
+// SIMD-ONLY0-NEXT:    [[TMP1184:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1184]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1185:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1915:%.*]] = sext i16 [[TMP1185]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1186:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1916:%.*]] = sext i16 [[TMP1186]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1917:%.*]] = icmp sgt i32 [[CONV1915]], [[CONV1916]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1917]], label [[IF_THEN1919:%.*]], label [[IF_END1920:%.*]]
+// SIMD-ONLY0:       if.then1919:
+// SIMD-ONLY0-NEXT:    [[TMP1187:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1187]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1920]]
+// SIMD-ONLY0:       if.end1920:
+// SIMD-ONLY0-NEXT:    [[TMP1188:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1188]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1189:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1921:%.*]] = sext i16 [[TMP1189]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1190:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1922:%.*]] = sext i16 [[TMP1190]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1923:%.*]] = icmp sgt i32 [[CONV1921]], [[CONV1922]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1923]], label [[IF_THEN1925:%.*]], label [[IF_END1926:%.*]]
+// SIMD-ONLY0:       if.then1925:
+// SIMD-ONLY0-NEXT:    [[TMP1191:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1191]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1926]]
+// SIMD-ONLY0:       if.end1926:
+// SIMD-ONLY0-NEXT:    [[TMP1192:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1192]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1193:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1927:%.*]] = sext i16 [[TMP1193]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1194:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1928:%.*]] = sext i16 [[TMP1194]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1929:%.*]] = icmp slt i32 [[CONV1927]], [[CONV1928]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1929]], label [[IF_THEN1931:%.*]], label [[IF_END1932:%.*]]
+// SIMD-ONLY0:       if.then1931:
+// SIMD-ONLY0-NEXT:    [[TMP1195:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1195]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1932]]
+// SIMD-ONLY0:       if.end1932:
+// SIMD-ONLY0-NEXT:    [[TMP1196:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1196]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1197:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1933:%.*]] = sext i16 [[TMP1197]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1198:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1934:%.*]] = sext i16 [[TMP1198]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1935:%.*]] = icmp slt i32 [[CONV1933]], [[CONV1934]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1935]], label [[IF_THEN1937:%.*]], label [[IF_END1938:%.*]]
+// SIMD-ONLY0:       if.then1937:
+// SIMD-ONLY0-NEXT:    [[TMP1199:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1199]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1938]]
+// SIMD-ONLY0:       if.end1938:
+// SIMD-ONLY0-NEXT:    [[TMP1200:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1200]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1201:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1939:%.*]] = sext i16 [[TMP1201]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1202:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1940:%.*]] = sext i16 [[TMP1202]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1941:%.*]] = icmp eq i32 [[CONV1939]], [[CONV1940]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1941]], label [[IF_THEN1943:%.*]], label [[IF_END1944:%.*]]
+// SIMD-ONLY0:       if.then1943:
+// SIMD-ONLY0-NEXT:    [[TMP1203:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1203]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1944]]
+// SIMD-ONLY0:       if.end1944:
+// SIMD-ONLY0-NEXT:    [[TMP1204:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1204]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1205:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1945:%.*]] = sext i16 [[TMP1205]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1206:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1946:%.*]] = sext i16 [[TMP1206]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1947:%.*]] = icmp eq i32 [[CONV1945]], [[CONV1946]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1947]], label [[IF_THEN1949:%.*]], label [[IF_END1950:%.*]]
+// SIMD-ONLY0:       if.then1949:
+// SIMD-ONLY0-NEXT:    [[TMP1207:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1207]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1950]]
+// SIMD-ONLY0:       if.end1950:
+// SIMD-ONLY0-NEXT:    [[TMP1208:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1951:%.*]] = sext i16 [[TMP1208]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1209:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1952:%.*]] = sext i16 [[TMP1209]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1953:%.*]] = icmp sgt i32 [[CONV1951]], [[CONV1952]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1953]], label [[IF_THEN1955:%.*]], label [[IF_END1956:%.*]]
+// SIMD-ONLY0:       if.then1955:
+// SIMD-ONLY0-NEXT:    [[TMP1210:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1210]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1956]]
+// SIMD-ONLY0:       if.end1956:
+// SIMD-ONLY0-NEXT:    [[TMP1211:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1211]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1212:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1957:%.*]] = sext i16 [[TMP1212]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1213:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1958:%.*]] = sext i16 [[TMP1213]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1959:%.*]] = icmp sgt i32 [[CONV1957]], [[CONV1958]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1959]], label [[IF_THEN1961:%.*]], label [[IF_END1962:%.*]]
+// SIMD-ONLY0:       if.then1961:
+// SIMD-ONLY0-NEXT:    [[TMP1214:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1214]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1962]]
+// SIMD-ONLY0:       if.end1962:
+// SIMD-ONLY0-NEXT:    [[TMP1215:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1215]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1216:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1963:%.*]] = sext i16 [[TMP1216]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1217:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1964:%.*]] = sext i16 [[TMP1217]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1965:%.*]] = icmp slt i32 [[CONV1963]], [[CONV1964]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1965]], label [[IF_THEN1967:%.*]], label [[IF_END1968:%.*]]
+// SIMD-ONLY0:       if.then1967:
+// SIMD-ONLY0-NEXT:    [[TMP1218:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1218]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1968]]
+// SIMD-ONLY0:       if.end1968:
+// SIMD-ONLY0-NEXT:    [[TMP1219:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1219]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1220:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1969:%.*]] = sext i16 [[TMP1220]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1221:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1970:%.*]] = sext i16 [[TMP1221]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1971:%.*]] = icmp slt i32 [[CONV1969]], [[CONV1970]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1971]], label [[IF_THEN1973:%.*]], label [[IF_END1974:%.*]]
+// SIMD-ONLY0:       if.then1973:
+// SIMD-ONLY0-NEXT:    [[TMP1222:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1222]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1974]]
+// SIMD-ONLY0:       if.end1974:
+// SIMD-ONLY0-NEXT:    [[TMP1223:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1223]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1224:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1975:%.*]] = sext i16 [[TMP1224]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1225:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1976:%.*]] = sext i16 [[TMP1225]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1977:%.*]] = icmp eq i32 [[CONV1975]], [[CONV1976]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1977]], label [[IF_THEN1979:%.*]], label [[IF_END1980:%.*]]
+// SIMD-ONLY0:       if.then1979:
+// SIMD-ONLY0-NEXT:    [[TMP1226:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1226]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1980]]
+// SIMD-ONLY0:       if.end1980:
+// SIMD-ONLY0-NEXT:    [[TMP1227:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1227]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1228:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1981:%.*]] = sext i16 [[TMP1228]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1229:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1982:%.*]] = sext i16 [[TMP1229]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1983:%.*]] = icmp eq i32 [[CONV1981]], [[CONV1982]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1983]], label [[IF_THEN1985:%.*]], label [[IF_END1986:%.*]]
+// SIMD-ONLY0:       if.then1985:
+// SIMD-ONLY0-NEXT:    [[TMP1230:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1230]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1986]]
+// SIMD-ONLY0:       if.end1986:
+// SIMD-ONLY0-NEXT:    [[TMP1231:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1231]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1232:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1987:%.*]] = sext i16 [[TMP1232]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1233:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1988:%.*]] = sext i16 [[TMP1233]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1989:%.*]] = icmp eq i32 [[CONV1987]], [[CONV1988]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1989]], label [[IF_THEN1991:%.*]], label [[IF_ELSE1992:%.*]]
+// SIMD-ONLY0:       if.then1991:
+// SIMD-ONLY0-NEXT:    [[TMP1234:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1234]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1993:%.*]]
+// SIMD-ONLY0:       if.else1992:
+// SIMD-ONLY0-NEXT:    [[TMP1235:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1235]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END1993]]
+// SIMD-ONLY0:       if.end1993:
+// SIMD-ONLY0-NEXT:    [[TMP1236:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1994:%.*]] = sext i16 [[TMP1236]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1237:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1995:%.*]] = sext i16 [[TMP1237]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP1996:%.*]] = icmp eq i32 [[CONV1994]], [[CONV1995]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1996]], label [[IF_THEN1998:%.*]], label [[IF_ELSE1999:%.*]]
+// SIMD-ONLY0:       if.then1998:
+// SIMD-ONLY0-NEXT:    [[TMP1238:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1238]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2000:%.*]]
+// SIMD-ONLY0:       if.else1999:
+// SIMD-ONLY0-NEXT:    [[TMP1239:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1239]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2000]]
+// SIMD-ONLY0:       if.end2000:
+// SIMD-ONLY0-NEXT:    [[TMP1240:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2001:%.*]] = sext i16 [[TMP1240]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1241:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2002:%.*]] = sext i16 [[TMP1241]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2003:%.*]] = icmp eq i32 [[CONV2001]], [[CONV2002]]
+// SIMD-ONLY0-NEXT:    [[CONV2004:%.*]] = zext i1 [[CMP2003]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV2005:%.*]] = trunc i32 [[CONV2004]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2005]], ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1242:%.*]] = load i16, ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL2006:%.*]] = icmp ne i16 [[TMP1242]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2006]], label [[IF_THEN2007:%.*]], label [[IF_END2008:%.*]]
+// SIMD-ONLY0:       if.then2007:
+// SIMD-ONLY0-NEXT:    [[TMP1243:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1243]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2008]]
+// SIMD-ONLY0:       if.end2008:
+// SIMD-ONLY0-NEXT:    [[TMP1244:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2009:%.*]] = sext i16 [[TMP1244]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1245:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2010:%.*]] = sext i16 [[TMP1245]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2011:%.*]] = icmp eq i32 [[CONV2009]], [[CONV2010]]
+// SIMD-ONLY0-NEXT:    [[CONV2012:%.*]] = zext i1 [[CMP2011]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV2013:%.*]] = trunc i32 [[CONV2012]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2013]], ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1246:%.*]] = load i16, ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL2014:%.*]] = icmp ne i16 [[TMP1246]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2014]], label [[IF_THEN2015:%.*]], label [[IF_END2016:%.*]]
+// SIMD-ONLY0:       if.then2015:
+// SIMD-ONLY0-NEXT:    [[TMP1247:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1247]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2016]]
+// SIMD-ONLY0:       if.end2016:
+// SIMD-ONLY0-NEXT:    [[TMP1248:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2017:%.*]] = sext i16 [[TMP1248]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1249:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2018:%.*]] = sext i16 [[TMP1249]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2019:%.*]] = icmp eq i32 [[CONV2017]], [[CONV2018]]
+// SIMD-ONLY0-NEXT:    [[CONV2020:%.*]] = zext i1 [[CMP2019]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV2021:%.*]] = trunc i32 [[CONV2020]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2021]], ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1250:%.*]] = load i16, ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL2022:%.*]] = icmp ne i16 [[TMP1250]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2022]], label [[IF_THEN2023:%.*]], label [[IF_ELSE2024:%.*]]
+// SIMD-ONLY0:       if.then2023:
+// SIMD-ONLY0-NEXT:    [[TMP1251:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1251]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2025:%.*]]
+// SIMD-ONLY0:       if.else2024:
+// SIMD-ONLY0-NEXT:    [[TMP1252:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1252]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2025]]
+// SIMD-ONLY0:       if.end2025:
+// SIMD-ONLY0-NEXT:    [[TMP1253:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2026:%.*]] = sext i16 [[TMP1253]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1254:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2027:%.*]] = sext i16 [[TMP1254]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2028:%.*]] = icmp eq i32 [[CONV2026]], [[CONV2027]]
+// SIMD-ONLY0-NEXT:    [[CONV2029:%.*]] = zext i1 [[CMP2028]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV2030:%.*]] = trunc i32 [[CONV2029]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2030]], ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1255:%.*]] = load i16, ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL2031:%.*]] = icmp ne i16 [[TMP1255]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2031]], label [[IF_THEN2032:%.*]], label [[IF_ELSE2033:%.*]]
+// SIMD-ONLY0:       if.then2032:
+// SIMD-ONLY0-NEXT:    [[TMP1256:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1256]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2034:%.*]]
+// SIMD-ONLY0:       if.else2033:
+// SIMD-ONLY0-NEXT:    [[TMP1257:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1257]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2034]]
+// SIMD-ONLY0:       if.end2034:
+// SIMD-ONLY0-NEXT:    [[TMP1258:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1258]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1259:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2035:%.*]] = sext i16 [[TMP1259]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1260:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2036:%.*]] = sext i16 [[TMP1260]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2037:%.*]] = icmp sgt i32 [[CONV2035]], [[CONV2036]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2037]], label [[IF_THEN2039:%.*]], label [[IF_END2040:%.*]]
+// SIMD-ONLY0:       if.then2039:
+// SIMD-ONLY0-NEXT:    [[TMP1261:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1261]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2040]]
+// SIMD-ONLY0:       if.end2040:
+// SIMD-ONLY0-NEXT:    [[TMP1262:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1262]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1263:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2041:%.*]] = sext i16 [[TMP1263]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1264:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2042:%.*]] = sext i16 [[TMP1264]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2043:%.*]] = icmp sgt i32 [[CONV2041]], [[CONV2042]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2043]], label [[IF_THEN2045:%.*]], label [[IF_END2046:%.*]]
+// SIMD-ONLY0:       if.then2045:
+// SIMD-ONLY0-NEXT:    [[TMP1265:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1265]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2046]]
+// SIMD-ONLY0:       if.end2046:
+// SIMD-ONLY0-NEXT:    [[TMP1266:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1266]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1267:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2047:%.*]] = sext i16 [[TMP1267]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1268:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2048:%.*]] = sext i16 [[TMP1268]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2049:%.*]] = icmp slt i32 [[CONV2047]], [[CONV2048]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2049]], label [[IF_THEN2051:%.*]], label [[IF_END2052:%.*]]
+// SIMD-ONLY0:       if.then2051:
+// SIMD-ONLY0-NEXT:    [[TMP1269:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1269]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2052]]
+// SIMD-ONLY0:       if.end2052:
+// SIMD-ONLY0-NEXT:    [[TMP1270:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1270]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1271:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2053:%.*]] = sext i16 [[TMP1271]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1272:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2054:%.*]] = sext i16 [[TMP1272]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2055:%.*]] = icmp slt i32 [[CONV2053]], [[CONV2054]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2055]], label [[IF_THEN2057:%.*]], label [[IF_END2058:%.*]]
+// SIMD-ONLY0:       if.then2057:
+// SIMD-ONLY0-NEXT:    [[TMP1273:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1273]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2058]]
+// SIMD-ONLY0:       if.end2058:
+// SIMD-ONLY0-NEXT:    [[TMP1274:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1274]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1275:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2059:%.*]] = sext i16 [[TMP1275]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1276:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2060:%.*]] = sext i16 [[TMP1276]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2061:%.*]] = icmp eq i32 [[CONV2059]], [[CONV2060]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2061]], label [[IF_THEN2063:%.*]], label [[IF_END2064:%.*]]
+// SIMD-ONLY0:       if.then2063:
+// SIMD-ONLY0-NEXT:    [[TMP1277:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1277]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2064]]
+// SIMD-ONLY0:       if.end2064:
+// SIMD-ONLY0-NEXT:    [[TMP1278:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1278]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1279:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2065:%.*]] = sext i16 [[TMP1279]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1280:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2066:%.*]] = sext i16 [[TMP1280]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2067:%.*]] = icmp eq i32 [[CONV2065]], [[CONV2066]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2067]], label [[IF_THEN2069:%.*]], label [[IF_END2070:%.*]]
+// SIMD-ONLY0:       if.then2069:
+// SIMD-ONLY0-NEXT:    [[TMP1281:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1281]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2070]]
+// SIMD-ONLY0:       if.end2070:
+// SIMD-ONLY0-NEXT:    [[TMP1282:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2071:%.*]] = sext i16 [[TMP1282]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1283:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2072:%.*]] = sext i16 [[TMP1283]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2073:%.*]] = icmp sgt i32 [[CONV2071]], [[CONV2072]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2073]], label [[IF_THEN2075:%.*]], label [[IF_END2076:%.*]]
+// SIMD-ONLY0:       if.then2075:
+// SIMD-ONLY0-NEXT:    [[TMP1284:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1284]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2076]]
+// SIMD-ONLY0:       if.end2076:
+// SIMD-ONLY0-NEXT:    [[TMP1285:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1285]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1286:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2077:%.*]] = sext i16 [[TMP1286]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1287:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2078:%.*]] = sext i16 [[TMP1287]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2079:%.*]] = icmp sgt i32 [[CONV2077]], [[CONV2078]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2079]], label [[IF_THEN2081:%.*]], label [[IF_END2082:%.*]]
+// SIMD-ONLY0:       if.then2081:
+// SIMD-ONLY0-NEXT:    [[TMP1288:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1288]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2082]]
+// SIMD-ONLY0:       if.end2082:
+// SIMD-ONLY0-NEXT:    [[TMP1289:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1289]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1290:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2083:%.*]] = sext i16 [[TMP1290]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1291:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2084:%.*]] = sext i16 [[TMP1291]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2085:%.*]] = icmp slt i32 [[CONV2083]], [[CONV2084]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2085]], label [[IF_THEN2087:%.*]], label [[IF_END2088:%.*]]
+// SIMD-ONLY0:       if.then2087:
+// SIMD-ONLY0-NEXT:    [[TMP1292:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1292]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2088]]
+// SIMD-ONLY0:       if.end2088:
+// SIMD-ONLY0-NEXT:    [[TMP1293:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1293]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1294:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2089:%.*]] = sext i16 [[TMP1294]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1295:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2090:%.*]] = sext i16 [[TMP1295]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2091:%.*]] = icmp slt i32 [[CONV2089]], [[CONV2090]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2091]], label [[IF_THEN2093:%.*]], label [[IF_END2094:%.*]]
+// SIMD-ONLY0:       if.then2093:
+// SIMD-ONLY0-NEXT:    [[TMP1296:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1296]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2094]]
+// SIMD-ONLY0:       if.end2094:
+// SIMD-ONLY0-NEXT:    [[TMP1297:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1297]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1298:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2095:%.*]] = sext i16 [[TMP1298]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1299:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2096:%.*]] = sext i16 [[TMP1299]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2097:%.*]] = icmp eq i32 [[CONV2095]], [[CONV2096]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2097]], label [[IF_THEN2099:%.*]], label [[IF_END2100:%.*]]
+// SIMD-ONLY0:       if.then2099:
+// SIMD-ONLY0-NEXT:    [[TMP1300:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1300]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2100]]
+// SIMD-ONLY0:       if.end2100:
+// SIMD-ONLY0-NEXT:    [[TMP1301:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1301]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1302:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2101:%.*]] = sext i16 [[TMP1302]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1303:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2102:%.*]] = sext i16 [[TMP1303]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2103:%.*]] = icmp eq i32 [[CONV2101]], [[CONV2102]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2103]], label [[IF_THEN2105:%.*]], label [[IF_END2106:%.*]]
+// SIMD-ONLY0:       if.then2105:
+// SIMD-ONLY0-NEXT:    [[TMP1304:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1304]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2106]]
+// SIMD-ONLY0:       if.end2106:
+// SIMD-ONLY0-NEXT:    [[TMP1305:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1305]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1306:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2107:%.*]] = sext i16 [[TMP1306]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1307:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2108:%.*]] = sext i16 [[TMP1307]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2109:%.*]] = icmp eq i32 [[CONV2107]], [[CONV2108]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2109]], label [[IF_THEN2111:%.*]], label [[IF_ELSE2112:%.*]]
+// SIMD-ONLY0:       if.then2111:
+// SIMD-ONLY0-NEXT:    [[TMP1308:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1308]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2113:%.*]]
+// SIMD-ONLY0:       if.else2112:
+// SIMD-ONLY0-NEXT:    [[TMP1309:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1309]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2113]]
+// SIMD-ONLY0:       if.end2113:
+// SIMD-ONLY0-NEXT:    [[TMP1310:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2114:%.*]] = sext i16 [[TMP1310]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1311:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2115:%.*]] = sext i16 [[TMP1311]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2116:%.*]] = icmp eq i32 [[CONV2114]], [[CONV2115]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2116]], label [[IF_THEN2118:%.*]], label [[IF_ELSE2119:%.*]]
+// SIMD-ONLY0:       if.then2118:
+// SIMD-ONLY0-NEXT:    [[TMP1312:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1312]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2120:%.*]]
+// SIMD-ONLY0:       if.else2119:
+// SIMD-ONLY0-NEXT:    [[TMP1313:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1313]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2120]]
+// SIMD-ONLY0:       if.end2120:
+// SIMD-ONLY0-NEXT:    [[TMP1314:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2121:%.*]] = sext i16 [[TMP1314]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1315:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2122:%.*]] = sext i16 [[TMP1315]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2123:%.*]] = icmp eq i32 [[CONV2121]], [[CONV2122]]
+// SIMD-ONLY0-NEXT:    [[CONV2124:%.*]] = zext i1 [[CMP2123]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV2125:%.*]] = trunc i32 [[CONV2124]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2125]], ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1316:%.*]] = load i16, ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL2126:%.*]] = icmp ne i16 [[TMP1316]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2126]], label [[IF_THEN2127:%.*]], label [[IF_END2128:%.*]]
+// SIMD-ONLY0:       if.then2127:
+// SIMD-ONLY0-NEXT:    [[TMP1317:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1317]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2128]]
+// SIMD-ONLY0:       if.end2128:
+// SIMD-ONLY0-NEXT:    [[TMP1318:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2129:%.*]] = sext i16 [[TMP1318]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1319:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2130:%.*]] = sext i16 [[TMP1319]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2131:%.*]] = icmp eq i32 [[CONV2129]], [[CONV2130]]
+// SIMD-ONLY0-NEXT:    [[CONV2132:%.*]] = zext i1 [[CMP2131]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV2133:%.*]] = trunc i32 [[CONV2132]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2133]], ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1320:%.*]] = load i16, ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL2134:%.*]] = icmp ne i16 [[TMP1320]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2134]], label [[IF_THEN2135:%.*]], label [[IF_END2136:%.*]]
+// SIMD-ONLY0:       if.then2135:
+// SIMD-ONLY0-NEXT:    [[TMP1321:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1321]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2136]]
+// SIMD-ONLY0:       if.end2136:
+// SIMD-ONLY0-NEXT:    [[TMP1322:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2137:%.*]] = sext i16 [[TMP1322]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1323:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2138:%.*]] = sext i16 [[TMP1323]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2139:%.*]] = icmp eq i32 [[CONV2137]], [[CONV2138]]
+// SIMD-ONLY0-NEXT:    [[CONV2140:%.*]] = zext i1 [[CMP2139]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV2141:%.*]] = trunc i32 [[CONV2140]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2141]], ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1324:%.*]] = load i16, ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL2142:%.*]] = icmp ne i16 [[TMP1324]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2142]], label [[IF_THEN2143:%.*]], label [[IF_ELSE2144:%.*]]
+// SIMD-ONLY0:       if.then2143:
+// SIMD-ONLY0-NEXT:    [[TMP1325:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1325]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2145:%.*]]
+// SIMD-ONLY0:       if.else2144:
+// SIMD-ONLY0-NEXT:    [[TMP1326:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1326]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2145]]
+// SIMD-ONLY0:       if.end2145:
+// SIMD-ONLY0-NEXT:    [[TMP1327:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2146:%.*]] = sext i16 [[TMP1327]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1328:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2147:%.*]] = sext i16 [[TMP1328]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2148:%.*]] = icmp eq i32 [[CONV2146]], [[CONV2147]]
+// SIMD-ONLY0-NEXT:    [[CONV2149:%.*]] = zext i1 [[CMP2148]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV2150:%.*]] = trunc i32 [[CONV2149]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2150]], ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1329:%.*]] = load i16, ptr [[SR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL2151:%.*]] = icmp ne i16 [[TMP1329]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2151]], label [[IF_THEN2152:%.*]], label [[IF_ELSE2153:%.*]]
+// SIMD-ONLY0:       if.then2152:
+// SIMD-ONLY0-NEXT:    [[TMP1330:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1330]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2154:%.*]]
+// SIMD-ONLY0:       if.else2153:
+// SIMD-ONLY0-NEXT:    [[TMP1331:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1331]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2154]]
+// SIMD-ONLY0:       if.end2154:
+// SIMD-ONLY0-NEXT:    [[TMP1332:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1332]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1333:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2155:%.*]] = zext i16 [[TMP1333]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1334:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2156:%.*]] = zext i16 [[TMP1334]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2157:%.*]] = icmp sgt i32 [[CONV2155]], [[CONV2156]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2157]], label [[IF_THEN2159:%.*]], label [[IF_END2160:%.*]]
+// SIMD-ONLY0:       if.then2159:
+// SIMD-ONLY0-NEXT:    [[TMP1335:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1335]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2160]]
+// SIMD-ONLY0:       if.end2160:
+// SIMD-ONLY0-NEXT:    [[TMP1336:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1336]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1337:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2161:%.*]] = zext i16 [[TMP1337]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1338:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2162:%.*]] = zext i16 [[TMP1338]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2163:%.*]] = icmp sgt i32 [[CONV2161]], [[CONV2162]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2163]], label [[IF_THEN2165:%.*]], label [[IF_END2166:%.*]]
+// SIMD-ONLY0:       if.then2165:
+// SIMD-ONLY0-NEXT:    [[TMP1339:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1339]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2166]]
+// SIMD-ONLY0:       if.end2166:
+// SIMD-ONLY0-NEXT:    [[TMP1340:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1340]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1341:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2167:%.*]] = zext i16 [[TMP1341]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1342:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2168:%.*]] = zext i16 [[TMP1342]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2169:%.*]] = icmp slt i32 [[CONV2167]], [[CONV2168]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2169]], label [[IF_THEN2171:%.*]], label [[IF_END2172:%.*]]
+// SIMD-ONLY0:       if.then2171:
+// SIMD-ONLY0-NEXT:    [[TMP1343:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1343]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2172]]
+// SIMD-ONLY0:       if.end2172:
+// SIMD-ONLY0-NEXT:    [[TMP1344:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1344]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1345:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2173:%.*]] = zext i16 [[TMP1345]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1346:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2174:%.*]] = zext i16 [[TMP1346]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2175:%.*]] = icmp slt i32 [[CONV2173]], [[CONV2174]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2175]], label [[IF_THEN2177:%.*]], label [[IF_END2178:%.*]]
+// SIMD-ONLY0:       if.then2177:
+// SIMD-ONLY0-NEXT:    [[TMP1347:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1347]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2178]]
+// SIMD-ONLY0:       if.end2178:
+// SIMD-ONLY0-NEXT:    [[TMP1348:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1348]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1349:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2179:%.*]] = zext i16 [[TMP1349]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1350:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2180:%.*]] = zext i16 [[TMP1350]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2181:%.*]] = icmp eq i32 [[CONV2179]], [[CONV2180]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2181]], label [[IF_THEN2183:%.*]], label [[IF_END2184:%.*]]
+// SIMD-ONLY0:       if.then2183:
+// SIMD-ONLY0-NEXT:    [[TMP1351:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1351]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2184]]
+// SIMD-ONLY0:       if.end2184:
+// SIMD-ONLY0-NEXT:    [[TMP1352:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1352]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1353:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2185:%.*]] = zext i16 [[TMP1353]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1354:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2186:%.*]] = zext i16 [[TMP1354]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2187:%.*]] = icmp eq i32 [[CONV2185]], [[CONV2186]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2187]], label [[IF_THEN2189:%.*]], label [[IF_END2190:%.*]]
+// SIMD-ONLY0:       if.then2189:
+// SIMD-ONLY0-NEXT:    [[TMP1355:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1355]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2190]]
+// SIMD-ONLY0:       if.end2190:
+// SIMD-ONLY0-NEXT:    [[TMP1356:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2191:%.*]] = zext i16 [[TMP1356]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1357:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2192:%.*]] = zext i16 [[TMP1357]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2193:%.*]] = icmp sgt i32 [[CONV2191]], [[CONV2192]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2193]], label [[IF_THEN2195:%.*]], label [[IF_END2196:%.*]]
+// SIMD-ONLY0:       if.then2195:
+// SIMD-ONLY0-NEXT:    [[TMP1358:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1358]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2196]]
+// SIMD-ONLY0:       if.end2196:
+// SIMD-ONLY0-NEXT:    [[TMP1359:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1359]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1360:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2197:%.*]] = zext i16 [[TMP1360]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1361:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2198:%.*]] = zext i16 [[TMP1361]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2199:%.*]] = icmp sgt i32 [[CONV2197]], [[CONV2198]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2199]], label [[IF_THEN2201:%.*]], label [[IF_END2202:%.*]]
+// SIMD-ONLY0:       if.then2201:
+// SIMD-ONLY0-NEXT:    [[TMP1362:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1362]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2202]]
+// SIMD-ONLY0:       if.end2202:
+// SIMD-ONLY0-NEXT:    [[TMP1363:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1363]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1364:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2203:%.*]] = zext i16 [[TMP1364]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1365:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2204:%.*]] = zext i16 [[TMP1365]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2205:%.*]] = icmp slt i32 [[CONV2203]], [[CONV2204]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2205]], label [[IF_THEN2207:%.*]], label [[IF_END2208:%.*]]
+// SIMD-ONLY0:       if.then2207:
+// SIMD-ONLY0-NEXT:    [[TMP1366:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1366]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2208]]
+// SIMD-ONLY0:       if.end2208:
+// SIMD-ONLY0-NEXT:    [[TMP1367:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1367]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1368:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2209:%.*]] = zext i16 [[TMP1368]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1369:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2210:%.*]] = zext i16 [[TMP1369]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2211:%.*]] = icmp slt i32 [[CONV2209]], [[CONV2210]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2211]], label [[IF_THEN2213:%.*]], label [[IF_END2214:%.*]]
+// SIMD-ONLY0:       if.then2213:
+// SIMD-ONLY0-NEXT:    [[TMP1370:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1370]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2214]]
+// SIMD-ONLY0:       if.end2214:
+// SIMD-ONLY0-NEXT:    [[TMP1371:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1371]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1372:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2215:%.*]] = zext i16 [[TMP1372]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1373:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2216:%.*]] = zext i16 [[TMP1373]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2217:%.*]] = icmp eq i32 [[CONV2215]], [[CONV2216]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2217]], label [[IF_THEN2219:%.*]], label [[IF_END2220:%.*]]
+// SIMD-ONLY0:       if.then2219:
+// SIMD-ONLY0-NEXT:    [[TMP1374:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1374]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2220]]
+// SIMD-ONLY0:       if.end2220:
+// SIMD-ONLY0-NEXT:    [[TMP1375:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1375]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1376:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2221:%.*]] = zext i16 [[TMP1376]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1377:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2222:%.*]] = zext i16 [[TMP1377]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2223:%.*]] = icmp eq i32 [[CONV2221]], [[CONV2222]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2223]], label [[IF_THEN2225:%.*]], label [[IF_END2226:%.*]]
+// SIMD-ONLY0:       if.then2225:
+// SIMD-ONLY0-NEXT:    [[TMP1378:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1378]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2226]]
+// SIMD-ONLY0:       if.end2226:
+// SIMD-ONLY0-NEXT:    [[TMP1379:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1379]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1380:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2227:%.*]] = zext i16 [[TMP1380]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1381:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2228:%.*]] = zext i16 [[TMP1381]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2229:%.*]] = icmp eq i32 [[CONV2227]], [[CONV2228]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2229]], label [[IF_THEN2231:%.*]], label [[IF_ELSE2232:%.*]]
+// SIMD-ONLY0:       if.then2231:
+// SIMD-ONLY0-NEXT:    [[TMP1382:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1382]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2233:%.*]]
+// SIMD-ONLY0:       if.else2232:
+// SIMD-ONLY0-NEXT:    [[TMP1383:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1383]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2233]]
+// SIMD-ONLY0:       if.end2233:
+// SIMD-ONLY0-NEXT:    [[TMP1384:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2234:%.*]] = zext i16 [[TMP1384]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1385:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2235:%.*]] = zext i16 [[TMP1385]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2236:%.*]] = icmp eq i32 [[CONV2234]], [[CONV2235]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2236]], label [[IF_THEN2238:%.*]], label [[IF_ELSE2239:%.*]]
+// SIMD-ONLY0:       if.then2238:
+// SIMD-ONLY0-NEXT:    [[TMP1386:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1386]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2240:%.*]]
+// SIMD-ONLY0:       if.else2239:
+// SIMD-ONLY0-NEXT:    [[TMP1387:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1387]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2240]]
+// SIMD-ONLY0:       if.end2240:
+// SIMD-ONLY0-NEXT:    [[TMP1388:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2241:%.*]] = zext i16 [[TMP1388]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1389:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2242:%.*]] = zext i16 [[TMP1389]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2243:%.*]] = icmp eq i32 [[CONV2241]], [[CONV2242]]
+// SIMD-ONLY0-NEXT:    [[CONV2244:%.*]] = zext i1 [[CMP2243]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV2245:%.*]] = trunc i32 [[CONV2244]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2245]], ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1390:%.*]] = load i16, ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL2246:%.*]] = icmp ne i16 [[TMP1390]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2246]], label [[IF_THEN2247:%.*]], label [[IF_END2248:%.*]]
+// SIMD-ONLY0:       if.then2247:
+// SIMD-ONLY0-NEXT:    [[TMP1391:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1391]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2248]]
+// SIMD-ONLY0:       if.end2248:
+// SIMD-ONLY0-NEXT:    [[TMP1392:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2249:%.*]] = zext i16 [[TMP1392]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1393:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2250:%.*]] = zext i16 [[TMP1393]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2251:%.*]] = icmp eq i32 [[CONV2249]], [[CONV2250]]
+// SIMD-ONLY0-NEXT:    [[CONV2252:%.*]] = zext i1 [[CMP2251]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV2253:%.*]] = trunc i32 [[CONV2252]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2253]], ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1394:%.*]] = load i16, ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL2254:%.*]] = icmp ne i16 [[TMP1394]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2254]], label [[IF_THEN2255:%.*]], label [[IF_END2256:%.*]]
+// SIMD-ONLY0:       if.then2255:
+// SIMD-ONLY0-NEXT:    [[TMP1395:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1395]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2256]]
+// SIMD-ONLY0:       if.end2256:
+// SIMD-ONLY0-NEXT:    [[TMP1396:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2257:%.*]] = zext i16 [[TMP1396]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1397:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2258:%.*]] = zext i16 [[TMP1397]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2259:%.*]] = icmp eq i32 [[CONV2257]], [[CONV2258]]
+// SIMD-ONLY0-NEXT:    [[CONV2260:%.*]] = zext i1 [[CMP2259]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV2261:%.*]] = trunc i32 [[CONV2260]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2261]], ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1398:%.*]] = load i16, ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL2262:%.*]] = icmp ne i16 [[TMP1398]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2262]], label [[IF_THEN2263:%.*]], label [[IF_ELSE2264:%.*]]
+// SIMD-ONLY0:       if.then2263:
+// SIMD-ONLY0-NEXT:    [[TMP1399:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1399]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2265:%.*]]
+// SIMD-ONLY0:       if.else2264:
+// SIMD-ONLY0-NEXT:    [[TMP1400:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1400]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2265]]
+// SIMD-ONLY0:       if.end2265:
+// SIMD-ONLY0-NEXT:    [[TMP1401:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2266:%.*]] = zext i16 [[TMP1401]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1402:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2267:%.*]] = zext i16 [[TMP1402]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2268:%.*]] = icmp eq i32 [[CONV2266]], [[CONV2267]]
+// SIMD-ONLY0-NEXT:    [[CONV2269:%.*]] = zext i1 [[CMP2268]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV2270:%.*]] = trunc i32 [[CONV2269]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2270]], ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1403:%.*]] = load i16, ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL2271:%.*]] = icmp ne i16 [[TMP1403]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2271]], label [[IF_THEN2272:%.*]], label [[IF_ELSE2273:%.*]]
+// SIMD-ONLY0:       if.then2272:
+// SIMD-ONLY0-NEXT:    [[TMP1404:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1404]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2274:%.*]]
+// SIMD-ONLY0:       if.else2273:
+// SIMD-ONLY0-NEXT:    [[TMP1405:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1405]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2274]]
+// SIMD-ONLY0:       if.end2274:
+// SIMD-ONLY0-NEXT:    [[TMP1406:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1406]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1407:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2275:%.*]] = zext i16 [[TMP1407]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1408:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2276:%.*]] = zext i16 [[TMP1408]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2277:%.*]] = icmp sgt i32 [[CONV2275]], [[CONV2276]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2277]], label [[IF_THEN2279:%.*]], label [[IF_END2280:%.*]]
+// SIMD-ONLY0:       if.then2279:
+// SIMD-ONLY0-NEXT:    [[TMP1409:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1409]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2280]]
+// SIMD-ONLY0:       if.end2280:
+// SIMD-ONLY0-NEXT:    [[TMP1410:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1410]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1411:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2281:%.*]] = zext i16 [[TMP1411]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1412:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2282:%.*]] = zext i16 [[TMP1412]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2283:%.*]] = icmp sgt i32 [[CONV2281]], [[CONV2282]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2283]], label [[IF_THEN2285:%.*]], label [[IF_END2286:%.*]]
+// SIMD-ONLY0:       if.then2285:
+// SIMD-ONLY0-NEXT:    [[TMP1413:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1413]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2286]]
+// SIMD-ONLY0:       if.end2286:
+// SIMD-ONLY0-NEXT:    [[TMP1414:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1414]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1415:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2287:%.*]] = zext i16 [[TMP1415]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1416:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2288:%.*]] = zext i16 [[TMP1416]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2289:%.*]] = icmp slt i32 [[CONV2287]], [[CONV2288]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2289]], label [[IF_THEN2291:%.*]], label [[IF_END2292:%.*]]
+// SIMD-ONLY0:       if.then2291:
+// SIMD-ONLY0-NEXT:    [[TMP1417:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1417]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2292]]
+// SIMD-ONLY0:       if.end2292:
+// SIMD-ONLY0-NEXT:    [[TMP1418:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1418]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1419:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2293:%.*]] = zext i16 [[TMP1419]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1420:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2294:%.*]] = zext i16 [[TMP1420]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2295:%.*]] = icmp slt i32 [[CONV2293]], [[CONV2294]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2295]], label [[IF_THEN2297:%.*]], label [[IF_END2298:%.*]]
+// SIMD-ONLY0:       if.then2297:
+// SIMD-ONLY0-NEXT:    [[TMP1421:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1421]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2298]]
+// SIMD-ONLY0:       if.end2298:
+// SIMD-ONLY0-NEXT:    [[TMP1422:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1422]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1423:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2299:%.*]] = zext i16 [[TMP1423]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1424:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2300:%.*]] = zext i16 [[TMP1424]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2301:%.*]] = icmp eq i32 [[CONV2299]], [[CONV2300]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2301]], label [[IF_THEN2303:%.*]], label [[IF_END2304:%.*]]
+// SIMD-ONLY0:       if.then2303:
+// SIMD-ONLY0-NEXT:    [[TMP1425:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1425]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2304]]
+// SIMD-ONLY0:       if.end2304:
+// SIMD-ONLY0-NEXT:    [[TMP1426:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1426]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1427:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2305:%.*]] = zext i16 [[TMP1427]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1428:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2306:%.*]] = zext i16 [[TMP1428]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2307:%.*]] = icmp eq i32 [[CONV2305]], [[CONV2306]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2307]], label [[IF_THEN2309:%.*]], label [[IF_END2310:%.*]]
+// SIMD-ONLY0:       if.then2309:
+// SIMD-ONLY0-NEXT:    [[TMP1429:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1429]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2310]]
+// SIMD-ONLY0:       if.end2310:
+// SIMD-ONLY0-NEXT:    [[TMP1430:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2311:%.*]] = zext i16 [[TMP1430]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1431:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2312:%.*]] = zext i16 [[TMP1431]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2313:%.*]] = icmp sgt i32 [[CONV2311]], [[CONV2312]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2313]], label [[IF_THEN2315:%.*]], label [[IF_END2316:%.*]]
+// SIMD-ONLY0:       if.then2315:
+// SIMD-ONLY0-NEXT:    [[TMP1432:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1432]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2316]]
+// SIMD-ONLY0:       if.end2316:
+// SIMD-ONLY0-NEXT:    [[TMP1433:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1433]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1434:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2317:%.*]] = zext i16 [[TMP1434]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1435:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2318:%.*]] = zext i16 [[TMP1435]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2319:%.*]] = icmp sgt i32 [[CONV2317]], [[CONV2318]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2319]], label [[IF_THEN2321:%.*]], label [[IF_END2322:%.*]]
+// SIMD-ONLY0:       if.then2321:
+// SIMD-ONLY0-NEXT:    [[TMP1436:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1436]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2322]]
+// SIMD-ONLY0:       if.end2322:
+// SIMD-ONLY0-NEXT:    [[TMP1437:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1437]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1438:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2323:%.*]] = zext i16 [[TMP1438]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1439:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2324:%.*]] = zext i16 [[TMP1439]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2325:%.*]] = icmp slt i32 [[CONV2323]], [[CONV2324]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2325]], label [[IF_THEN2327:%.*]], label [[IF_END2328:%.*]]
+// SIMD-ONLY0:       if.then2327:
+// SIMD-ONLY0-NEXT:    [[TMP1440:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1440]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2328]]
+// SIMD-ONLY0:       if.end2328:
+// SIMD-ONLY0-NEXT:    [[TMP1441:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1441]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1442:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2329:%.*]] = zext i16 [[TMP1442]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1443:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2330:%.*]] = zext i16 [[TMP1443]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2331:%.*]] = icmp slt i32 [[CONV2329]], [[CONV2330]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2331]], label [[IF_THEN2333:%.*]], label [[IF_END2334:%.*]]
+// SIMD-ONLY0:       if.then2333:
+// SIMD-ONLY0-NEXT:    [[TMP1444:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1444]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2334]]
+// SIMD-ONLY0:       if.end2334:
+// SIMD-ONLY0-NEXT:    [[TMP1445:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1445]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1446:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2335:%.*]] = zext i16 [[TMP1446]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1447:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2336:%.*]] = zext i16 [[TMP1447]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2337:%.*]] = icmp eq i32 [[CONV2335]], [[CONV2336]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2337]], label [[IF_THEN2339:%.*]], label [[IF_END2340:%.*]]
+// SIMD-ONLY0:       if.then2339:
+// SIMD-ONLY0-NEXT:    [[TMP1448:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1448]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2340]]
+// SIMD-ONLY0:       if.end2340:
+// SIMD-ONLY0-NEXT:    [[TMP1449:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1449]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1450:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2341:%.*]] = zext i16 [[TMP1450]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1451:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2342:%.*]] = zext i16 [[TMP1451]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2343:%.*]] = icmp eq i32 [[CONV2341]], [[CONV2342]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2343]], label [[IF_THEN2345:%.*]], label [[IF_END2346:%.*]]
+// SIMD-ONLY0:       if.then2345:
+// SIMD-ONLY0-NEXT:    [[TMP1452:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1452]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2346]]
+// SIMD-ONLY0:       if.end2346:
+// SIMD-ONLY0-NEXT:    [[TMP1453:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1453]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1454:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2347:%.*]] = zext i16 [[TMP1454]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1455:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2348:%.*]] = zext i16 [[TMP1455]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2349:%.*]] = icmp eq i32 [[CONV2347]], [[CONV2348]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2349]], label [[IF_THEN2351:%.*]], label [[IF_ELSE2352:%.*]]
+// SIMD-ONLY0:       if.then2351:
+// SIMD-ONLY0-NEXT:    [[TMP1456:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1456]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2353:%.*]]
+// SIMD-ONLY0:       if.else2352:
+// SIMD-ONLY0-NEXT:    [[TMP1457:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1457]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2353]]
+// SIMD-ONLY0:       if.end2353:
+// SIMD-ONLY0-NEXT:    [[TMP1458:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2354:%.*]] = zext i16 [[TMP1458]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1459:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2355:%.*]] = zext i16 [[TMP1459]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2356:%.*]] = icmp eq i32 [[CONV2354]], [[CONV2355]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2356]], label [[IF_THEN2358:%.*]], label [[IF_ELSE2359:%.*]]
+// SIMD-ONLY0:       if.then2358:
+// SIMD-ONLY0-NEXT:    [[TMP1460:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1460]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2360:%.*]]
+// SIMD-ONLY0:       if.else2359:
+// SIMD-ONLY0-NEXT:    [[TMP1461:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1461]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2360]]
+// SIMD-ONLY0:       if.end2360:
+// SIMD-ONLY0-NEXT:    [[TMP1462:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2361:%.*]] = zext i16 [[TMP1462]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1463:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2362:%.*]] = zext i16 [[TMP1463]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2363:%.*]] = icmp eq i32 [[CONV2361]], [[CONV2362]]
+// SIMD-ONLY0-NEXT:    [[CONV2364:%.*]] = zext i1 [[CMP2363]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV2365:%.*]] = trunc i32 [[CONV2364]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2365]], ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1464:%.*]] = load i16, ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL2366:%.*]] = icmp ne i16 [[TMP1464]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2366]], label [[IF_THEN2367:%.*]], label [[IF_END2368:%.*]]
+// SIMD-ONLY0:       if.then2367:
+// SIMD-ONLY0-NEXT:    [[TMP1465:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1465]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2368]]
+// SIMD-ONLY0:       if.end2368:
+// SIMD-ONLY0-NEXT:    [[TMP1466:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2369:%.*]] = zext i16 [[TMP1466]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1467:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2370:%.*]] = zext i16 [[TMP1467]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2371:%.*]] = icmp eq i32 [[CONV2369]], [[CONV2370]]
+// SIMD-ONLY0-NEXT:    [[CONV2372:%.*]] = zext i1 [[CMP2371]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV2373:%.*]] = trunc i32 [[CONV2372]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2373]], ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1468:%.*]] = load i16, ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL2374:%.*]] = icmp ne i16 [[TMP1468]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2374]], label [[IF_THEN2375:%.*]], label [[IF_END2376:%.*]]
+// SIMD-ONLY0:       if.then2375:
+// SIMD-ONLY0-NEXT:    [[TMP1469:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1469]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2376]]
+// SIMD-ONLY0:       if.end2376:
+// SIMD-ONLY0-NEXT:    [[TMP1470:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2377:%.*]] = zext i16 [[TMP1470]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1471:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2378:%.*]] = zext i16 [[TMP1471]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2379:%.*]] = icmp eq i32 [[CONV2377]], [[CONV2378]]
+// SIMD-ONLY0-NEXT:    [[CONV2380:%.*]] = zext i1 [[CMP2379]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV2381:%.*]] = trunc i32 [[CONV2380]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2381]], ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1472:%.*]] = load i16, ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL2382:%.*]] = icmp ne i16 [[TMP1472]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2382]], label [[IF_THEN2383:%.*]], label [[IF_ELSE2384:%.*]]
+// SIMD-ONLY0:       if.then2383:
+// SIMD-ONLY0-NEXT:    [[TMP1473:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1473]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2385:%.*]]
+// SIMD-ONLY0:       if.else2384:
+// SIMD-ONLY0-NEXT:    [[TMP1474:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1474]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2385]]
+// SIMD-ONLY0:       if.end2385:
+// SIMD-ONLY0-NEXT:    [[TMP1475:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2386:%.*]] = zext i16 [[TMP1475]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1476:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2387:%.*]] = zext i16 [[TMP1476]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2388:%.*]] = icmp eq i32 [[CONV2386]], [[CONV2387]]
+// SIMD-ONLY0-NEXT:    [[CONV2389:%.*]] = zext i1 [[CMP2388]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV2390:%.*]] = trunc i32 [[CONV2389]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2390]], ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1477:%.*]] = load i16, ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL2391:%.*]] = icmp ne i16 [[TMP1477]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2391]], label [[IF_THEN2392:%.*]], label [[IF_ELSE2393:%.*]]
+// SIMD-ONLY0:       if.then2392:
+// SIMD-ONLY0-NEXT:    [[TMP1478:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1478]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2394:%.*]]
+// SIMD-ONLY0:       if.else2393:
+// SIMD-ONLY0-NEXT:    [[TMP1479:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1479]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2394]]
+// SIMD-ONLY0:       if.end2394:
+// SIMD-ONLY0-NEXT:    [[TMP1480:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1480]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1481:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2395:%.*]] = zext i16 [[TMP1481]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1482:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2396:%.*]] = zext i16 [[TMP1482]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2397:%.*]] = icmp sgt i32 [[CONV2395]], [[CONV2396]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2397]], label [[IF_THEN2399:%.*]], label [[IF_END2400:%.*]]
+// SIMD-ONLY0:       if.then2399:
+// SIMD-ONLY0-NEXT:    [[TMP1483:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1483]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2400]]
+// SIMD-ONLY0:       if.end2400:
+// SIMD-ONLY0-NEXT:    [[TMP1484:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1484]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1485:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2401:%.*]] = zext i16 [[TMP1485]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1486:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2402:%.*]] = zext i16 [[TMP1486]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2403:%.*]] = icmp sgt i32 [[CONV2401]], [[CONV2402]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2403]], label [[IF_THEN2405:%.*]], label [[IF_END2406:%.*]]
+// SIMD-ONLY0:       if.then2405:
+// SIMD-ONLY0-NEXT:    [[TMP1487:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1487]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2406]]
+// SIMD-ONLY0:       if.end2406:
+// SIMD-ONLY0-NEXT:    [[TMP1488:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1488]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1489:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2407:%.*]] = zext i16 [[TMP1489]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1490:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2408:%.*]] = zext i16 [[TMP1490]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2409:%.*]] = icmp slt i32 [[CONV2407]], [[CONV2408]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2409]], label [[IF_THEN2411:%.*]], label [[IF_END2412:%.*]]
+// SIMD-ONLY0:       if.then2411:
+// SIMD-ONLY0-NEXT:    [[TMP1491:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1491]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2412]]
+// SIMD-ONLY0:       if.end2412:
+// SIMD-ONLY0-NEXT:    [[TMP1492:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1492]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1493:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2413:%.*]] = zext i16 [[TMP1493]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1494:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2414:%.*]] = zext i16 [[TMP1494]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2415:%.*]] = icmp slt i32 [[CONV2413]], [[CONV2414]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2415]], label [[IF_THEN2417:%.*]], label [[IF_END2418:%.*]]
+// SIMD-ONLY0:       if.then2417:
+// SIMD-ONLY0-NEXT:    [[TMP1495:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1495]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2418]]
+// SIMD-ONLY0:       if.end2418:
+// SIMD-ONLY0-NEXT:    [[TMP1496:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1496]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1497:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2419:%.*]] = zext i16 [[TMP1497]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1498:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2420:%.*]] = zext i16 [[TMP1498]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2421:%.*]] = icmp eq i32 [[CONV2419]], [[CONV2420]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2421]], label [[IF_THEN2423:%.*]], label [[IF_END2424:%.*]]
+// SIMD-ONLY0:       if.then2423:
+// SIMD-ONLY0-NEXT:    [[TMP1499:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1499]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2424]]
+// SIMD-ONLY0:       if.end2424:
+// SIMD-ONLY0-NEXT:    [[TMP1500:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1500]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1501:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2425:%.*]] = zext i16 [[TMP1501]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1502:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2426:%.*]] = zext i16 [[TMP1502]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2427:%.*]] = icmp eq i32 [[CONV2425]], [[CONV2426]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2427]], label [[IF_THEN2429:%.*]], label [[IF_END2430:%.*]]
+// SIMD-ONLY0:       if.then2429:
+// SIMD-ONLY0-NEXT:    [[TMP1503:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1503]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2430]]
+// SIMD-ONLY0:       if.end2430:
+// SIMD-ONLY0-NEXT:    [[TMP1504:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2431:%.*]] = zext i16 [[TMP1504]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1505:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2432:%.*]] = zext i16 [[TMP1505]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2433:%.*]] = icmp sgt i32 [[CONV2431]], [[CONV2432]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2433]], label [[IF_THEN2435:%.*]], label [[IF_END2436:%.*]]
+// SIMD-ONLY0:       if.then2435:
+// SIMD-ONLY0-NEXT:    [[TMP1506:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1506]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2436]]
+// SIMD-ONLY0:       if.end2436:
+// SIMD-ONLY0-NEXT:    [[TMP1507:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1507]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1508:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2437:%.*]] = zext i16 [[TMP1508]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1509:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2438:%.*]] = zext i16 [[TMP1509]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2439:%.*]] = icmp sgt i32 [[CONV2437]], [[CONV2438]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2439]], label [[IF_THEN2441:%.*]], label [[IF_END2442:%.*]]
+// SIMD-ONLY0:       if.then2441:
+// SIMD-ONLY0-NEXT:    [[TMP1510:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1510]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2442]]
+// SIMD-ONLY0:       if.end2442:
+// SIMD-ONLY0-NEXT:    [[TMP1511:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1511]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1512:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2443:%.*]] = zext i16 [[TMP1512]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1513:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2444:%.*]] = zext i16 [[TMP1513]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2445:%.*]] = icmp slt i32 [[CONV2443]], [[CONV2444]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2445]], label [[IF_THEN2447:%.*]], label [[IF_END2448:%.*]]
+// SIMD-ONLY0:       if.then2447:
+// SIMD-ONLY0-NEXT:    [[TMP1514:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1514]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2448]]
+// SIMD-ONLY0:       if.end2448:
+// SIMD-ONLY0-NEXT:    [[TMP1515:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1515]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1516:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2449:%.*]] = zext i16 [[TMP1516]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1517:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2450:%.*]] = zext i16 [[TMP1517]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2451:%.*]] = icmp slt i32 [[CONV2449]], [[CONV2450]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2451]], label [[IF_THEN2453:%.*]], label [[IF_END2454:%.*]]
+// SIMD-ONLY0:       if.then2453:
+// SIMD-ONLY0-NEXT:    [[TMP1518:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1518]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2454]]
+// SIMD-ONLY0:       if.end2454:
+// SIMD-ONLY0-NEXT:    [[TMP1519:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1519]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1520:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2455:%.*]] = zext i16 [[TMP1520]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1521:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2456:%.*]] = zext i16 [[TMP1521]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2457:%.*]] = icmp eq i32 [[CONV2455]], [[CONV2456]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2457]], label [[IF_THEN2459:%.*]], label [[IF_END2460:%.*]]
+// SIMD-ONLY0:       if.then2459:
+// SIMD-ONLY0-NEXT:    [[TMP1522:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1522]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2460]]
+// SIMD-ONLY0:       if.end2460:
+// SIMD-ONLY0-NEXT:    [[TMP1523:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1523]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1524:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2461:%.*]] = zext i16 [[TMP1524]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1525:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2462:%.*]] = zext i16 [[TMP1525]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2463:%.*]] = icmp eq i32 [[CONV2461]], [[CONV2462]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2463]], label [[IF_THEN2465:%.*]], label [[IF_END2466:%.*]]
+// SIMD-ONLY0:       if.then2465:
+// SIMD-ONLY0-NEXT:    [[TMP1526:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1526]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2466]]
+// SIMD-ONLY0:       if.end2466:
+// SIMD-ONLY0-NEXT:    [[TMP1527:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1527]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1528:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2467:%.*]] = zext i16 [[TMP1528]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1529:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2468:%.*]] = zext i16 [[TMP1529]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2469:%.*]] = icmp eq i32 [[CONV2467]], [[CONV2468]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2469]], label [[IF_THEN2471:%.*]], label [[IF_ELSE2472:%.*]]
+// SIMD-ONLY0:       if.then2471:
+// SIMD-ONLY0-NEXT:    [[TMP1530:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1530]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2473:%.*]]
+// SIMD-ONLY0:       if.else2472:
+// SIMD-ONLY0-NEXT:    [[TMP1531:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1531]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2473]]
+// SIMD-ONLY0:       if.end2473:
+// SIMD-ONLY0-NEXT:    [[TMP1532:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2474:%.*]] = zext i16 [[TMP1532]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1533:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2475:%.*]] = zext i16 [[TMP1533]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2476:%.*]] = icmp eq i32 [[CONV2474]], [[CONV2475]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2476]], label [[IF_THEN2478:%.*]], label [[IF_ELSE2479:%.*]]
+// SIMD-ONLY0:       if.then2478:
+// SIMD-ONLY0-NEXT:    [[TMP1534:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1534]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2480:%.*]]
+// SIMD-ONLY0:       if.else2479:
+// SIMD-ONLY0-NEXT:    [[TMP1535:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1535]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2480]]
+// SIMD-ONLY0:       if.end2480:
+// SIMD-ONLY0-NEXT:    [[TMP1536:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2481:%.*]] = zext i16 [[TMP1536]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1537:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2482:%.*]] = zext i16 [[TMP1537]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2483:%.*]] = icmp eq i32 [[CONV2481]], [[CONV2482]]
+// SIMD-ONLY0-NEXT:    [[CONV2484:%.*]] = zext i1 [[CMP2483]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV2485:%.*]] = trunc i32 [[CONV2484]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2485]], ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1538:%.*]] = load i16, ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL2486:%.*]] = icmp ne i16 [[TMP1538]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2486]], label [[IF_THEN2487:%.*]], label [[IF_END2488:%.*]]
+// SIMD-ONLY0:       if.then2487:
+// SIMD-ONLY0-NEXT:    [[TMP1539:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1539]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2488]]
+// SIMD-ONLY0:       if.end2488:
+// SIMD-ONLY0-NEXT:    [[TMP1540:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2489:%.*]] = zext i16 [[TMP1540]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1541:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2490:%.*]] = zext i16 [[TMP1541]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2491:%.*]] = icmp eq i32 [[CONV2489]], [[CONV2490]]
+// SIMD-ONLY0-NEXT:    [[CONV2492:%.*]] = zext i1 [[CMP2491]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV2493:%.*]] = trunc i32 [[CONV2492]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2493]], ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1542:%.*]] = load i16, ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL2494:%.*]] = icmp ne i16 [[TMP1542]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2494]], label [[IF_THEN2495:%.*]], label [[IF_END2496:%.*]]
+// SIMD-ONLY0:       if.then2495:
+// SIMD-ONLY0-NEXT:    [[TMP1543:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1543]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2496]]
+// SIMD-ONLY0:       if.end2496:
+// SIMD-ONLY0-NEXT:    [[TMP1544:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2497:%.*]] = zext i16 [[TMP1544]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1545:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2498:%.*]] = zext i16 [[TMP1545]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2499:%.*]] = icmp eq i32 [[CONV2497]], [[CONV2498]]
+// SIMD-ONLY0-NEXT:    [[CONV2500:%.*]] = zext i1 [[CMP2499]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV2501:%.*]] = trunc i32 [[CONV2500]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2501]], ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1546:%.*]] = load i16, ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL2502:%.*]] = icmp ne i16 [[TMP1546]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2502]], label [[IF_THEN2503:%.*]], label [[IF_ELSE2504:%.*]]
+// SIMD-ONLY0:       if.then2503:
+// SIMD-ONLY0-NEXT:    [[TMP1547:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1547]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2505:%.*]]
+// SIMD-ONLY0:       if.else2504:
+// SIMD-ONLY0-NEXT:    [[TMP1548:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1548]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2505]]
+// SIMD-ONLY0:       if.end2505:
+// SIMD-ONLY0-NEXT:    [[TMP1549:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2506:%.*]] = zext i16 [[TMP1549]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1550:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2507:%.*]] = zext i16 [[TMP1550]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2508:%.*]] = icmp eq i32 [[CONV2506]], [[CONV2507]]
+// SIMD-ONLY0-NEXT:    [[CONV2509:%.*]] = zext i1 [[CMP2508]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV2510:%.*]] = trunc i32 [[CONV2509]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2510]], ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1551:%.*]] = load i16, ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL2511:%.*]] = icmp ne i16 [[TMP1551]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2511]], label [[IF_THEN2512:%.*]], label [[IF_ELSE2513:%.*]]
+// SIMD-ONLY0:       if.then2512:
+// SIMD-ONLY0-NEXT:    [[TMP1552:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1552]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2514:%.*]]
+// SIMD-ONLY0:       if.else2513:
+// SIMD-ONLY0-NEXT:    [[TMP1553:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1553]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2514]]
+// SIMD-ONLY0:       if.end2514:
+// SIMD-ONLY0-NEXT:    [[TMP1554:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1554]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1555:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2515:%.*]] = zext i16 [[TMP1555]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1556:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2516:%.*]] = zext i16 [[TMP1556]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2517:%.*]] = icmp sgt i32 [[CONV2515]], [[CONV2516]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2517]], label [[IF_THEN2519:%.*]], label [[IF_END2520:%.*]]
+// SIMD-ONLY0:       if.then2519:
+// SIMD-ONLY0-NEXT:    [[TMP1557:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1557]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2520]]
+// SIMD-ONLY0:       if.end2520:
+// SIMD-ONLY0-NEXT:    [[TMP1558:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1558]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1559:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2521:%.*]] = zext i16 [[TMP1559]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1560:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2522:%.*]] = zext i16 [[TMP1560]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2523:%.*]] = icmp sgt i32 [[CONV2521]], [[CONV2522]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2523]], label [[IF_THEN2525:%.*]], label [[IF_END2526:%.*]]
+// SIMD-ONLY0:       if.then2525:
+// SIMD-ONLY0-NEXT:    [[TMP1561:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1561]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2526]]
+// SIMD-ONLY0:       if.end2526:
+// SIMD-ONLY0-NEXT:    [[TMP1562:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1562]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1563:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2527:%.*]] = zext i16 [[TMP1563]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1564:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2528:%.*]] = zext i16 [[TMP1564]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2529:%.*]] = icmp slt i32 [[CONV2527]], [[CONV2528]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2529]], label [[IF_THEN2531:%.*]], label [[IF_END2532:%.*]]
+// SIMD-ONLY0:       if.then2531:
+// SIMD-ONLY0-NEXT:    [[TMP1565:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1565]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2532]]
+// SIMD-ONLY0:       if.end2532:
+// SIMD-ONLY0-NEXT:    [[TMP1566:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1566]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1567:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2533:%.*]] = zext i16 [[TMP1567]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1568:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2534:%.*]] = zext i16 [[TMP1568]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2535:%.*]] = icmp slt i32 [[CONV2533]], [[CONV2534]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2535]], label [[IF_THEN2537:%.*]], label [[IF_END2538:%.*]]
+// SIMD-ONLY0:       if.then2537:
+// SIMD-ONLY0-NEXT:    [[TMP1569:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1569]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2538]]
+// SIMD-ONLY0:       if.end2538:
+// SIMD-ONLY0-NEXT:    [[TMP1570:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1570]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1571:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2539:%.*]] = zext i16 [[TMP1571]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1572:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2540:%.*]] = zext i16 [[TMP1572]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2541:%.*]] = icmp eq i32 [[CONV2539]], [[CONV2540]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2541]], label [[IF_THEN2543:%.*]], label [[IF_END2544:%.*]]
+// SIMD-ONLY0:       if.then2543:
+// SIMD-ONLY0-NEXT:    [[TMP1573:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1573]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2544]]
+// SIMD-ONLY0:       if.end2544:
+// SIMD-ONLY0-NEXT:    [[TMP1574:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1574]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1575:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2545:%.*]] = zext i16 [[TMP1575]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1576:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2546:%.*]] = zext i16 [[TMP1576]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2547:%.*]] = icmp eq i32 [[CONV2545]], [[CONV2546]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2547]], label [[IF_THEN2549:%.*]], label [[IF_END2550:%.*]]
+// SIMD-ONLY0:       if.then2549:
+// SIMD-ONLY0-NEXT:    [[TMP1577:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1577]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2550]]
+// SIMD-ONLY0:       if.end2550:
+// SIMD-ONLY0-NEXT:    [[TMP1578:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2551:%.*]] = zext i16 [[TMP1578]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1579:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2552:%.*]] = zext i16 [[TMP1579]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2553:%.*]] = icmp sgt i32 [[CONV2551]], [[CONV2552]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2553]], label [[IF_THEN2555:%.*]], label [[IF_END2556:%.*]]
+// SIMD-ONLY0:       if.then2555:
+// SIMD-ONLY0-NEXT:    [[TMP1580:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1580]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2556]]
+// SIMD-ONLY0:       if.end2556:
+// SIMD-ONLY0-NEXT:    [[TMP1581:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1581]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1582:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2557:%.*]] = zext i16 [[TMP1582]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1583:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2558:%.*]] = zext i16 [[TMP1583]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2559:%.*]] = icmp sgt i32 [[CONV2557]], [[CONV2558]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2559]], label [[IF_THEN2561:%.*]], label [[IF_END2562:%.*]]
+// SIMD-ONLY0:       if.then2561:
+// SIMD-ONLY0-NEXT:    [[TMP1584:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1584]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2562]]
+// SIMD-ONLY0:       if.end2562:
+// SIMD-ONLY0-NEXT:    [[TMP1585:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1585]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1586:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2563:%.*]] = zext i16 [[TMP1586]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1587:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2564:%.*]] = zext i16 [[TMP1587]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2565:%.*]] = icmp slt i32 [[CONV2563]], [[CONV2564]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2565]], label [[IF_THEN2567:%.*]], label [[IF_END2568:%.*]]
+// SIMD-ONLY0:       if.then2567:
+// SIMD-ONLY0-NEXT:    [[TMP1588:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1588]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2568]]
+// SIMD-ONLY0:       if.end2568:
+// SIMD-ONLY0-NEXT:    [[TMP1589:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1589]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1590:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2569:%.*]] = zext i16 [[TMP1590]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1591:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2570:%.*]] = zext i16 [[TMP1591]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2571:%.*]] = icmp slt i32 [[CONV2569]], [[CONV2570]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2571]], label [[IF_THEN2573:%.*]], label [[IF_END2574:%.*]]
+// SIMD-ONLY0:       if.then2573:
+// SIMD-ONLY0-NEXT:    [[TMP1592:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1592]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2574]]
+// SIMD-ONLY0:       if.end2574:
+// SIMD-ONLY0-NEXT:    [[TMP1593:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1593]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1594:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2575:%.*]] = zext i16 [[TMP1594]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1595:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2576:%.*]] = zext i16 [[TMP1595]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2577:%.*]] = icmp eq i32 [[CONV2575]], [[CONV2576]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2577]], label [[IF_THEN2579:%.*]], label [[IF_END2580:%.*]]
+// SIMD-ONLY0:       if.then2579:
+// SIMD-ONLY0-NEXT:    [[TMP1596:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1596]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2580]]
+// SIMD-ONLY0:       if.end2580:
+// SIMD-ONLY0-NEXT:    [[TMP1597:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1597]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1598:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2581:%.*]] = zext i16 [[TMP1598]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1599:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2582:%.*]] = zext i16 [[TMP1599]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2583:%.*]] = icmp eq i32 [[CONV2581]], [[CONV2582]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2583]], label [[IF_THEN2585:%.*]], label [[IF_END2586:%.*]]
+// SIMD-ONLY0:       if.then2585:
+// SIMD-ONLY0-NEXT:    [[TMP1600:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1600]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2586]]
+// SIMD-ONLY0:       if.end2586:
+// SIMD-ONLY0-NEXT:    [[TMP1601:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1601]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1602:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2587:%.*]] = zext i16 [[TMP1602]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1603:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2588:%.*]] = zext i16 [[TMP1603]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2589:%.*]] = icmp eq i32 [[CONV2587]], [[CONV2588]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2589]], label [[IF_THEN2591:%.*]], label [[IF_ELSE2592:%.*]]
+// SIMD-ONLY0:       if.then2591:
+// SIMD-ONLY0-NEXT:    [[TMP1604:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1604]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2593:%.*]]
+// SIMD-ONLY0:       if.else2592:
+// SIMD-ONLY0-NEXT:    [[TMP1605:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1605]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2593]]
+// SIMD-ONLY0:       if.end2593:
+// SIMD-ONLY0-NEXT:    [[TMP1606:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2594:%.*]] = zext i16 [[TMP1606]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1607:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2595:%.*]] = zext i16 [[TMP1607]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2596:%.*]] = icmp eq i32 [[CONV2594]], [[CONV2595]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2596]], label [[IF_THEN2598:%.*]], label [[IF_ELSE2599:%.*]]
+// SIMD-ONLY0:       if.then2598:
+// SIMD-ONLY0-NEXT:    [[TMP1608:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1608]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2600:%.*]]
+// SIMD-ONLY0:       if.else2599:
+// SIMD-ONLY0-NEXT:    [[TMP1609:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1609]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2600]]
+// SIMD-ONLY0:       if.end2600:
+// SIMD-ONLY0-NEXT:    [[TMP1610:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2601:%.*]] = zext i16 [[TMP1610]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1611:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2602:%.*]] = zext i16 [[TMP1611]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2603:%.*]] = icmp eq i32 [[CONV2601]], [[CONV2602]]
+// SIMD-ONLY0-NEXT:    [[CONV2604:%.*]] = zext i1 [[CMP2603]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV2605:%.*]] = trunc i32 [[CONV2604]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2605]], ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1612:%.*]] = load i16, ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL2606:%.*]] = icmp ne i16 [[TMP1612]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2606]], label [[IF_THEN2607:%.*]], label [[IF_END2608:%.*]]
+// SIMD-ONLY0:       if.then2607:
+// SIMD-ONLY0-NEXT:    [[TMP1613:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1613]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2608]]
+// SIMD-ONLY0:       if.end2608:
+// SIMD-ONLY0-NEXT:    [[TMP1614:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2609:%.*]] = zext i16 [[TMP1614]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1615:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2610:%.*]] = zext i16 [[TMP1615]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2611:%.*]] = icmp eq i32 [[CONV2609]], [[CONV2610]]
+// SIMD-ONLY0-NEXT:    [[CONV2612:%.*]] = zext i1 [[CMP2611]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV2613:%.*]] = trunc i32 [[CONV2612]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2613]], ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1616:%.*]] = load i16, ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL2614:%.*]] = icmp ne i16 [[TMP1616]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2614]], label [[IF_THEN2615:%.*]], label [[IF_END2616:%.*]]
+// SIMD-ONLY0:       if.then2615:
+// SIMD-ONLY0-NEXT:    [[TMP1617:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1617]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2616]]
+// SIMD-ONLY0:       if.end2616:
+// SIMD-ONLY0-NEXT:    [[TMP1618:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2617:%.*]] = zext i16 [[TMP1618]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1619:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2618:%.*]] = zext i16 [[TMP1619]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2619:%.*]] = icmp eq i32 [[CONV2617]], [[CONV2618]]
+// SIMD-ONLY0-NEXT:    [[CONV2620:%.*]] = zext i1 [[CMP2619]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV2621:%.*]] = trunc i32 [[CONV2620]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2621]], ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1620:%.*]] = load i16, ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL2622:%.*]] = icmp ne i16 [[TMP1620]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2622]], label [[IF_THEN2623:%.*]], label [[IF_ELSE2624:%.*]]
+// SIMD-ONLY0:       if.then2623:
+// SIMD-ONLY0-NEXT:    [[TMP1621:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1621]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2625:%.*]]
+// SIMD-ONLY0:       if.else2624:
+// SIMD-ONLY0-NEXT:    [[TMP1622:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1622]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2625]]
+// SIMD-ONLY0:       if.end2625:
+// SIMD-ONLY0-NEXT:    [[TMP1623:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2626:%.*]] = zext i16 [[TMP1623]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1624:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2627:%.*]] = zext i16 [[TMP1624]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2628:%.*]] = icmp eq i32 [[CONV2626]], [[CONV2627]]
+// SIMD-ONLY0-NEXT:    [[CONV2629:%.*]] = zext i1 [[CMP2628]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV2630:%.*]] = trunc i32 [[CONV2629]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2630]], ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1625:%.*]] = load i16, ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL2631:%.*]] = icmp ne i16 [[TMP1625]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2631]], label [[IF_THEN2632:%.*]], label [[IF_ELSE2633:%.*]]
+// SIMD-ONLY0:       if.then2632:
+// SIMD-ONLY0-NEXT:    [[TMP1626:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1626]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2634:%.*]]
+// SIMD-ONLY0:       if.else2633:
+// SIMD-ONLY0-NEXT:    [[TMP1627:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1627]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2634]]
+// SIMD-ONLY0:       if.end2634:
+// SIMD-ONLY0-NEXT:    [[TMP1628:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1628]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1629:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2635:%.*]] = zext i16 [[TMP1629]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1630:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2636:%.*]] = zext i16 [[TMP1630]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2637:%.*]] = icmp sgt i32 [[CONV2635]], [[CONV2636]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2637]], label [[IF_THEN2639:%.*]], label [[IF_END2640:%.*]]
+// SIMD-ONLY0:       if.then2639:
+// SIMD-ONLY0-NEXT:    [[TMP1631:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1631]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2640]]
+// SIMD-ONLY0:       if.end2640:
+// SIMD-ONLY0-NEXT:    [[TMP1632:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1632]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1633:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2641:%.*]] = zext i16 [[TMP1633]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1634:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2642:%.*]] = zext i16 [[TMP1634]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2643:%.*]] = icmp sgt i32 [[CONV2641]], [[CONV2642]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2643]], label [[IF_THEN2645:%.*]], label [[IF_END2646:%.*]]
+// SIMD-ONLY0:       if.then2645:
+// SIMD-ONLY0-NEXT:    [[TMP1635:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1635]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2646]]
+// SIMD-ONLY0:       if.end2646:
+// SIMD-ONLY0-NEXT:    [[TMP1636:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1636]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1637:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2647:%.*]] = zext i16 [[TMP1637]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1638:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2648:%.*]] = zext i16 [[TMP1638]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2649:%.*]] = icmp slt i32 [[CONV2647]], [[CONV2648]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2649]], label [[IF_THEN2651:%.*]], label [[IF_END2652:%.*]]
+// SIMD-ONLY0:       if.then2651:
+// SIMD-ONLY0-NEXT:    [[TMP1639:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1639]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2652]]
+// SIMD-ONLY0:       if.end2652:
+// SIMD-ONLY0-NEXT:    [[TMP1640:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1640]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1641:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2653:%.*]] = zext i16 [[TMP1641]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1642:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2654:%.*]] = zext i16 [[TMP1642]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2655:%.*]] = icmp slt i32 [[CONV2653]], [[CONV2654]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2655]], label [[IF_THEN2657:%.*]], label [[IF_END2658:%.*]]
+// SIMD-ONLY0:       if.then2657:
+// SIMD-ONLY0-NEXT:    [[TMP1643:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1643]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2658]]
+// SIMD-ONLY0:       if.end2658:
+// SIMD-ONLY0-NEXT:    [[TMP1644:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1644]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1645:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2659:%.*]] = zext i16 [[TMP1645]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1646:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2660:%.*]] = zext i16 [[TMP1646]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2661:%.*]] = icmp eq i32 [[CONV2659]], [[CONV2660]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2661]], label [[IF_THEN2663:%.*]], label [[IF_END2664:%.*]]
+// SIMD-ONLY0:       if.then2663:
+// SIMD-ONLY0-NEXT:    [[TMP1647:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1647]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2664]]
+// SIMD-ONLY0:       if.end2664:
+// SIMD-ONLY0-NEXT:    [[TMP1648:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1648]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1649:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2665:%.*]] = zext i16 [[TMP1649]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1650:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2666:%.*]] = zext i16 [[TMP1650]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2667:%.*]] = icmp eq i32 [[CONV2665]], [[CONV2666]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2667]], label [[IF_THEN2669:%.*]], label [[IF_END2670:%.*]]
+// SIMD-ONLY0:       if.then2669:
+// SIMD-ONLY0-NEXT:    [[TMP1651:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1651]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2670]]
+// SIMD-ONLY0:       if.end2670:
+// SIMD-ONLY0-NEXT:    [[TMP1652:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2671:%.*]] = zext i16 [[TMP1652]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1653:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2672:%.*]] = zext i16 [[TMP1653]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2673:%.*]] = icmp sgt i32 [[CONV2671]], [[CONV2672]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2673]], label [[IF_THEN2675:%.*]], label [[IF_END2676:%.*]]
+// SIMD-ONLY0:       if.then2675:
+// SIMD-ONLY0-NEXT:    [[TMP1654:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1654]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2676]]
+// SIMD-ONLY0:       if.end2676:
+// SIMD-ONLY0-NEXT:    [[TMP1655:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1655]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1656:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2677:%.*]] = zext i16 [[TMP1656]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1657:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2678:%.*]] = zext i16 [[TMP1657]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2679:%.*]] = icmp sgt i32 [[CONV2677]], [[CONV2678]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2679]], label [[IF_THEN2681:%.*]], label [[IF_END2682:%.*]]
+// SIMD-ONLY0:       if.then2681:
+// SIMD-ONLY0-NEXT:    [[TMP1658:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1658]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2682]]
+// SIMD-ONLY0:       if.end2682:
+// SIMD-ONLY0-NEXT:    [[TMP1659:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1659]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1660:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2683:%.*]] = zext i16 [[TMP1660]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1661:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2684:%.*]] = zext i16 [[TMP1661]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2685:%.*]] = icmp slt i32 [[CONV2683]], [[CONV2684]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2685]], label [[IF_THEN2687:%.*]], label [[IF_END2688:%.*]]
+// SIMD-ONLY0:       if.then2687:
+// SIMD-ONLY0-NEXT:    [[TMP1662:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1662]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2688]]
+// SIMD-ONLY0:       if.end2688:
+// SIMD-ONLY0-NEXT:    [[TMP1663:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1663]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1664:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2689:%.*]] = zext i16 [[TMP1664]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1665:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2690:%.*]] = zext i16 [[TMP1665]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2691:%.*]] = icmp slt i32 [[CONV2689]], [[CONV2690]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2691]], label [[IF_THEN2693:%.*]], label [[IF_END2694:%.*]]
+// SIMD-ONLY0:       if.then2693:
+// SIMD-ONLY0-NEXT:    [[TMP1666:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1666]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2694]]
+// SIMD-ONLY0:       if.end2694:
+// SIMD-ONLY0-NEXT:    [[TMP1667:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1667]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1668:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2695:%.*]] = zext i16 [[TMP1668]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1669:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2696:%.*]] = zext i16 [[TMP1669]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2697:%.*]] = icmp eq i32 [[CONV2695]], [[CONV2696]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2697]], label [[IF_THEN2699:%.*]], label [[IF_END2700:%.*]]
+// SIMD-ONLY0:       if.then2699:
+// SIMD-ONLY0-NEXT:    [[TMP1670:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1670]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2700]]
+// SIMD-ONLY0:       if.end2700:
+// SIMD-ONLY0-NEXT:    [[TMP1671:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1671]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1672:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2701:%.*]] = zext i16 [[TMP1672]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1673:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2702:%.*]] = zext i16 [[TMP1673]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2703:%.*]] = icmp eq i32 [[CONV2701]], [[CONV2702]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2703]], label [[IF_THEN2705:%.*]], label [[IF_END2706:%.*]]
+// SIMD-ONLY0:       if.then2705:
+// SIMD-ONLY0-NEXT:    [[TMP1674:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1674]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2706]]
+// SIMD-ONLY0:       if.end2706:
+// SIMD-ONLY0-NEXT:    [[TMP1675:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1675]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1676:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2707:%.*]] = zext i16 [[TMP1676]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1677:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2708:%.*]] = zext i16 [[TMP1677]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2709:%.*]] = icmp eq i32 [[CONV2707]], [[CONV2708]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2709]], label [[IF_THEN2711:%.*]], label [[IF_ELSE2712:%.*]]
+// SIMD-ONLY0:       if.then2711:
+// SIMD-ONLY0-NEXT:    [[TMP1678:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1678]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2713:%.*]]
+// SIMD-ONLY0:       if.else2712:
+// SIMD-ONLY0-NEXT:    [[TMP1679:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1679]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2713]]
+// SIMD-ONLY0:       if.end2713:
+// SIMD-ONLY0-NEXT:    [[TMP1680:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2714:%.*]] = zext i16 [[TMP1680]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1681:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2715:%.*]] = zext i16 [[TMP1681]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2716:%.*]] = icmp eq i32 [[CONV2714]], [[CONV2715]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2716]], label [[IF_THEN2718:%.*]], label [[IF_ELSE2719:%.*]]
+// SIMD-ONLY0:       if.then2718:
+// SIMD-ONLY0-NEXT:    [[TMP1682:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1682]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2720:%.*]]
+// SIMD-ONLY0:       if.else2719:
+// SIMD-ONLY0-NEXT:    [[TMP1683:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1683]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2720]]
+// SIMD-ONLY0:       if.end2720:
+// SIMD-ONLY0-NEXT:    [[TMP1684:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2721:%.*]] = zext i16 [[TMP1684]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1685:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2722:%.*]] = zext i16 [[TMP1685]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2723:%.*]] = icmp eq i32 [[CONV2721]], [[CONV2722]]
+// SIMD-ONLY0-NEXT:    [[CONV2724:%.*]] = zext i1 [[CMP2723]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV2725:%.*]] = trunc i32 [[CONV2724]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2725]], ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1686:%.*]] = load i16, ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL2726:%.*]] = icmp ne i16 [[TMP1686]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2726]], label [[IF_THEN2727:%.*]], label [[IF_END2728:%.*]]
+// SIMD-ONLY0:       if.then2727:
+// SIMD-ONLY0-NEXT:    [[TMP1687:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1687]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2728]]
+// SIMD-ONLY0:       if.end2728:
+// SIMD-ONLY0-NEXT:    [[TMP1688:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2729:%.*]] = zext i16 [[TMP1688]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1689:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2730:%.*]] = zext i16 [[TMP1689]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2731:%.*]] = icmp eq i32 [[CONV2729]], [[CONV2730]]
+// SIMD-ONLY0-NEXT:    [[CONV2732:%.*]] = zext i1 [[CMP2731]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV2733:%.*]] = trunc i32 [[CONV2732]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2733]], ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1690:%.*]] = load i16, ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL2734:%.*]] = icmp ne i16 [[TMP1690]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2734]], label [[IF_THEN2735:%.*]], label [[IF_END2736:%.*]]
+// SIMD-ONLY0:       if.then2735:
+// SIMD-ONLY0-NEXT:    [[TMP1691:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1691]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2736]]
+// SIMD-ONLY0:       if.end2736:
+// SIMD-ONLY0-NEXT:    [[TMP1692:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2737:%.*]] = zext i16 [[TMP1692]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1693:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2738:%.*]] = zext i16 [[TMP1693]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2739:%.*]] = icmp eq i32 [[CONV2737]], [[CONV2738]]
+// SIMD-ONLY0-NEXT:    [[CONV2740:%.*]] = zext i1 [[CMP2739]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV2741:%.*]] = trunc i32 [[CONV2740]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2741]], ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1694:%.*]] = load i16, ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL2742:%.*]] = icmp ne i16 [[TMP1694]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2742]], label [[IF_THEN2743:%.*]], label [[IF_ELSE2744:%.*]]
+// SIMD-ONLY0:       if.then2743:
+// SIMD-ONLY0-NEXT:    [[TMP1695:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1695]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2745:%.*]]
+// SIMD-ONLY0:       if.else2744:
+// SIMD-ONLY0-NEXT:    [[TMP1696:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1696]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2745]]
+// SIMD-ONLY0:       if.end2745:
+// SIMD-ONLY0-NEXT:    [[TMP1697:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2746:%.*]] = zext i16 [[TMP1697]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1698:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2747:%.*]] = zext i16 [[TMP1698]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2748:%.*]] = icmp eq i32 [[CONV2746]], [[CONV2747]]
+// SIMD-ONLY0-NEXT:    [[CONV2749:%.*]] = zext i1 [[CMP2748]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV2750:%.*]] = trunc i32 [[CONV2749]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2750]], ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1699:%.*]] = load i16, ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL2751:%.*]] = icmp ne i16 [[TMP1699]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2751]], label [[IF_THEN2752:%.*]], label [[IF_ELSE2753:%.*]]
+// SIMD-ONLY0:       if.then2752:
+// SIMD-ONLY0-NEXT:    [[TMP1700:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1700]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2754:%.*]]
+// SIMD-ONLY0:       if.else2753:
+// SIMD-ONLY0-NEXT:    [[TMP1701:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1701]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2754]]
+// SIMD-ONLY0:       if.end2754:
+// SIMD-ONLY0-NEXT:    [[TMP1702:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1702]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1703:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2755:%.*]] = zext i16 [[TMP1703]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1704:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2756:%.*]] = zext i16 [[TMP1704]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2757:%.*]] = icmp sgt i32 [[CONV2755]], [[CONV2756]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2757]], label [[IF_THEN2759:%.*]], label [[IF_END2760:%.*]]
+// SIMD-ONLY0:       if.then2759:
+// SIMD-ONLY0-NEXT:    [[TMP1705:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1705]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2760]]
+// SIMD-ONLY0:       if.end2760:
+// SIMD-ONLY0-NEXT:    [[TMP1706:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1706]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1707:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2761:%.*]] = zext i16 [[TMP1707]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1708:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2762:%.*]] = zext i16 [[TMP1708]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2763:%.*]] = icmp sgt i32 [[CONV2761]], [[CONV2762]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2763]], label [[IF_THEN2765:%.*]], label [[IF_END2766:%.*]]
+// SIMD-ONLY0:       if.then2765:
+// SIMD-ONLY0-NEXT:    [[TMP1709:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1709]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2766]]
+// SIMD-ONLY0:       if.end2766:
+// SIMD-ONLY0-NEXT:    [[TMP1710:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1710]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1711:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2767:%.*]] = zext i16 [[TMP1711]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1712:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2768:%.*]] = zext i16 [[TMP1712]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2769:%.*]] = icmp slt i32 [[CONV2767]], [[CONV2768]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2769]], label [[IF_THEN2771:%.*]], label [[IF_END2772:%.*]]
+// SIMD-ONLY0:       if.then2771:
+// SIMD-ONLY0-NEXT:    [[TMP1713:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1713]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2772]]
+// SIMD-ONLY0:       if.end2772:
+// SIMD-ONLY0-NEXT:    [[TMP1714:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1714]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1715:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2773:%.*]] = zext i16 [[TMP1715]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1716:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2774:%.*]] = zext i16 [[TMP1716]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2775:%.*]] = icmp slt i32 [[CONV2773]], [[CONV2774]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2775]], label [[IF_THEN2777:%.*]], label [[IF_END2778:%.*]]
+// SIMD-ONLY0:       if.then2777:
+// SIMD-ONLY0-NEXT:    [[TMP1717:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1717]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2778]]
+// SIMD-ONLY0:       if.end2778:
+// SIMD-ONLY0-NEXT:    [[TMP1718:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1718]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1719:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2779:%.*]] = zext i16 [[TMP1719]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1720:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2780:%.*]] = zext i16 [[TMP1720]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2781:%.*]] = icmp eq i32 [[CONV2779]], [[CONV2780]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2781]], label [[IF_THEN2783:%.*]], label [[IF_END2784:%.*]]
+// SIMD-ONLY0:       if.then2783:
+// SIMD-ONLY0-NEXT:    [[TMP1721:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1721]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2784]]
+// SIMD-ONLY0:       if.end2784:
+// SIMD-ONLY0-NEXT:    [[TMP1722:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1722]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1723:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2785:%.*]] = zext i16 [[TMP1723]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1724:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2786:%.*]] = zext i16 [[TMP1724]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2787:%.*]] = icmp eq i32 [[CONV2785]], [[CONV2786]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2787]], label [[IF_THEN2789:%.*]], label [[IF_END2790:%.*]]
+// SIMD-ONLY0:       if.then2789:
+// SIMD-ONLY0-NEXT:    [[TMP1725:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1725]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2790]]
+// SIMD-ONLY0:       if.end2790:
+// SIMD-ONLY0-NEXT:    [[TMP1726:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2791:%.*]] = zext i16 [[TMP1726]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1727:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2792:%.*]] = zext i16 [[TMP1727]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2793:%.*]] = icmp sgt i32 [[CONV2791]], [[CONV2792]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2793]], label [[IF_THEN2795:%.*]], label [[IF_END2796:%.*]]
+// SIMD-ONLY0:       if.then2795:
+// SIMD-ONLY0-NEXT:    [[TMP1728:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1728]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2796]]
+// SIMD-ONLY0:       if.end2796:
+// SIMD-ONLY0-NEXT:    [[TMP1729:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1729]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1730:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2797:%.*]] = zext i16 [[TMP1730]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1731:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2798:%.*]] = zext i16 [[TMP1731]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2799:%.*]] = icmp sgt i32 [[CONV2797]], [[CONV2798]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2799]], label [[IF_THEN2801:%.*]], label [[IF_END2802:%.*]]
+// SIMD-ONLY0:       if.then2801:
+// SIMD-ONLY0-NEXT:    [[TMP1732:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1732]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2802]]
+// SIMD-ONLY0:       if.end2802:
+// SIMD-ONLY0-NEXT:    [[TMP1733:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1733]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1734:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2803:%.*]] = zext i16 [[TMP1734]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1735:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2804:%.*]] = zext i16 [[TMP1735]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2805:%.*]] = icmp slt i32 [[CONV2803]], [[CONV2804]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2805]], label [[IF_THEN2807:%.*]], label [[IF_END2808:%.*]]
+// SIMD-ONLY0:       if.then2807:
+// SIMD-ONLY0-NEXT:    [[TMP1736:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1736]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2808]]
+// SIMD-ONLY0:       if.end2808:
+// SIMD-ONLY0-NEXT:    [[TMP1737:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1737]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1738:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2809:%.*]] = zext i16 [[TMP1738]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1739:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2810:%.*]] = zext i16 [[TMP1739]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2811:%.*]] = icmp slt i32 [[CONV2809]], [[CONV2810]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2811]], label [[IF_THEN2813:%.*]], label [[IF_END2814:%.*]]
+// SIMD-ONLY0:       if.then2813:
+// SIMD-ONLY0-NEXT:    [[TMP1740:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1740]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2814]]
+// SIMD-ONLY0:       if.end2814:
+// SIMD-ONLY0-NEXT:    [[TMP1741:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1741]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1742:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2815:%.*]] = zext i16 [[TMP1742]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1743:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2816:%.*]] = zext i16 [[TMP1743]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2817:%.*]] = icmp eq i32 [[CONV2815]], [[CONV2816]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2817]], label [[IF_THEN2819:%.*]], label [[IF_END2820:%.*]]
+// SIMD-ONLY0:       if.then2819:
+// SIMD-ONLY0-NEXT:    [[TMP1744:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1744]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2820]]
+// SIMD-ONLY0:       if.end2820:
+// SIMD-ONLY0-NEXT:    [[TMP1745:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1745]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1746:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2821:%.*]] = zext i16 [[TMP1746]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1747:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2822:%.*]] = zext i16 [[TMP1747]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2823:%.*]] = icmp eq i32 [[CONV2821]], [[CONV2822]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2823]], label [[IF_THEN2825:%.*]], label [[IF_END2826:%.*]]
+// SIMD-ONLY0:       if.then2825:
+// SIMD-ONLY0-NEXT:    [[TMP1748:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1748]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2826]]
+// SIMD-ONLY0:       if.end2826:
+// SIMD-ONLY0-NEXT:    [[TMP1749:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1749]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1750:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2827:%.*]] = zext i16 [[TMP1750]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1751:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2828:%.*]] = zext i16 [[TMP1751]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2829:%.*]] = icmp eq i32 [[CONV2827]], [[CONV2828]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2829]], label [[IF_THEN2831:%.*]], label [[IF_ELSE2832:%.*]]
+// SIMD-ONLY0:       if.then2831:
+// SIMD-ONLY0-NEXT:    [[TMP1752:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1752]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2833:%.*]]
+// SIMD-ONLY0:       if.else2832:
+// SIMD-ONLY0-NEXT:    [[TMP1753:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1753]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2833]]
+// SIMD-ONLY0:       if.end2833:
+// SIMD-ONLY0-NEXT:    [[TMP1754:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2834:%.*]] = zext i16 [[TMP1754]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1755:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2835:%.*]] = zext i16 [[TMP1755]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2836:%.*]] = icmp eq i32 [[CONV2834]], [[CONV2835]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2836]], label [[IF_THEN2838:%.*]], label [[IF_ELSE2839:%.*]]
+// SIMD-ONLY0:       if.then2838:
+// SIMD-ONLY0-NEXT:    [[TMP1756:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1756]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2840:%.*]]
+// SIMD-ONLY0:       if.else2839:
+// SIMD-ONLY0-NEXT:    [[TMP1757:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1757]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2840]]
+// SIMD-ONLY0:       if.end2840:
+// SIMD-ONLY0-NEXT:    [[TMP1758:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2841:%.*]] = zext i16 [[TMP1758]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1759:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2842:%.*]] = zext i16 [[TMP1759]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2843:%.*]] = icmp eq i32 [[CONV2841]], [[CONV2842]]
+// SIMD-ONLY0-NEXT:    [[CONV2844:%.*]] = zext i1 [[CMP2843]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV2845:%.*]] = trunc i32 [[CONV2844]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2845]], ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1760:%.*]] = load i16, ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL2846:%.*]] = icmp ne i16 [[TMP1760]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2846]], label [[IF_THEN2847:%.*]], label [[IF_END2848:%.*]]
+// SIMD-ONLY0:       if.then2847:
+// SIMD-ONLY0-NEXT:    [[TMP1761:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1761]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2848]]
+// SIMD-ONLY0:       if.end2848:
+// SIMD-ONLY0-NEXT:    [[TMP1762:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2849:%.*]] = zext i16 [[TMP1762]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1763:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2850:%.*]] = zext i16 [[TMP1763]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2851:%.*]] = icmp eq i32 [[CONV2849]], [[CONV2850]]
+// SIMD-ONLY0-NEXT:    [[CONV2852:%.*]] = zext i1 [[CMP2851]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV2853:%.*]] = trunc i32 [[CONV2852]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2853]], ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1764:%.*]] = load i16, ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL2854:%.*]] = icmp ne i16 [[TMP1764]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2854]], label [[IF_THEN2855:%.*]], label [[IF_END2856:%.*]]
+// SIMD-ONLY0:       if.then2855:
+// SIMD-ONLY0-NEXT:    [[TMP1765:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1765]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2856]]
+// SIMD-ONLY0:       if.end2856:
+// SIMD-ONLY0-NEXT:    [[TMP1766:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2857:%.*]] = zext i16 [[TMP1766]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1767:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2858:%.*]] = zext i16 [[TMP1767]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2859:%.*]] = icmp eq i32 [[CONV2857]], [[CONV2858]]
+// SIMD-ONLY0-NEXT:    [[CONV2860:%.*]] = zext i1 [[CMP2859]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV2861:%.*]] = trunc i32 [[CONV2860]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2861]], ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1768:%.*]] = load i16, ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL2862:%.*]] = icmp ne i16 [[TMP1768]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2862]], label [[IF_THEN2863:%.*]], label [[IF_ELSE2864:%.*]]
+// SIMD-ONLY0:       if.then2863:
+// SIMD-ONLY0-NEXT:    [[TMP1769:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1769]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2865:%.*]]
+// SIMD-ONLY0:       if.else2864:
+// SIMD-ONLY0-NEXT:    [[TMP1770:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1770]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2865]]
+// SIMD-ONLY0:       if.end2865:
+// SIMD-ONLY0-NEXT:    [[TMP1771:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2866:%.*]] = zext i16 [[TMP1771]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP1772:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV2867:%.*]] = zext i16 [[TMP1772]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP2868:%.*]] = icmp eq i32 [[CONV2866]], [[CONV2867]]
+// SIMD-ONLY0-NEXT:    [[CONV2869:%.*]] = zext i1 [[CMP2868]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV2870:%.*]] = trunc i32 [[CONV2869]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV2870]], ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1773:%.*]] = load i16, ptr [[USR]], align 2
+// SIMD-ONLY0-NEXT:    [[TOBOOL2871:%.*]] = icmp ne i16 [[TMP1773]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2871]], label [[IF_THEN2872:%.*]], label [[IF_ELSE2873:%.*]]
+// SIMD-ONLY0:       if.then2872:
+// SIMD-ONLY0-NEXT:    [[TMP1774:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1774]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2874:%.*]]
+// SIMD-ONLY0:       if.else2873:
+// SIMD-ONLY0-NEXT:    [[TMP1775:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP1775]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    br label [[IF_END2874]]
+// SIMD-ONLY0:       if.end2874:
+// SIMD-ONLY0-NEXT:    [[TMP1776:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1776]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1777:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1778:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2875:%.*]] = icmp sgt i32 [[TMP1777]], [[TMP1778]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2875]], label [[IF_THEN2877:%.*]], label [[IF_END2878:%.*]]
+// SIMD-ONLY0:       if.then2877:
+// SIMD-ONLY0-NEXT:    [[TMP1779:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1779]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2878]]
+// SIMD-ONLY0:       if.end2878:
+// SIMD-ONLY0-NEXT:    [[TMP1780:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1780]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1781:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1782:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2879:%.*]] = icmp sgt i32 [[TMP1781]], [[TMP1782]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2879]], label [[IF_THEN2881:%.*]], label [[IF_END2882:%.*]]
+// SIMD-ONLY0:       if.then2881:
+// SIMD-ONLY0-NEXT:    [[TMP1783:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1783]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2882]]
+// SIMD-ONLY0:       if.end2882:
+// SIMD-ONLY0-NEXT:    [[TMP1784:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1784]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1785:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1786:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2883:%.*]] = icmp slt i32 [[TMP1785]], [[TMP1786]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2883]], label [[IF_THEN2885:%.*]], label [[IF_END2886:%.*]]
+// SIMD-ONLY0:       if.then2885:
+// SIMD-ONLY0-NEXT:    [[TMP1787:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1787]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2886]]
+// SIMD-ONLY0:       if.end2886:
+// SIMD-ONLY0-NEXT:    [[TMP1788:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1788]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1789:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1790:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2887:%.*]] = icmp slt i32 [[TMP1789]], [[TMP1790]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2887]], label [[IF_THEN2889:%.*]], label [[IF_END2890:%.*]]
+// SIMD-ONLY0:       if.then2889:
+// SIMD-ONLY0-NEXT:    [[TMP1791:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1791]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2890]]
+// SIMD-ONLY0:       if.end2890:
+// SIMD-ONLY0-NEXT:    [[TMP1792:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1792]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1793:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1794:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2891:%.*]] = icmp eq i32 [[TMP1793]], [[TMP1794]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2891]], label [[IF_THEN2893:%.*]], label [[IF_END2894:%.*]]
+// SIMD-ONLY0:       if.then2893:
+// SIMD-ONLY0-NEXT:    [[TMP1795:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1795]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2894]]
+// SIMD-ONLY0:       if.end2894:
+// SIMD-ONLY0-NEXT:    [[TMP1796:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1796]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1797:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1798:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2895:%.*]] = icmp eq i32 [[TMP1797]], [[TMP1798]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2895]], label [[IF_THEN2897:%.*]], label [[IF_END2898:%.*]]
+// SIMD-ONLY0:       if.then2897:
+// SIMD-ONLY0-NEXT:    [[TMP1799:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1799]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2898]]
+// SIMD-ONLY0:       if.end2898:
+// SIMD-ONLY0-NEXT:    [[TMP1800:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1801:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2899:%.*]] = icmp sgt i32 [[TMP1800]], [[TMP1801]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2899]], label [[IF_THEN2901:%.*]], label [[IF_END2902:%.*]]
+// SIMD-ONLY0:       if.then2901:
+// SIMD-ONLY0-NEXT:    [[TMP1802:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1802]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2902]]
+// SIMD-ONLY0:       if.end2902:
+// SIMD-ONLY0-NEXT:    [[TMP1803:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1803]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1804:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1805:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2903:%.*]] = icmp sgt i32 [[TMP1804]], [[TMP1805]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2903]], label [[IF_THEN2905:%.*]], label [[IF_END2906:%.*]]
+// SIMD-ONLY0:       if.then2905:
+// SIMD-ONLY0-NEXT:    [[TMP1806:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1806]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2906]]
+// SIMD-ONLY0:       if.end2906:
+// SIMD-ONLY0-NEXT:    [[TMP1807:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1807]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1808:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1809:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2907:%.*]] = icmp slt i32 [[TMP1808]], [[TMP1809]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2907]], label [[IF_THEN2909:%.*]], label [[IF_END2910:%.*]]
+// SIMD-ONLY0:       if.then2909:
+// SIMD-ONLY0-NEXT:    [[TMP1810:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1810]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2910]]
+// SIMD-ONLY0:       if.end2910:
+// SIMD-ONLY0-NEXT:    [[TMP1811:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1811]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1812:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1813:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2911:%.*]] = icmp slt i32 [[TMP1812]], [[TMP1813]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2911]], label [[IF_THEN2913:%.*]], label [[IF_END2914:%.*]]
+// SIMD-ONLY0:       if.then2913:
+// SIMD-ONLY0-NEXT:    [[TMP1814:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1814]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2914]]
+// SIMD-ONLY0:       if.end2914:
+// SIMD-ONLY0-NEXT:    [[TMP1815:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1815]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1816:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1817:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2915:%.*]] = icmp eq i32 [[TMP1816]], [[TMP1817]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2915]], label [[IF_THEN2917:%.*]], label [[IF_END2918:%.*]]
+// SIMD-ONLY0:       if.then2917:
+// SIMD-ONLY0-NEXT:    [[TMP1818:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1818]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2918]]
+// SIMD-ONLY0:       if.end2918:
+// SIMD-ONLY0-NEXT:    [[TMP1819:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1819]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1820:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1821:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2919:%.*]] = icmp eq i32 [[TMP1820]], [[TMP1821]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2919]], label [[IF_THEN2921:%.*]], label [[IF_END2922:%.*]]
+// SIMD-ONLY0:       if.then2921:
+// SIMD-ONLY0-NEXT:    [[TMP1822:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1822]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2922]]
+// SIMD-ONLY0:       if.end2922:
+// SIMD-ONLY0-NEXT:    [[TMP1823:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1823]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1824:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1825:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2923:%.*]] = icmp eq i32 [[TMP1824]], [[TMP1825]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2923]], label [[IF_THEN2925:%.*]], label [[IF_ELSE2926:%.*]]
+// SIMD-ONLY0:       if.then2925:
+// SIMD-ONLY0-NEXT:    [[TMP1826:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1826]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2927:%.*]]
+// SIMD-ONLY0:       if.else2926:
+// SIMD-ONLY0-NEXT:    [[TMP1827:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1827]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2927]]
+// SIMD-ONLY0:       if.end2927:
+// SIMD-ONLY0-NEXT:    [[TMP1828:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1829:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2928:%.*]] = icmp eq i32 [[TMP1828]], [[TMP1829]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2928]], label [[IF_THEN2930:%.*]], label [[IF_ELSE2931:%.*]]
+// SIMD-ONLY0:       if.then2930:
+// SIMD-ONLY0-NEXT:    [[TMP1830:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1830]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2932:%.*]]
+// SIMD-ONLY0:       if.else2931:
+// SIMD-ONLY0-NEXT:    [[TMP1831:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1831]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2932]]
+// SIMD-ONLY0:       if.end2932:
+// SIMD-ONLY0-NEXT:    [[TMP1832:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1833:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2933:%.*]] = icmp eq i32 [[TMP1832]], [[TMP1833]]
+// SIMD-ONLY0-NEXT:    [[CONV2934:%.*]] = zext i1 [[CMP2933]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV2934]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1834:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL2935:%.*]] = icmp ne i32 [[TMP1834]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2935]], label [[IF_THEN2936:%.*]], label [[IF_END2937:%.*]]
+// SIMD-ONLY0:       if.then2936:
+// SIMD-ONLY0-NEXT:    [[TMP1835:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1835]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2937]]
+// SIMD-ONLY0:       if.end2937:
+// SIMD-ONLY0-NEXT:    [[TMP1836:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1837:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2938:%.*]] = icmp eq i32 [[TMP1836]], [[TMP1837]]
+// SIMD-ONLY0-NEXT:    [[CONV2939:%.*]] = zext i1 [[CMP2938]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV2939]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1838:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL2940:%.*]] = icmp ne i32 [[TMP1838]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2940]], label [[IF_THEN2941:%.*]], label [[IF_END2942:%.*]]
+// SIMD-ONLY0:       if.then2941:
+// SIMD-ONLY0-NEXT:    [[TMP1839:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1839]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2942]]
+// SIMD-ONLY0:       if.end2942:
+// SIMD-ONLY0-NEXT:    [[TMP1840:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1841:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2943:%.*]] = icmp eq i32 [[TMP1840]], [[TMP1841]]
+// SIMD-ONLY0-NEXT:    [[CONV2944:%.*]] = zext i1 [[CMP2943]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV2944]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1842:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL2945:%.*]] = icmp ne i32 [[TMP1842]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2945]], label [[IF_THEN2946:%.*]], label [[IF_ELSE2947:%.*]]
+// SIMD-ONLY0:       if.then2946:
+// SIMD-ONLY0-NEXT:    [[TMP1843:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1843]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2948:%.*]]
+// SIMD-ONLY0:       if.else2947:
+// SIMD-ONLY0-NEXT:    [[TMP1844:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1844]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2948]]
+// SIMD-ONLY0:       if.end2948:
+// SIMD-ONLY0-NEXT:    [[TMP1845:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1846:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2949:%.*]] = icmp eq i32 [[TMP1845]], [[TMP1846]]
+// SIMD-ONLY0-NEXT:    [[CONV2950:%.*]] = zext i1 [[CMP2949]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV2950]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1847:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL2951:%.*]] = icmp ne i32 [[TMP1847]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL2951]], label [[IF_THEN2952:%.*]], label [[IF_ELSE2953:%.*]]
+// SIMD-ONLY0:       if.then2952:
+// SIMD-ONLY0-NEXT:    [[TMP1848:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1848]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2954:%.*]]
+// SIMD-ONLY0:       if.else2953:
+// SIMD-ONLY0-NEXT:    [[TMP1849:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1849]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2954]]
+// SIMD-ONLY0:       if.end2954:
+// SIMD-ONLY0-NEXT:    [[TMP1850:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1850]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1851:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1852:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2955:%.*]] = icmp sgt i32 [[TMP1851]], [[TMP1852]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2955]], label [[IF_THEN2957:%.*]], label [[IF_END2958:%.*]]
+// SIMD-ONLY0:       if.then2957:
+// SIMD-ONLY0-NEXT:    [[TMP1853:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1853]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2958]]
+// SIMD-ONLY0:       if.end2958:
+// SIMD-ONLY0-NEXT:    [[TMP1854:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1854]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1855:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1856:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2959:%.*]] = icmp sgt i32 [[TMP1855]], [[TMP1856]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2959]], label [[IF_THEN2961:%.*]], label [[IF_END2962:%.*]]
+// SIMD-ONLY0:       if.then2961:
+// SIMD-ONLY0-NEXT:    [[TMP1857:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1857]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2962]]
+// SIMD-ONLY0:       if.end2962:
+// SIMD-ONLY0-NEXT:    [[TMP1858:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1858]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1859:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1860:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2963:%.*]] = icmp slt i32 [[TMP1859]], [[TMP1860]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2963]], label [[IF_THEN2965:%.*]], label [[IF_END2966:%.*]]
+// SIMD-ONLY0:       if.then2965:
+// SIMD-ONLY0-NEXT:    [[TMP1861:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1861]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2966]]
+// SIMD-ONLY0:       if.end2966:
+// SIMD-ONLY0-NEXT:    [[TMP1862:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1862]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1863:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1864:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2967:%.*]] = icmp slt i32 [[TMP1863]], [[TMP1864]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2967]], label [[IF_THEN2969:%.*]], label [[IF_END2970:%.*]]
+// SIMD-ONLY0:       if.then2969:
+// SIMD-ONLY0-NEXT:    [[TMP1865:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1865]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2970]]
+// SIMD-ONLY0:       if.end2970:
+// SIMD-ONLY0-NEXT:    [[TMP1866:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1866]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1867:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1868:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2971:%.*]] = icmp eq i32 [[TMP1867]], [[TMP1868]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2971]], label [[IF_THEN2973:%.*]], label [[IF_END2974:%.*]]
+// SIMD-ONLY0:       if.then2973:
+// SIMD-ONLY0-NEXT:    [[TMP1869:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1869]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2974]]
+// SIMD-ONLY0:       if.end2974:
+// SIMD-ONLY0-NEXT:    [[TMP1870:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1870]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1871:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1872:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2975:%.*]] = icmp eq i32 [[TMP1871]], [[TMP1872]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2975]], label [[IF_THEN2977:%.*]], label [[IF_END2978:%.*]]
+// SIMD-ONLY0:       if.then2977:
+// SIMD-ONLY0-NEXT:    [[TMP1873:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1873]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2978]]
+// SIMD-ONLY0:       if.end2978:
+// SIMD-ONLY0-NEXT:    [[TMP1874:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1875:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2979:%.*]] = icmp sgt i32 [[TMP1874]], [[TMP1875]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2979]], label [[IF_THEN2981:%.*]], label [[IF_END2982:%.*]]
+// SIMD-ONLY0:       if.then2981:
+// SIMD-ONLY0-NEXT:    [[TMP1876:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1876]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2982]]
+// SIMD-ONLY0:       if.end2982:
+// SIMD-ONLY0-NEXT:    [[TMP1877:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1877]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1878:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1879:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2983:%.*]] = icmp sgt i32 [[TMP1878]], [[TMP1879]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2983]], label [[IF_THEN2985:%.*]], label [[IF_END2986:%.*]]
+// SIMD-ONLY0:       if.then2985:
+// SIMD-ONLY0-NEXT:    [[TMP1880:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1880]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2986]]
+// SIMD-ONLY0:       if.end2986:
+// SIMD-ONLY0-NEXT:    [[TMP1881:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1881]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1882:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1883:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2987:%.*]] = icmp slt i32 [[TMP1882]], [[TMP1883]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2987]], label [[IF_THEN2989:%.*]], label [[IF_END2990:%.*]]
+// SIMD-ONLY0:       if.then2989:
+// SIMD-ONLY0-NEXT:    [[TMP1884:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1884]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2990]]
+// SIMD-ONLY0:       if.end2990:
+// SIMD-ONLY0-NEXT:    [[TMP1885:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1885]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1886:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1887:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2991:%.*]] = icmp slt i32 [[TMP1886]], [[TMP1887]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2991]], label [[IF_THEN2993:%.*]], label [[IF_END2994:%.*]]
+// SIMD-ONLY0:       if.then2993:
+// SIMD-ONLY0-NEXT:    [[TMP1888:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1888]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2994]]
+// SIMD-ONLY0:       if.end2994:
+// SIMD-ONLY0-NEXT:    [[TMP1889:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1889]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1890:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1891:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2995:%.*]] = icmp eq i32 [[TMP1890]], [[TMP1891]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2995]], label [[IF_THEN2997:%.*]], label [[IF_END2998:%.*]]
+// SIMD-ONLY0:       if.then2997:
+// SIMD-ONLY0-NEXT:    [[TMP1892:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1892]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END2998]]
+// SIMD-ONLY0:       if.end2998:
+// SIMD-ONLY0-NEXT:    [[TMP1893:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1893]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1894:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1895:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2999:%.*]] = icmp eq i32 [[TMP1894]], [[TMP1895]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2999]], label [[IF_THEN3001:%.*]], label [[IF_END3002:%.*]]
+// SIMD-ONLY0:       if.then3001:
+// SIMD-ONLY0-NEXT:    [[TMP1896:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1896]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3002]]
+// SIMD-ONLY0:       if.end3002:
+// SIMD-ONLY0-NEXT:    [[TMP1897:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1897]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1898:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1899:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3003:%.*]] = icmp eq i32 [[TMP1898]], [[TMP1899]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3003]], label [[IF_THEN3005:%.*]], label [[IF_ELSE3006:%.*]]
+// SIMD-ONLY0:       if.then3005:
+// SIMD-ONLY0-NEXT:    [[TMP1900:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1900]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3007:%.*]]
+// SIMD-ONLY0:       if.else3006:
+// SIMD-ONLY0-NEXT:    [[TMP1901:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1901]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3007]]
+// SIMD-ONLY0:       if.end3007:
+// SIMD-ONLY0-NEXT:    [[TMP1902:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1903:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3008:%.*]] = icmp eq i32 [[TMP1902]], [[TMP1903]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3008]], label [[IF_THEN3010:%.*]], label [[IF_ELSE3011:%.*]]
+// SIMD-ONLY0:       if.then3010:
+// SIMD-ONLY0-NEXT:    [[TMP1904:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1904]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3012:%.*]]
+// SIMD-ONLY0:       if.else3011:
+// SIMD-ONLY0-NEXT:    [[TMP1905:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1905]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3012]]
+// SIMD-ONLY0:       if.end3012:
+// SIMD-ONLY0-NEXT:    [[TMP1906:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1907:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3013:%.*]] = icmp eq i32 [[TMP1906]], [[TMP1907]]
+// SIMD-ONLY0-NEXT:    [[CONV3014:%.*]] = zext i1 [[CMP3013]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3014]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1908:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3015:%.*]] = icmp ne i32 [[TMP1908]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3015]], label [[IF_THEN3016:%.*]], label [[IF_END3017:%.*]]
+// SIMD-ONLY0:       if.then3016:
+// SIMD-ONLY0-NEXT:    [[TMP1909:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1909]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3017]]
+// SIMD-ONLY0:       if.end3017:
+// SIMD-ONLY0-NEXT:    [[TMP1910:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1911:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3018:%.*]] = icmp eq i32 [[TMP1910]], [[TMP1911]]
+// SIMD-ONLY0-NEXT:    [[CONV3019:%.*]] = zext i1 [[CMP3018]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3019]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1912:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3020:%.*]] = icmp ne i32 [[TMP1912]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3020]], label [[IF_THEN3021:%.*]], label [[IF_END3022:%.*]]
+// SIMD-ONLY0:       if.then3021:
+// SIMD-ONLY0-NEXT:    [[TMP1913:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1913]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3022]]
+// SIMD-ONLY0:       if.end3022:
+// SIMD-ONLY0-NEXT:    [[TMP1914:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1915:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3023:%.*]] = icmp eq i32 [[TMP1914]], [[TMP1915]]
+// SIMD-ONLY0-NEXT:    [[CONV3024:%.*]] = zext i1 [[CMP3023]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3024]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1916:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3025:%.*]] = icmp ne i32 [[TMP1916]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3025]], label [[IF_THEN3026:%.*]], label [[IF_ELSE3027:%.*]]
+// SIMD-ONLY0:       if.then3026:
+// SIMD-ONLY0-NEXT:    [[TMP1917:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1917]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3028:%.*]]
+// SIMD-ONLY0:       if.else3027:
+// SIMD-ONLY0-NEXT:    [[TMP1918:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1918]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3028]]
+// SIMD-ONLY0:       if.end3028:
+// SIMD-ONLY0-NEXT:    [[TMP1919:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1920:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3029:%.*]] = icmp eq i32 [[TMP1919]], [[TMP1920]]
+// SIMD-ONLY0-NEXT:    [[CONV3030:%.*]] = zext i1 [[CMP3029]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3030]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1921:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3031:%.*]] = icmp ne i32 [[TMP1921]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3031]], label [[IF_THEN3032:%.*]], label [[IF_ELSE3033:%.*]]
+// SIMD-ONLY0:       if.then3032:
+// SIMD-ONLY0-NEXT:    [[TMP1922:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1922]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3034:%.*]]
+// SIMD-ONLY0:       if.else3033:
+// SIMD-ONLY0-NEXT:    [[TMP1923:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1923]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3034]]
+// SIMD-ONLY0:       if.end3034:
+// SIMD-ONLY0-NEXT:    [[TMP1924:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1924]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1925:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1926:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3035:%.*]] = icmp sgt i32 [[TMP1925]], [[TMP1926]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3035]], label [[IF_THEN3037:%.*]], label [[IF_END3038:%.*]]
+// SIMD-ONLY0:       if.then3037:
+// SIMD-ONLY0-NEXT:    [[TMP1927:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1927]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3038]]
+// SIMD-ONLY0:       if.end3038:
+// SIMD-ONLY0-NEXT:    [[TMP1928:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1928]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1929:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1930:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3039:%.*]] = icmp sgt i32 [[TMP1929]], [[TMP1930]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3039]], label [[IF_THEN3041:%.*]], label [[IF_END3042:%.*]]
+// SIMD-ONLY0:       if.then3041:
+// SIMD-ONLY0-NEXT:    [[TMP1931:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1931]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3042]]
+// SIMD-ONLY0:       if.end3042:
+// SIMD-ONLY0-NEXT:    [[TMP1932:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1932]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1933:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1934:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3043:%.*]] = icmp slt i32 [[TMP1933]], [[TMP1934]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3043]], label [[IF_THEN3045:%.*]], label [[IF_END3046:%.*]]
+// SIMD-ONLY0:       if.then3045:
+// SIMD-ONLY0-NEXT:    [[TMP1935:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1935]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3046]]
+// SIMD-ONLY0:       if.end3046:
+// SIMD-ONLY0-NEXT:    [[TMP1936:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1936]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1937:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1938:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3047:%.*]] = icmp slt i32 [[TMP1937]], [[TMP1938]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3047]], label [[IF_THEN3049:%.*]], label [[IF_END3050:%.*]]
+// SIMD-ONLY0:       if.then3049:
+// SIMD-ONLY0-NEXT:    [[TMP1939:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1939]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3050]]
+// SIMD-ONLY0:       if.end3050:
+// SIMD-ONLY0-NEXT:    [[TMP1940:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1940]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1941:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1942:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3051:%.*]] = icmp eq i32 [[TMP1941]], [[TMP1942]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3051]], label [[IF_THEN3053:%.*]], label [[IF_END3054:%.*]]
+// SIMD-ONLY0:       if.then3053:
+// SIMD-ONLY0-NEXT:    [[TMP1943:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1943]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3054]]
+// SIMD-ONLY0:       if.end3054:
+// SIMD-ONLY0-NEXT:    [[TMP1944:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1944]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1945:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1946:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3055:%.*]] = icmp eq i32 [[TMP1945]], [[TMP1946]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3055]], label [[IF_THEN3057:%.*]], label [[IF_END3058:%.*]]
+// SIMD-ONLY0:       if.then3057:
+// SIMD-ONLY0-NEXT:    [[TMP1947:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1947]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3058]]
+// SIMD-ONLY0:       if.end3058:
+// SIMD-ONLY0-NEXT:    [[TMP1948:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1949:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3059:%.*]] = icmp sgt i32 [[TMP1948]], [[TMP1949]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3059]], label [[IF_THEN3061:%.*]], label [[IF_END3062:%.*]]
+// SIMD-ONLY0:       if.then3061:
+// SIMD-ONLY0-NEXT:    [[TMP1950:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1950]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3062]]
+// SIMD-ONLY0:       if.end3062:
+// SIMD-ONLY0-NEXT:    [[TMP1951:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1951]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1952:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1953:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3063:%.*]] = icmp sgt i32 [[TMP1952]], [[TMP1953]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3063]], label [[IF_THEN3065:%.*]], label [[IF_END3066:%.*]]
+// SIMD-ONLY0:       if.then3065:
+// SIMD-ONLY0-NEXT:    [[TMP1954:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1954]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3066]]
+// SIMD-ONLY0:       if.end3066:
+// SIMD-ONLY0-NEXT:    [[TMP1955:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1955]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1956:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1957:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3067:%.*]] = icmp slt i32 [[TMP1956]], [[TMP1957]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3067]], label [[IF_THEN3069:%.*]], label [[IF_END3070:%.*]]
+// SIMD-ONLY0:       if.then3069:
+// SIMD-ONLY0-NEXT:    [[TMP1958:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1958]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3070]]
+// SIMD-ONLY0:       if.end3070:
+// SIMD-ONLY0-NEXT:    [[TMP1959:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1959]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1960:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1961:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3071:%.*]] = icmp slt i32 [[TMP1960]], [[TMP1961]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3071]], label [[IF_THEN3073:%.*]], label [[IF_END3074:%.*]]
+// SIMD-ONLY0:       if.then3073:
+// SIMD-ONLY0-NEXT:    [[TMP1962:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1962]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3074]]
+// SIMD-ONLY0:       if.end3074:
+// SIMD-ONLY0-NEXT:    [[TMP1963:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1963]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1964:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1965:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3075:%.*]] = icmp eq i32 [[TMP1964]], [[TMP1965]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3075]], label [[IF_THEN3077:%.*]], label [[IF_END3078:%.*]]
+// SIMD-ONLY0:       if.then3077:
+// SIMD-ONLY0-NEXT:    [[TMP1966:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1966]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3078]]
+// SIMD-ONLY0:       if.end3078:
+// SIMD-ONLY0-NEXT:    [[TMP1967:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1967]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1968:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1969:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3079:%.*]] = icmp eq i32 [[TMP1968]], [[TMP1969]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3079]], label [[IF_THEN3081:%.*]], label [[IF_END3082:%.*]]
+// SIMD-ONLY0:       if.then3081:
+// SIMD-ONLY0-NEXT:    [[TMP1970:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1970]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3082]]
+// SIMD-ONLY0:       if.end3082:
+// SIMD-ONLY0-NEXT:    [[TMP1971:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1971]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1972:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1973:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3083:%.*]] = icmp eq i32 [[TMP1972]], [[TMP1973]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3083]], label [[IF_THEN3085:%.*]], label [[IF_ELSE3086:%.*]]
+// SIMD-ONLY0:       if.then3085:
+// SIMD-ONLY0-NEXT:    [[TMP1974:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1974]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3087:%.*]]
+// SIMD-ONLY0:       if.else3086:
+// SIMD-ONLY0-NEXT:    [[TMP1975:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1975]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3087]]
+// SIMD-ONLY0:       if.end3087:
+// SIMD-ONLY0-NEXT:    [[TMP1976:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1977:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3088:%.*]] = icmp eq i32 [[TMP1976]], [[TMP1977]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3088]], label [[IF_THEN3090:%.*]], label [[IF_ELSE3091:%.*]]
+// SIMD-ONLY0:       if.then3090:
+// SIMD-ONLY0-NEXT:    [[TMP1978:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1978]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3092:%.*]]
+// SIMD-ONLY0:       if.else3091:
+// SIMD-ONLY0-NEXT:    [[TMP1979:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1979]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3092]]
+// SIMD-ONLY0:       if.end3092:
+// SIMD-ONLY0-NEXT:    [[TMP1980:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1981:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3093:%.*]] = icmp eq i32 [[TMP1980]], [[TMP1981]]
+// SIMD-ONLY0-NEXT:    [[CONV3094:%.*]] = zext i1 [[CMP3093]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3094]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1982:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3095:%.*]] = icmp ne i32 [[TMP1982]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3095]], label [[IF_THEN3096:%.*]], label [[IF_END3097:%.*]]
+// SIMD-ONLY0:       if.then3096:
+// SIMD-ONLY0-NEXT:    [[TMP1983:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1983]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3097]]
+// SIMD-ONLY0:       if.end3097:
+// SIMD-ONLY0-NEXT:    [[TMP1984:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1985:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3098:%.*]] = icmp eq i32 [[TMP1984]], [[TMP1985]]
+// SIMD-ONLY0-NEXT:    [[CONV3099:%.*]] = zext i1 [[CMP3098]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3099]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1986:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3100:%.*]] = icmp ne i32 [[TMP1986]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3100]], label [[IF_THEN3101:%.*]], label [[IF_END3102:%.*]]
+// SIMD-ONLY0:       if.then3101:
+// SIMD-ONLY0-NEXT:    [[TMP1987:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1987]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3102]]
+// SIMD-ONLY0:       if.end3102:
+// SIMD-ONLY0-NEXT:    [[TMP1988:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1989:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3103:%.*]] = icmp eq i32 [[TMP1988]], [[TMP1989]]
+// SIMD-ONLY0-NEXT:    [[CONV3104:%.*]] = zext i1 [[CMP3103]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3104]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1990:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3105:%.*]] = icmp ne i32 [[TMP1990]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3105]], label [[IF_THEN3106:%.*]], label [[IF_ELSE3107:%.*]]
+// SIMD-ONLY0:       if.then3106:
+// SIMD-ONLY0-NEXT:    [[TMP1991:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1991]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3108:%.*]]
+// SIMD-ONLY0:       if.else3107:
+// SIMD-ONLY0-NEXT:    [[TMP1992:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1992]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3108]]
+// SIMD-ONLY0:       if.end3108:
+// SIMD-ONLY0-NEXT:    [[TMP1993:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1994:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3109:%.*]] = icmp eq i32 [[TMP1993]], [[TMP1994]]
+// SIMD-ONLY0-NEXT:    [[CONV3110:%.*]] = zext i1 [[CMP3109]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3110]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1995:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3111:%.*]] = icmp ne i32 [[TMP1995]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3111]], label [[IF_THEN3112:%.*]], label [[IF_ELSE3113:%.*]]
+// SIMD-ONLY0:       if.then3112:
+// SIMD-ONLY0-NEXT:    [[TMP1996:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1996]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3114:%.*]]
+// SIMD-ONLY0:       if.else3113:
+// SIMD-ONLY0-NEXT:    [[TMP1997:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1997]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3114]]
+// SIMD-ONLY0:       if.end3114:
+// SIMD-ONLY0-NEXT:    [[TMP1998:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP1998]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1999:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2000:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3115:%.*]] = icmp sgt i32 [[TMP1999]], [[TMP2000]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3115]], label [[IF_THEN3117:%.*]], label [[IF_END3118:%.*]]
+// SIMD-ONLY0:       if.then3117:
+// SIMD-ONLY0-NEXT:    [[TMP2001:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2001]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3118]]
+// SIMD-ONLY0:       if.end3118:
+// SIMD-ONLY0-NEXT:    [[TMP2002:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2002]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2003:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2004:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3119:%.*]] = icmp sgt i32 [[TMP2003]], [[TMP2004]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3119]], label [[IF_THEN3121:%.*]], label [[IF_END3122:%.*]]
+// SIMD-ONLY0:       if.then3121:
+// SIMD-ONLY0-NEXT:    [[TMP2005:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2005]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3122]]
+// SIMD-ONLY0:       if.end3122:
+// SIMD-ONLY0-NEXT:    [[TMP2006:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2006]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2007:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2008:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3123:%.*]] = icmp slt i32 [[TMP2007]], [[TMP2008]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3123]], label [[IF_THEN3125:%.*]], label [[IF_END3126:%.*]]
+// SIMD-ONLY0:       if.then3125:
+// SIMD-ONLY0-NEXT:    [[TMP2009:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2009]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3126]]
+// SIMD-ONLY0:       if.end3126:
+// SIMD-ONLY0-NEXT:    [[TMP2010:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2010]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2011:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2012:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3127:%.*]] = icmp slt i32 [[TMP2011]], [[TMP2012]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3127]], label [[IF_THEN3129:%.*]], label [[IF_END3130:%.*]]
+// SIMD-ONLY0:       if.then3129:
+// SIMD-ONLY0-NEXT:    [[TMP2013:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2013]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3130]]
+// SIMD-ONLY0:       if.end3130:
+// SIMD-ONLY0-NEXT:    [[TMP2014:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2014]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2015:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2016:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3131:%.*]] = icmp eq i32 [[TMP2015]], [[TMP2016]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3131]], label [[IF_THEN3133:%.*]], label [[IF_END3134:%.*]]
+// SIMD-ONLY0:       if.then3133:
+// SIMD-ONLY0-NEXT:    [[TMP2017:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2017]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3134]]
+// SIMD-ONLY0:       if.end3134:
+// SIMD-ONLY0-NEXT:    [[TMP2018:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2018]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2019:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2020:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3135:%.*]] = icmp eq i32 [[TMP2019]], [[TMP2020]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3135]], label [[IF_THEN3137:%.*]], label [[IF_END3138:%.*]]
+// SIMD-ONLY0:       if.then3137:
+// SIMD-ONLY0-NEXT:    [[TMP2021:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2021]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3138]]
+// SIMD-ONLY0:       if.end3138:
+// SIMD-ONLY0-NEXT:    [[TMP2022:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2023:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3139:%.*]] = icmp sgt i32 [[TMP2022]], [[TMP2023]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3139]], label [[IF_THEN3141:%.*]], label [[IF_END3142:%.*]]
+// SIMD-ONLY0:       if.then3141:
+// SIMD-ONLY0-NEXT:    [[TMP2024:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2024]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3142]]
+// SIMD-ONLY0:       if.end3142:
+// SIMD-ONLY0-NEXT:    [[TMP2025:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2025]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2026:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2027:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3143:%.*]] = icmp sgt i32 [[TMP2026]], [[TMP2027]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3143]], label [[IF_THEN3145:%.*]], label [[IF_END3146:%.*]]
+// SIMD-ONLY0:       if.then3145:
+// SIMD-ONLY0-NEXT:    [[TMP2028:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2028]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3146]]
+// SIMD-ONLY0:       if.end3146:
+// SIMD-ONLY0-NEXT:    [[TMP2029:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2029]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2030:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2031:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3147:%.*]] = icmp slt i32 [[TMP2030]], [[TMP2031]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3147]], label [[IF_THEN3149:%.*]], label [[IF_END3150:%.*]]
+// SIMD-ONLY0:       if.then3149:
+// SIMD-ONLY0-NEXT:    [[TMP2032:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2032]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3150]]
+// SIMD-ONLY0:       if.end3150:
+// SIMD-ONLY0-NEXT:    [[TMP2033:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2033]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2034:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2035:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3151:%.*]] = icmp slt i32 [[TMP2034]], [[TMP2035]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3151]], label [[IF_THEN3153:%.*]], label [[IF_END3154:%.*]]
+// SIMD-ONLY0:       if.then3153:
+// SIMD-ONLY0-NEXT:    [[TMP2036:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2036]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3154]]
+// SIMD-ONLY0:       if.end3154:
+// SIMD-ONLY0-NEXT:    [[TMP2037:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2037]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2038:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2039:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3155:%.*]] = icmp eq i32 [[TMP2038]], [[TMP2039]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3155]], label [[IF_THEN3157:%.*]], label [[IF_END3158:%.*]]
+// SIMD-ONLY0:       if.then3157:
+// SIMD-ONLY0-NEXT:    [[TMP2040:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2040]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3158]]
+// SIMD-ONLY0:       if.end3158:
+// SIMD-ONLY0-NEXT:    [[TMP2041:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2041]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2042:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2043:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3159:%.*]] = icmp eq i32 [[TMP2042]], [[TMP2043]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3159]], label [[IF_THEN3161:%.*]], label [[IF_END3162:%.*]]
+// SIMD-ONLY0:       if.then3161:
+// SIMD-ONLY0-NEXT:    [[TMP2044:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2044]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3162]]
+// SIMD-ONLY0:       if.end3162:
+// SIMD-ONLY0-NEXT:    [[TMP2045:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2045]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2046:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2047:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3163:%.*]] = icmp eq i32 [[TMP2046]], [[TMP2047]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3163]], label [[IF_THEN3165:%.*]], label [[IF_ELSE3166:%.*]]
+// SIMD-ONLY0:       if.then3165:
+// SIMD-ONLY0-NEXT:    [[TMP2048:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2048]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3167:%.*]]
+// SIMD-ONLY0:       if.else3166:
+// SIMD-ONLY0-NEXT:    [[TMP2049:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2049]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3167]]
+// SIMD-ONLY0:       if.end3167:
+// SIMD-ONLY0-NEXT:    [[TMP2050:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2051:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3168:%.*]] = icmp eq i32 [[TMP2050]], [[TMP2051]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3168]], label [[IF_THEN3170:%.*]], label [[IF_ELSE3171:%.*]]
+// SIMD-ONLY0:       if.then3170:
+// SIMD-ONLY0-NEXT:    [[TMP2052:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2052]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3172:%.*]]
+// SIMD-ONLY0:       if.else3171:
+// SIMD-ONLY0-NEXT:    [[TMP2053:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2053]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3172]]
+// SIMD-ONLY0:       if.end3172:
+// SIMD-ONLY0-NEXT:    [[TMP2054:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2055:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3173:%.*]] = icmp eq i32 [[TMP2054]], [[TMP2055]]
+// SIMD-ONLY0-NEXT:    [[CONV3174:%.*]] = zext i1 [[CMP3173]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3174]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2056:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3175:%.*]] = icmp ne i32 [[TMP2056]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3175]], label [[IF_THEN3176:%.*]], label [[IF_END3177:%.*]]
+// SIMD-ONLY0:       if.then3176:
+// SIMD-ONLY0-NEXT:    [[TMP2057:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2057]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3177]]
+// SIMD-ONLY0:       if.end3177:
+// SIMD-ONLY0-NEXT:    [[TMP2058:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2059:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3178:%.*]] = icmp eq i32 [[TMP2058]], [[TMP2059]]
+// SIMD-ONLY0-NEXT:    [[CONV3179:%.*]] = zext i1 [[CMP3178]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3179]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2060:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3180:%.*]] = icmp ne i32 [[TMP2060]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3180]], label [[IF_THEN3181:%.*]], label [[IF_END3182:%.*]]
+// SIMD-ONLY0:       if.then3181:
+// SIMD-ONLY0-NEXT:    [[TMP2061:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2061]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3182]]
+// SIMD-ONLY0:       if.end3182:
+// SIMD-ONLY0-NEXT:    [[TMP2062:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2063:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3183:%.*]] = icmp eq i32 [[TMP2062]], [[TMP2063]]
+// SIMD-ONLY0-NEXT:    [[CONV3184:%.*]] = zext i1 [[CMP3183]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3184]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2064:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3185:%.*]] = icmp ne i32 [[TMP2064]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3185]], label [[IF_THEN3186:%.*]], label [[IF_ELSE3187:%.*]]
+// SIMD-ONLY0:       if.then3186:
+// SIMD-ONLY0-NEXT:    [[TMP2065:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2065]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3188:%.*]]
+// SIMD-ONLY0:       if.else3187:
+// SIMD-ONLY0-NEXT:    [[TMP2066:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2066]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3188]]
+// SIMD-ONLY0:       if.end3188:
+// SIMD-ONLY0-NEXT:    [[TMP2067:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2068:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3189:%.*]] = icmp eq i32 [[TMP2067]], [[TMP2068]]
+// SIMD-ONLY0-NEXT:    [[CONV3190:%.*]] = zext i1 [[CMP3189]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3190]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2069:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3191:%.*]] = icmp ne i32 [[TMP2069]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3191]], label [[IF_THEN3192:%.*]], label [[IF_ELSE3193:%.*]]
+// SIMD-ONLY0:       if.then3192:
+// SIMD-ONLY0-NEXT:    [[TMP2070:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2070]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3194:%.*]]
+// SIMD-ONLY0:       if.else3193:
+// SIMD-ONLY0-NEXT:    [[TMP2071:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2071]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3194]]
+// SIMD-ONLY0:       if.end3194:
+// SIMD-ONLY0-NEXT:    [[TMP2072:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2072]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2073:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2074:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3195:%.*]] = icmp sgt i32 [[TMP2073]], [[TMP2074]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3195]], label [[IF_THEN3197:%.*]], label [[IF_END3198:%.*]]
+// SIMD-ONLY0:       if.then3197:
+// SIMD-ONLY0-NEXT:    [[TMP2075:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2075]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3198]]
+// SIMD-ONLY0:       if.end3198:
+// SIMD-ONLY0-NEXT:    [[TMP2076:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2076]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2077:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2078:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3199:%.*]] = icmp sgt i32 [[TMP2077]], [[TMP2078]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3199]], label [[IF_THEN3201:%.*]], label [[IF_END3202:%.*]]
+// SIMD-ONLY0:       if.then3201:
+// SIMD-ONLY0-NEXT:    [[TMP2079:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2079]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3202]]
+// SIMD-ONLY0:       if.end3202:
+// SIMD-ONLY0-NEXT:    [[TMP2080:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2080]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2081:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2082:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3203:%.*]] = icmp slt i32 [[TMP2081]], [[TMP2082]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3203]], label [[IF_THEN3205:%.*]], label [[IF_END3206:%.*]]
+// SIMD-ONLY0:       if.then3205:
+// SIMD-ONLY0-NEXT:    [[TMP2083:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2083]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3206]]
+// SIMD-ONLY0:       if.end3206:
+// SIMD-ONLY0-NEXT:    [[TMP2084:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2084]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2085:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2086:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3207:%.*]] = icmp slt i32 [[TMP2085]], [[TMP2086]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3207]], label [[IF_THEN3209:%.*]], label [[IF_END3210:%.*]]
+// SIMD-ONLY0:       if.then3209:
+// SIMD-ONLY0-NEXT:    [[TMP2087:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2087]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3210]]
+// SIMD-ONLY0:       if.end3210:
+// SIMD-ONLY0-NEXT:    [[TMP2088:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2088]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2089:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2090:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3211:%.*]] = icmp eq i32 [[TMP2089]], [[TMP2090]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3211]], label [[IF_THEN3213:%.*]], label [[IF_END3214:%.*]]
+// SIMD-ONLY0:       if.then3213:
+// SIMD-ONLY0-NEXT:    [[TMP2091:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2091]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3214]]
+// SIMD-ONLY0:       if.end3214:
+// SIMD-ONLY0-NEXT:    [[TMP2092:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2092]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2093:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2094:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3215:%.*]] = icmp eq i32 [[TMP2093]], [[TMP2094]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3215]], label [[IF_THEN3217:%.*]], label [[IF_END3218:%.*]]
+// SIMD-ONLY0:       if.then3217:
+// SIMD-ONLY0-NEXT:    [[TMP2095:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2095]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3218]]
+// SIMD-ONLY0:       if.end3218:
+// SIMD-ONLY0-NEXT:    [[TMP2096:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2097:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3219:%.*]] = icmp sgt i32 [[TMP2096]], [[TMP2097]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3219]], label [[IF_THEN3221:%.*]], label [[IF_END3222:%.*]]
+// SIMD-ONLY0:       if.then3221:
+// SIMD-ONLY0-NEXT:    [[TMP2098:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2098]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3222]]
+// SIMD-ONLY0:       if.end3222:
+// SIMD-ONLY0-NEXT:    [[TMP2099:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2099]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2100:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2101:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3223:%.*]] = icmp sgt i32 [[TMP2100]], [[TMP2101]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3223]], label [[IF_THEN3225:%.*]], label [[IF_END3226:%.*]]
+// SIMD-ONLY0:       if.then3225:
+// SIMD-ONLY0-NEXT:    [[TMP2102:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2102]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3226]]
+// SIMD-ONLY0:       if.end3226:
+// SIMD-ONLY0-NEXT:    [[TMP2103:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2103]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2104:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2105:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3227:%.*]] = icmp slt i32 [[TMP2104]], [[TMP2105]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3227]], label [[IF_THEN3229:%.*]], label [[IF_END3230:%.*]]
+// SIMD-ONLY0:       if.then3229:
+// SIMD-ONLY0-NEXT:    [[TMP2106:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2106]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3230]]
+// SIMD-ONLY0:       if.end3230:
+// SIMD-ONLY0-NEXT:    [[TMP2107:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2107]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2108:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2109:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3231:%.*]] = icmp slt i32 [[TMP2108]], [[TMP2109]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3231]], label [[IF_THEN3233:%.*]], label [[IF_END3234:%.*]]
+// SIMD-ONLY0:       if.then3233:
+// SIMD-ONLY0-NEXT:    [[TMP2110:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2110]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3234]]
+// SIMD-ONLY0:       if.end3234:
+// SIMD-ONLY0-NEXT:    [[TMP2111:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2111]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2112:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2113:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3235:%.*]] = icmp eq i32 [[TMP2112]], [[TMP2113]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3235]], label [[IF_THEN3237:%.*]], label [[IF_END3238:%.*]]
+// SIMD-ONLY0:       if.then3237:
+// SIMD-ONLY0-NEXT:    [[TMP2114:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2114]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3238]]
+// SIMD-ONLY0:       if.end3238:
+// SIMD-ONLY0-NEXT:    [[TMP2115:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2115]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2116:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2117:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3239:%.*]] = icmp eq i32 [[TMP2116]], [[TMP2117]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3239]], label [[IF_THEN3241:%.*]], label [[IF_END3242:%.*]]
+// SIMD-ONLY0:       if.then3241:
+// SIMD-ONLY0-NEXT:    [[TMP2118:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2118]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3242]]
+// SIMD-ONLY0:       if.end3242:
+// SIMD-ONLY0-NEXT:    [[TMP2119:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2119]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2120:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2121:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3243:%.*]] = icmp eq i32 [[TMP2120]], [[TMP2121]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3243]], label [[IF_THEN3245:%.*]], label [[IF_ELSE3246:%.*]]
+// SIMD-ONLY0:       if.then3245:
+// SIMD-ONLY0-NEXT:    [[TMP2122:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2122]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3247:%.*]]
+// SIMD-ONLY0:       if.else3246:
+// SIMD-ONLY0-NEXT:    [[TMP2123:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2123]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3247]]
+// SIMD-ONLY0:       if.end3247:
+// SIMD-ONLY0-NEXT:    [[TMP2124:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2125:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3248:%.*]] = icmp eq i32 [[TMP2124]], [[TMP2125]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3248]], label [[IF_THEN3250:%.*]], label [[IF_ELSE3251:%.*]]
+// SIMD-ONLY0:       if.then3250:
+// SIMD-ONLY0-NEXT:    [[TMP2126:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2126]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3252:%.*]]
+// SIMD-ONLY0:       if.else3251:
+// SIMD-ONLY0-NEXT:    [[TMP2127:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2127]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3252]]
+// SIMD-ONLY0:       if.end3252:
+// SIMD-ONLY0-NEXT:    [[TMP2128:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2129:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3253:%.*]] = icmp eq i32 [[TMP2128]], [[TMP2129]]
+// SIMD-ONLY0-NEXT:    [[CONV3254:%.*]] = zext i1 [[CMP3253]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3254]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2130:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3255:%.*]] = icmp ne i32 [[TMP2130]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3255]], label [[IF_THEN3256:%.*]], label [[IF_END3257:%.*]]
+// SIMD-ONLY0:       if.then3256:
+// SIMD-ONLY0-NEXT:    [[TMP2131:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2131]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3257]]
+// SIMD-ONLY0:       if.end3257:
+// SIMD-ONLY0-NEXT:    [[TMP2132:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2133:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3258:%.*]] = icmp eq i32 [[TMP2132]], [[TMP2133]]
+// SIMD-ONLY0-NEXT:    [[CONV3259:%.*]] = zext i1 [[CMP3258]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3259]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2134:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3260:%.*]] = icmp ne i32 [[TMP2134]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3260]], label [[IF_THEN3261:%.*]], label [[IF_END3262:%.*]]
+// SIMD-ONLY0:       if.then3261:
+// SIMD-ONLY0-NEXT:    [[TMP2135:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2135]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3262]]
+// SIMD-ONLY0:       if.end3262:
+// SIMD-ONLY0-NEXT:    [[TMP2136:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2137:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3263:%.*]] = icmp eq i32 [[TMP2136]], [[TMP2137]]
+// SIMD-ONLY0-NEXT:    [[CONV3264:%.*]] = zext i1 [[CMP3263]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3264]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2138:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3265:%.*]] = icmp ne i32 [[TMP2138]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3265]], label [[IF_THEN3266:%.*]], label [[IF_ELSE3267:%.*]]
+// SIMD-ONLY0:       if.then3266:
+// SIMD-ONLY0-NEXT:    [[TMP2139:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2139]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3268:%.*]]
+// SIMD-ONLY0:       if.else3267:
+// SIMD-ONLY0-NEXT:    [[TMP2140:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2140]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3268]]
+// SIMD-ONLY0:       if.end3268:
+// SIMD-ONLY0-NEXT:    [[TMP2141:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2142:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3269:%.*]] = icmp eq i32 [[TMP2141]], [[TMP2142]]
+// SIMD-ONLY0-NEXT:    [[CONV3270:%.*]] = zext i1 [[CMP3269]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3270]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2143:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3271:%.*]] = icmp ne i32 [[TMP2143]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3271]], label [[IF_THEN3272:%.*]], label [[IF_ELSE3273:%.*]]
+// SIMD-ONLY0:       if.then3272:
+// SIMD-ONLY0-NEXT:    [[TMP2144:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2144]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3274:%.*]]
+// SIMD-ONLY0:       if.else3273:
+// SIMD-ONLY0-NEXT:    [[TMP2145:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2145]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3274]]
+// SIMD-ONLY0:       if.end3274:
+// SIMD-ONLY0-NEXT:    [[TMP2146:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2146]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2147:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2148:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3275:%.*]] = icmp sgt i32 [[TMP2147]], [[TMP2148]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3275]], label [[IF_THEN3277:%.*]], label [[IF_END3278:%.*]]
+// SIMD-ONLY0:       if.then3277:
+// SIMD-ONLY0-NEXT:    [[TMP2149:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2149]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3278]]
+// SIMD-ONLY0:       if.end3278:
+// SIMD-ONLY0-NEXT:    [[TMP2150:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2150]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2151:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2152:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3279:%.*]] = icmp sgt i32 [[TMP2151]], [[TMP2152]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3279]], label [[IF_THEN3281:%.*]], label [[IF_END3282:%.*]]
+// SIMD-ONLY0:       if.then3281:
+// SIMD-ONLY0-NEXT:    [[TMP2153:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2153]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3282]]
+// SIMD-ONLY0:       if.end3282:
+// SIMD-ONLY0-NEXT:    [[TMP2154:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2154]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2155:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2156:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3283:%.*]] = icmp slt i32 [[TMP2155]], [[TMP2156]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3283]], label [[IF_THEN3285:%.*]], label [[IF_END3286:%.*]]
+// SIMD-ONLY0:       if.then3285:
+// SIMD-ONLY0-NEXT:    [[TMP2157:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2157]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3286]]
+// SIMD-ONLY0:       if.end3286:
+// SIMD-ONLY0-NEXT:    [[TMP2158:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2158]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2159:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2160:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3287:%.*]] = icmp slt i32 [[TMP2159]], [[TMP2160]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3287]], label [[IF_THEN3289:%.*]], label [[IF_END3290:%.*]]
+// SIMD-ONLY0:       if.then3289:
+// SIMD-ONLY0-NEXT:    [[TMP2161:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2161]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3290]]
+// SIMD-ONLY0:       if.end3290:
+// SIMD-ONLY0-NEXT:    [[TMP2162:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2162]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2163:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2164:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3291:%.*]] = icmp eq i32 [[TMP2163]], [[TMP2164]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3291]], label [[IF_THEN3293:%.*]], label [[IF_END3294:%.*]]
+// SIMD-ONLY0:       if.then3293:
+// SIMD-ONLY0-NEXT:    [[TMP2165:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2165]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3294]]
+// SIMD-ONLY0:       if.end3294:
+// SIMD-ONLY0-NEXT:    [[TMP2166:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2166]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2167:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2168:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3295:%.*]] = icmp eq i32 [[TMP2167]], [[TMP2168]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3295]], label [[IF_THEN3297:%.*]], label [[IF_END3298:%.*]]
+// SIMD-ONLY0:       if.then3297:
+// SIMD-ONLY0-NEXT:    [[TMP2169:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2169]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3298]]
+// SIMD-ONLY0:       if.end3298:
+// SIMD-ONLY0-NEXT:    [[TMP2170:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2171:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3299:%.*]] = icmp sgt i32 [[TMP2170]], [[TMP2171]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3299]], label [[IF_THEN3301:%.*]], label [[IF_END3302:%.*]]
+// SIMD-ONLY0:       if.then3301:
+// SIMD-ONLY0-NEXT:    [[TMP2172:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2172]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3302]]
+// SIMD-ONLY0:       if.end3302:
+// SIMD-ONLY0-NEXT:    [[TMP2173:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2173]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2174:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2175:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3303:%.*]] = icmp sgt i32 [[TMP2174]], [[TMP2175]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3303]], label [[IF_THEN3305:%.*]], label [[IF_END3306:%.*]]
+// SIMD-ONLY0:       if.then3305:
+// SIMD-ONLY0-NEXT:    [[TMP2176:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2176]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3306]]
+// SIMD-ONLY0:       if.end3306:
+// SIMD-ONLY0-NEXT:    [[TMP2177:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2177]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2178:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2179:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3307:%.*]] = icmp slt i32 [[TMP2178]], [[TMP2179]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3307]], label [[IF_THEN3309:%.*]], label [[IF_END3310:%.*]]
+// SIMD-ONLY0:       if.then3309:
+// SIMD-ONLY0-NEXT:    [[TMP2180:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2180]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3310]]
+// SIMD-ONLY0:       if.end3310:
+// SIMD-ONLY0-NEXT:    [[TMP2181:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2181]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2182:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2183:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3311:%.*]] = icmp slt i32 [[TMP2182]], [[TMP2183]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3311]], label [[IF_THEN3313:%.*]], label [[IF_END3314:%.*]]
+// SIMD-ONLY0:       if.then3313:
+// SIMD-ONLY0-NEXT:    [[TMP2184:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2184]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3314]]
+// SIMD-ONLY0:       if.end3314:
+// SIMD-ONLY0-NEXT:    [[TMP2185:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2185]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2186:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2187:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3315:%.*]] = icmp eq i32 [[TMP2186]], [[TMP2187]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3315]], label [[IF_THEN3317:%.*]], label [[IF_END3318:%.*]]
+// SIMD-ONLY0:       if.then3317:
+// SIMD-ONLY0-NEXT:    [[TMP2188:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2188]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3318]]
+// SIMD-ONLY0:       if.end3318:
+// SIMD-ONLY0-NEXT:    [[TMP2189:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2189]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2190:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2191:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3319:%.*]] = icmp eq i32 [[TMP2190]], [[TMP2191]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3319]], label [[IF_THEN3321:%.*]], label [[IF_END3322:%.*]]
+// SIMD-ONLY0:       if.then3321:
+// SIMD-ONLY0-NEXT:    [[TMP2192:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2192]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3322]]
+// SIMD-ONLY0:       if.end3322:
+// SIMD-ONLY0-NEXT:    [[TMP2193:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2193]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2194:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2195:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3323:%.*]] = icmp eq i32 [[TMP2194]], [[TMP2195]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3323]], label [[IF_THEN3325:%.*]], label [[IF_ELSE3326:%.*]]
+// SIMD-ONLY0:       if.then3325:
+// SIMD-ONLY0-NEXT:    [[TMP2196:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2196]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3327:%.*]]
+// SIMD-ONLY0:       if.else3326:
+// SIMD-ONLY0-NEXT:    [[TMP2197:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2197]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3327]]
+// SIMD-ONLY0:       if.end3327:
+// SIMD-ONLY0-NEXT:    [[TMP2198:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2199:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3328:%.*]] = icmp eq i32 [[TMP2198]], [[TMP2199]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3328]], label [[IF_THEN3330:%.*]], label [[IF_ELSE3331:%.*]]
+// SIMD-ONLY0:       if.then3330:
+// SIMD-ONLY0-NEXT:    [[TMP2200:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2200]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3332:%.*]]
+// SIMD-ONLY0:       if.else3331:
+// SIMD-ONLY0-NEXT:    [[TMP2201:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2201]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3332]]
+// SIMD-ONLY0:       if.end3332:
+// SIMD-ONLY0-NEXT:    [[TMP2202:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2203:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3333:%.*]] = icmp eq i32 [[TMP2202]], [[TMP2203]]
+// SIMD-ONLY0-NEXT:    [[CONV3334:%.*]] = zext i1 [[CMP3333]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3334]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2204:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3335:%.*]] = icmp ne i32 [[TMP2204]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3335]], label [[IF_THEN3336:%.*]], label [[IF_END3337:%.*]]
+// SIMD-ONLY0:       if.then3336:
+// SIMD-ONLY0-NEXT:    [[TMP2205:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2205]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3337]]
+// SIMD-ONLY0:       if.end3337:
+// SIMD-ONLY0-NEXT:    [[TMP2206:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2207:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3338:%.*]] = icmp eq i32 [[TMP2206]], [[TMP2207]]
+// SIMD-ONLY0-NEXT:    [[CONV3339:%.*]] = zext i1 [[CMP3338]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3339]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2208:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3340:%.*]] = icmp ne i32 [[TMP2208]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3340]], label [[IF_THEN3341:%.*]], label [[IF_END3342:%.*]]
+// SIMD-ONLY0:       if.then3341:
+// SIMD-ONLY0-NEXT:    [[TMP2209:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2209]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3342]]
+// SIMD-ONLY0:       if.end3342:
+// SIMD-ONLY0-NEXT:    [[TMP2210:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2211:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3343:%.*]] = icmp eq i32 [[TMP2210]], [[TMP2211]]
+// SIMD-ONLY0-NEXT:    [[CONV3344:%.*]] = zext i1 [[CMP3343]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3344]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2212:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3345:%.*]] = icmp ne i32 [[TMP2212]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3345]], label [[IF_THEN3346:%.*]], label [[IF_ELSE3347:%.*]]
+// SIMD-ONLY0:       if.then3346:
+// SIMD-ONLY0-NEXT:    [[TMP2213:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2213]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3348:%.*]]
+// SIMD-ONLY0:       if.else3347:
+// SIMD-ONLY0-NEXT:    [[TMP2214:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2214]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3348]]
+// SIMD-ONLY0:       if.end3348:
+// SIMD-ONLY0-NEXT:    [[TMP2215:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2216:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3349:%.*]] = icmp eq i32 [[TMP2215]], [[TMP2216]]
+// SIMD-ONLY0-NEXT:    [[CONV3350:%.*]] = zext i1 [[CMP3349]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3350]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2217:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3351:%.*]] = icmp ne i32 [[TMP2217]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3351]], label [[IF_THEN3352:%.*]], label [[IF_ELSE3353:%.*]]
+// SIMD-ONLY0:       if.then3352:
+// SIMD-ONLY0-NEXT:    [[TMP2218:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2218]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3354:%.*]]
+// SIMD-ONLY0:       if.else3353:
+// SIMD-ONLY0-NEXT:    [[TMP2219:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2219]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3354]]
+// SIMD-ONLY0:       if.end3354:
+// SIMD-ONLY0-NEXT:    [[TMP2220:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2220]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2221:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2222:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3355:%.*]] = icmp ugt i32 [[TMP2221]], [[TMP2222]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3355]], label [[IF_THEN3357:%.*]], label [[IF_END3358:%.*]]
+// SIMD-ONLY0:       if.then3357:
+// SIMD-ONLY0-NEXT:    [[TMP2223:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2223]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3358]]
+// SIMD-ONLY0:       if.end3358:
+// SIMD-ONLY0-NEXT:    [[TMP2224:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2224]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2225:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2226:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3359:%.*]] = icmp ugt i32 [[TMP2225]], [[TMP2226]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3359]], label [[IF_THEN3361:%.*]], label [[IF_END3362:%.*]]
+// SIMD-ONLY0:       if.then3361:
+// SIMD-ONLY0-NEXT:    [[TMP2227:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2227]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3362]]
+// SIMD-ONLY0:       if.end3362:
+// SIMD-ONLY0-NEXT:    [[TMP2228:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2228]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2229:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2230:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3363:%.*]] = icmp ult i32 [[TMP2229]], [[TMP2230]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3363]], label [[IF_THEN3365:%.*]], label [[IF_END3366:%.*]]
+// SIMD-ONLY0:       if.then3365:
+// SIMD-ONLY0-NEXT:    [[TMP2231:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2231]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3366]]
+// SIMD-ONLY0:       if.end3366:
+// SIMD-ONLY0-NEXT:    [[TMP2232:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2232]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2233:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2234:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3367:%.*]] = icmp ult i32 [[TMP2233]], [[TMP2234]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3367]], label [[IF_THEN3369:%.*]], label [[IF_END3370:%.*]]
+// SIMD-ONLY0:       if.then3369:
+// SIMD-ONLY0-NEXT:    [[TMP2235:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2235]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3370]]
+// SIMD-ONLY0:       if.end3370:
+// SIMD-ONLY0-NEXT:    [[TMP2236:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2236]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2237:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2238:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3371:%.*]] = icmp eq i32 [[TMP2237]], [[TMP2238]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3371]], label [[IF_THEN3373:%.*]], label [[IF_END3374:%.*]]
+// SIMD-ONLY0:       if.then3373:
+// SIMD-ONLY0-NEXT:    [[TMP2239:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2239]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3374]]
+// SIMD-ONLY0:       if.end3374:
+// SIMD-ONLY0-NEXT:    [[TMP2240:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2240]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2241:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2242:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3375:%.*]] = icmp eq i32 [[TMP2241]], [[TMP2242]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3375]], label [[IF_THEN3377:%.*]], label [[IF_END3378:%.*]]
+// SIMD-ONLY0:       if.then3377:
+// SIMD-ONLY0-NEXT:    [[TMP2243:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2243]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3378]]
+// SIMD-ONLY0:       if.end3378:
+// SIMD-ONLY0-NEXT:    [[TMP2244:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2245:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3379:%.*]] = icmp ugt i32 [[TMP2244]], [[TMP2245]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3379]], label [[IF_THEN3381:%.*]], label [[IF_END3382:%.*]]
+// SIMD-ONLY0:       if.then3381:
+// SIMD-ONLY0-NEXT:    [[TMP2246:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2246]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3382]]
+// SIMD-ONLY0:       if.end3382:
+// SIMD-ONLY0-NEXT:    [[TMP2247:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2247]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2248:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2249:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3383:%.*]] = icmp ugt i32 [[TMP2248]], [[TMP2249]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3383]], label [[IF_THEN3385:%.*]], label [[IF_END3386:%.*]]
+// SIMD-ONLY0:       if.then3385:
+// SIMD-ONLY0-NEXT:    [[TMP2250:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2250]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3386]]
+// SIMD-ONLY0:       if.end3386:
+// SIMD-ONLY0-NEXT:    [[TMP2251:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2251]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2252:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2253:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3387:%.*]] = icmp ult i32 [[TMP2252]], [[TMP2253]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3387]], label [[IF_THEN3389:%.*]], label [[IF_END3390:%.*]]
+// SIMD-ONLY0:       if.then3389:
+// SIMD-ONLY0-NEXT:    [[TMP2254:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2254]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3390]]
+// SIMD-ONLY0:       if.end3390:
+// SIMD-ONLY0-NEXT:    [[TMP2255:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2255]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2256:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2257:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3391:%.*]] = icmp ult i32 [[TMP2256]], [[TMP2257]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3391]], label [[IF_THEN3393:%.*]], label [[IF_END3394:%.*]]
+// SIMD-ONLY0:       if.then3393:
+// SIMD-ONLY0-NEXT:    [[TMP2258:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2258]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3394]]
+// SIMD-ONLY0:       if.end3394:
+// SIMD-ONLY0-NEXT:    [[TMP2259:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2259]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2260:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2261:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3395:%.*]] = icmp eq i32 [[TMP2260]], [[TMP2261]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3395]], label [[IF_THEN3397:%.*]], label [[IF_END3398:%.*]]
+// SIMD-ONLY0:       if.then3397:
+// SIMD-ONLY0-NEXT:    [[TMP2262:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2262]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3398]]
+// SIMD-ONLY0:       if.end3398:
+// SIMD-ONLY0-NEXT:    [[TMP2263:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2263]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2264:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2265:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3399:%.*]] = icmp eq i32 [[TMP2264]], [[TMP2265]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3399]], label [[IF_THEN3401:%.*]], label [[IF_END3402:%.*]]
+// SIMD-ONLY0:       if.then3401:
+// SIMD-ONLY0-NEXT:    [[TMP2266:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2266]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3402]]
+// SIMD-ONLY0:       if.end3402:
+// SIMD-ONLY0-NEXT:    [[TMP2267:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2267]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2268:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2269:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3403:%.*]] = icmp eq i32 [[TMP2268]], [[TMP2269]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3403]], label [[IF_THEN3405:%.*]], label [[IF_ELSE3406:%.*]]
+// SIMD-ONLY0:       if.then3405:
+// SIMD-ONLY0-NEXT:    [[TMP2270:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2270]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3407:%.*]]
+// SIMD-ONLY0:       if.else3406:
+// SIMD-ONLY0-NEXT:    [[TMP2271:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2271]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3407]]
+// SIMD-ONLY0:       if.end3407:
+// SIMD-ONLY0-NEXT:    [[TMP2272:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2273:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3408:%.*]] = icmp eq i32 [[TMP2272]], [[TMP2273]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3408]], label [[IF_THEN3410:%.*]], label [[IF_ELSE3411:%.*]]
+// SIMD-ONLY0:       if.then3410:
+// SIMD-ONLY0-NEXT:    [[TMP2274:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2274]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3412:%.*]]
+// SIMD-ONLY0:       if.else3411:
+// SIMD-ONLY0-NEXT:    [[TMP2275:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2275]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3412]]
+// SIMD-ONLY0:       if.end3412:
+// SIMD-ONLY0-NEXT:    [[TMP2276:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2277:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3413:%.*]] = icmp eq i32 [[TMP2276]], [[TMP2277]]
+// SIMD-ONLY0-NEXT:    [[CONV3414:%.*]] = zext i1 [[CMP3413]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3414]], ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2278:%.*]] = load i32, ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3415:%.*]] = icmp ne i32 [[TMP2278]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3415]], label [[IF_THEN3416:%.*]], label [[IF_END3417:%.*]]
+// SIMD-ONLY0:       if.then3416:
+// SIMD-ONLY0-NEXT:    [[TMP2279:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2279]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3417]]
+// SIMD-ONLY0:       if.end3417:
+// SIMD-ONLY0-NEXT:    [[TMP2280:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2281:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3418:%.*]] = icmp eq i32 [[TMP2280]], [[TMP2281]]
+// SIMD-ONLY0-NEXT:    [[CONV3419:%.*]] = zext i1 [[CMP3418]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3419]], ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2282:%.*]] = load i32, ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3420:%.*]] = icmp ne i32 [[TMP2282]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3420]], label [[IF_THEN3421:%.*]], label [[IF_END3422:%.*]]
+// SIMD-ONLY0:       if.then3421:
+// SIMD-ONLY0-NEXT:    [[TMP2283:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2283]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3422]]
+// SIMD-ONLY0:       if.end3422:
+// SIMD-ONLY0-NEXT:    [[TMP2284:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2285:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3423:%.*]] = icmp eq i32 [[TMP2284]], [[TMP2285]]
+// SIMD-ONLY0-NEXT:    [[CONV3424:%.*]] = zext i1 [[CMP3423]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3424]], ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2286:%.*]] = load i32, ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3425:%.*]] = icmp ne i32 [[TMP2286]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3425]], label [[IF_THEN3426:%.*]], label [[IF_ELSE3427:%.*]]
+// SIMD-ONLY0:       if.then3426:
+// SIMD-ONLY0-NEXT:    [[TMP2287:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2287]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3428:%.*]]
+// SIMD-ONLY0:       if.else3427:
+// SIMD-ONLY0-NEXT:    [[TMP2288:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2288]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3428]]
+// SIMD-ONLY0:       if.end3428:
+// SIMD-ONLY0-NEXT:    [[TMP2289:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2290:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3429:%.*]] = icmp eq i32 [[TMP2289]], [[TMP2290]]
+// SIMD-ONLY0-NEXT:    [[CONV3430:%.*]] = zext i1 [[CMP3429]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3430]], ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2291:%.*]] = load i32, ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3431:%.*]] = icmp ne i32 [[TMP2291]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3431]], label [[IF_THEN3432:%.*]], label [[IF_ELSE3433:%.*]]
+// SIMD-ONLY0:       if.then3432:
+// SIMD-ONLY0-NEXT:    [[TMP2292:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2292]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3434:%.*]]
+// SIMD-ONLY0:       if.else3433:
+// SIMD-ONLY0-NEXT:    [[TMP2293:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2293]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3434]]
+// SIMD-ONLY0:       if.end3434:
+// SIMD-ONLY0-NEXT:    [[TMP2294:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2294]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2295:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2296:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3435:%.*]] = icmp ugt i32 [[TMP2295]], [[TMP2296]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3435]], label [[IF_THEN3437:%.*]], label [[IF_END3438:%.*]]
+// SIMD-ONLY0:       if.then3437:
+// SIMD-ONLY0-NEXT:    [[TMP2297:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2297]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3438]]
+// SIMD-ONLY0:       if.end3438:
+// SIMD-ONLY0-NEXT:    [[TMP2298:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2298]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2299:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2300:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3439:%.*]] = icmp ugt i32 [[TMP2299]], [[TMP2300]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3439]], label [[IF_THEN3441:%.*]], label [[IF_END3442:%.*]]
+// SIMD-ONLY0:       if.then3441:
+// SIMD-ONLY0-NEXT:    [[TMP2301:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2301]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3442]]
+// SIMD-ONLY0:       if.end3442:
+// SIMD-ONLY0-NEXT:    [[TMP2302:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2302]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2303:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2304:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3443:%.*]] = icmp ult i32 [[TMP2303]], [[TMP2304]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3443]], label [[IF_THEN3445:%.*]], label [[IF_END3446:%.*]]
+// SIMD-ONLY0:       if.then3445:
+// SIMD-ONLY0-NEXT:    [[TMP2305:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2305]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3446]]
+// SIMD-ONLY0:       if.end3446:
+// SIMD-ONLY0-NEXT:    [[TMP2306:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2306]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2307:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2308:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3447:%.*]] = icmp ult i32 [[TMP2307]], [[TMP2308]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3447]], label [[IF_THEN3449:%.*]], label [[IF_END3450:%.*]]
+// SIMD-ONLY0:       if.then3449:
+// SIMD-ONLY0-NEXT:    [[TMP2309:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2309]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3450]]
+// SIMD-ONLY0:       if.end3450:
+// SIMD-ONLY0-NEXT:    [[TMP2310:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2310]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2311:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2312:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3451:%.*]] = icmp eq i32 [[TMP2311]], [[TMP2312]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3451]], label [[IF_THEN3453:%.*]], label [[IF_END3454:%.*]]
+// SIMD-ONLY0:       if.then3453:
+// SIMD-ONLY0-NEXT:    [[TMP2313:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2313]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3454]]
+// SIMD-ONLY0:       if.end3454:
+// SIMD-ONLY0-NEXT:    [[TMP2314:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2314]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2315:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2316:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3455:%.*]] = icmp eq i32 [[TMP2315]], [[TMP2316]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3455]], label [[IF_THEN3457:%.*]], label [[IF_END3458:%.*]]
+// SIMD-ONLY0:       if.then3457:
+// SIMD-ONLY0-NEXT:    [[TMP2317:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2317]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3458]]
+// SIMD-ONLY0:       if.end3458:
+// SIMD-ONLY0-NEXT:    [[TMP2318:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2319:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3459:%.*]] = icmp ugt i32 [[TMP2318]], [[TMP2319]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3459]], label [[IF_THEN3461:%.*]], label [[IF_END3462:%.*]]
+// SIMD-ONLY0:       if.then3461:
+// SIMD-ONLY0-NEXT:    [[TMP2320:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2320]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3462]]
+// SIMD-ONLY0:       if.end3462:
+// SIMD-ONLY0-NEXT:    [[TMP2321:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2321]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2322:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2323:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3463:%.*]] = icmp ugt i32 [[TMP2322]], [[TMP2323]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3463]], label [[IF_THEN3465:%.*]], label [[IF_END3466:%.*]]
+// SIMD-ONLY0:       if.then3465:
+// SIMD-ONLY0-NEXT:    [[TMP2324:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2324]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3466]]
+// SIMD-ONLY0:       if.end3466:
+// SIMD-ONLY0-NEXT:    [[TMP2325:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2325]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2326:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2327:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3467:%.*]] = icmp ult i32 [[TMP2326]], [[TMP2327]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3467]], label [[IF_THEN3469:%.*]], label [[IF_END3470:%.*]]
+// SIMD-ONLY0:       if.then3469:
+// SIMD-ONLY0-NEXT:    [[TMP2328:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2328]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3470]]
+// SIMD-ONLY0:       if.end3470:
+// SIMD-ONLY0-NEXT:    [[TMP2329:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2329]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2330:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2331:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3471:%.*]] = icmp ult i32 [[TMP2330]], [[TMP2331]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3471]], label [[IF_THEN3473:%.*]], label [[IF_END3474:%.*]]
+// SIMD-ONLY0:       if.then3473:
+// SIMD-ONLY0-NEXT:    [[TMP2332:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2332]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3474]]
+// SIMD-ONLY0:       if.end3474:
+// SIMD-ONLY0-NEXT:    [[TMP2333:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2333]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2334:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2335:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3475:%.*]] = icmp eq i32 [[TMP2334]], [[TMP2335]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3475]], label [[IF_THEN3477:%.*]], label [[IF_END3478:%.*]]
+// SIMD-ONLY0:       if.then3477:
+// SIMD-ONLY0-NEXT:    [[TMP2336:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2336]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3478]]
+// SIMD-ONLY0:       if.end3478:
+// SIMD-ONLY0-NEXT:    [[TMP2337:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2337]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2338:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2339:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3479:%.*]] = icmp eq i32 [[TMP2338]], [[TMP2339]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3479]], label [[IF_THEN3481:%.*]], label [[IF_END3482:%.*]]
+// SIMD-ONLY0:       if.then3481:
+// SIMD-ONLY0-NEXT:    [[TMP2340:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2340]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3482]]
+// SIMD-ONLY0:       if.end3482:
+// SIMD-ONLY0-NEXT:    [[TMP2341:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2341]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2342:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2343:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3483:%.*]] = icmp eq i32 [[TMP2342]], [[TMP2343]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3483]], label [[IF_THEN3485:%.*]], label [[IF_ELSE3486:%.*]]
+// SIMD-ONLY0:       if.then3485:
+// SIMD-ONLY0-NEXT:    [[TMP2344:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2344]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3487:%.*]]
+// SIMD-ONLY0:       if.else3486:
+// SIMD-ONLY0-NEXT:    [[TMP2345:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2345]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3487]]
+// SIMD-ONLY0:       if.end3487:
+// SIMD-ONLY0-NEXT:    [[TMP2346:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2347:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3488:%.*]] = icmp eq i32 [[TMP2346]], [[TMP2347]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3488]], label [[IF_THEN3490:%.*]], label [[IF_ELSE3491:%.*]]
+// SIMD-ONLY0:       if.then3490:
+// SIMD-ONLY0-NEXT:    [[TMP2348:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2348]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3492:%.*]]
+// SIMD-ONLY0:       if.else3491:
+// SIMD-ONLY0-NEXT:    [[TMP2349:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2349]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3492]]
+// SIMD-ONLY0:       if.end3492:
+// SIMD-ONLY0-NEXT:    [[TMP2350:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2351:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3493:%.*]] = icmp eq i32 [[TMP2350]], [[TMP2351]]
+// SIMD-ONLY0-NEXT:    [[CONV3494:%.*]] = zext i1 [[CMP3493]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3494]], ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2352:%.*]] = load i32, ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3495:%.*]] = icmp ne i32 [[TMP2352]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3495]], label [[IF_THEN3496:%.*]], label [[IF_END3497:%.*]]
+// SIMD-ONLY0:       if.then3496:
+// SIMD-ONLY0-NEXT:    [[TMP2353:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2353]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3497]]
+// SIMD-ONLY0:       if.end3497:
+// SIMD-ONLY0-NEXT:    [[TMP2354:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2355:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3498:%.*]] = icmp eq i32 [[TMP2354]], [[TMP2355]]
+// SIMD-ONLY0-NEXT:    [[CONV3499:%.*]] = zext i1 [[CMP3498]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3499]], ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2356:%.*]] = load i32, ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3500:%.*]] = icmp ne i32 [[TMP2356]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3500]], label [[IF_THEN3501:%.*]], label [[IF_END3502:%.*]]
+// SIMD-ONLY0:       if.then3501:
+// SIMD-ONLY0-NEXT:    [[TMP2357:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2357]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3502]]
+// SIMD-ONLY0:       if.end3502:
+// SIMD-ONLY0-NEXT:    [[TMP2358:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2359:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3503:%.*]] = icmp eq i32 [[TMP2358]], [[TMP2359]]
+// SIMD-ONLY0-NEXT:    [[CONV3504:%.*]] = zext i1 [[CMP3503]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3504]], ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2360:%.*]] = load i32, ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3505:%.*]] = icmp ne i32 [[TMP2360]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3505]], label [[IF_THEN3506:%.*]], label [[IF_ELSE3507:%.*]]
+// SIMD-ONLY0:       if.then3506:
+// SIMD-ONLY0-NEXT:    [[TMP2361:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2361]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3508:%.*]]
+// SIMD-ONLY0:       if.else3507:
+// SIMD-ONLY0-NEXT:    [[TMP2362:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2362]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3508]]
+// SIMD-ONLY0:       if.end3508:
+// SIMD-ONLY0-NEXT:    [[TMP2363:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2364:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3509:%.*]] = icmp eq i32 [[TMP2363]], [[TMP2364]]
+// SIMD-ONLY0-NEXT:    [[CONV3510:%.*]] = zext i1 [[CMP3509]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3510]], ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2365:%.*]] = load i32, ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3511:%.*]] = icmp ne i32 [[TMP2365]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3511]], label [[IF_THEN3512:%.*]], label [[IF_ELSE3513:%.*]]
+// SIMD-ONLY0:       if.then3512:
+// SIMD-ONLY0-NEXT:    [[TMP2366:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2366]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3514:%.*]]
+// SIMD-ONLY0:       if.else3513:
+// SIMD-ONLY0-NEXT:    [[TMP2367:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2367]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3514]]
+// SIMD-ONLY0:       if.end3514:
+// SIMD-ONLY0-NEXT:    [[TMP2368:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2368]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2369:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2370:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3515:%.*]] = icmp ugt i32 [[TMP2369]], [[TMP2370]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3515]], label [[IF_THEN3517:%.*]], label [[IF_END3518:%.*]]
+// SIMD-ONLY0:       if.then3517:
+// SIMD-ONLY0-NEXT:    [[TMP2371:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2371]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3518]]
+// SIMD-ONLY0:       if.end3518:
+// SIMD-ONLY0-NEXT:    [[TMP2372:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2372]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2373:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2374:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3519:%.*]] = icmp ugt i32 [[TMP2373]], [[TMP2374]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3519]], label [[IF_THEN3521:%.*]], label [[IF_END3522:%.*]]
+// SIMD-ONLY0:       if.then3521:
+// SIMD-ONLY0-NEXT:    [[TMP2375:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2375]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3522]]
+// SIMD-ONLY0:       if.end3522:
+// SIMD-ONLY0-NEXT:    [[TMP2376:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2376]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2377:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2378:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3523:%.*]] = icmp ult i32 [[TMP2377]], [[TMP2378]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3523]], label [[IF_THEN3525:%.*]], label [[IF_END3526:%.*]]
+// SIMD-ONLY0:       if.then3525:
+// SIMD-ONLY0-NEXT:    [[TMP2379:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2379]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3526]]
+// SIMD-ONLY0:       if.end3526:
+// SIMD-ONLY0-NEXT:    [[TMP2380:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2380]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2381:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2382:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3527:%.*]] = icmp ult i32 [[TMP2381]], [[TMP2382]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3527]], label [[IF_THEN3529:%.*]], label [[IF_END3530:%.*]]
+// SIMD-ONLY0:       if.then3529:
+// SIMD-ONLY0-NEXT:    [[TMP2383:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2383]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3530]]
+// SIMD-ONLY0:       if.end3530:
+// SIMD-ONLY0-NEXT:    [[TMP2384:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2384]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2385:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2386:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3531:%.*]] = icmp eq i32 [[TMP2385]], [[TMP2386]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3531]], label [[IF_THEN3533:%.*]], label [[IF_END3534:%.*]]
+// SIMD-ONLY0:       if.then3533:
+// SIMD-ONLY0-NEXT:    [[TMP2387:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2387]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3534]]
+// SIMD-ONLY0:       if.end3534:
+// SIMD-ONLY0-NEXT:    [[TMP2388:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2388]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2389:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2390:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3535:%.*]] = icmp eq i32 [[TMP2389]], [[TMP2390]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3535]], label [[IF_THEN3537:%.*]], label [[IF_END3538:%.*]]
+// SIMD-ONLY0:       if.then3537:
+// SIMD-ONLY0-NEXT:    [[TMP2391:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2391]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3538]]
+// SIMD-ONLY0:       if.end3538:
+// SIMD-ONLY0-NEXT:    [[TMP2392:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2393:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3539:%.*]] = icmp ugt i32 [[TMP2392]], [[TMP2393]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3539]], label [[IF_THEN3541:%.*]], label [[IF_END3542:%.*]]
+// SIMD-ONLY0:       if.then3541:
+// SIMD-ONLY0-NEXT:    [[TMP2394:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2394]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3542]]
+// SIMD-ONLY0:       if.end3542:
+// SIMD-ONLY0-NEXT:    [[TMP2395:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2395]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2396:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2397:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3543:%.*]] = icmp ugt i32 [[TMP2396]], [[TMP2397]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3543]], label [[IF_THEN3545:%.*]], label [[IF_END3546:%.*]]
+// SIMD-ONLY0:       if.then3545:
+// SIMD-ONLY0-NEXT:    [[TMP2398:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2398]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3546]]
+// SIMD-ONLY0:       if.end3546:
+// SIMD-ONLY0-NEXT:    [[TMP2399:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2399]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2400:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2401:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3547:%.*]] = icmp ult i32 [[TMP2400]], [[TMP2401]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3547]], label [[IF_THEN3549:%.*]], label [[IF_END3550:%.*]]
+// SIMD-ONLY0:       if.then3549:
+// SIMD-ONLY0-NEXT:    [[TMP2402:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2402]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3550]]
+// SIMD-ONLY0:       if.end3550:
+// SIMD-ONLY0-NEXT:    [[TMP2403:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2403]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2404:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2405:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3551:%.*]] = icmp ult i32 [[TMP2404]], [[TMP2405]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3551]], label [[IF_THEN3553:%.*]], label [[IF_END3554:%.*]]
+// SIMD-ONLY0:       if.then3553:
+// SIMD-ONLY0-NEXT:    [[TMP2406:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2406]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3554]]
+// SIMD-ONLY0:       if.end3554:
+// SIMD-ONLY0-NEXT:    [[TMP2407:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2407]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2408:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2409:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3555:%.*]] = icmp eq i32 [[TMP2408]], [[TMP2409]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3555]], label [[IF_THEN3557:%.*]], label [[IF_END3558:%.*]]
+// SIMD-ONLY0:       if.then3557:
+// SIMD-ONLY0-NEXT:    [[TMP2410:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2410]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3558]]
+// SIMD-ONLY0:       if.end3558:
+// SIMD-ONLY0-NEXT:    [[TMP2411:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2411]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2412:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2413:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3559:%.*]] = icmp eq i32 [[TMP2412]], [[TMP2413]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3559]], label [[IF_THEN3561:%.*]], label [[IF_END3562:%.*]]
+// SIMD-ONLY0:       if.then3561:
+// SIMD-ONLY0-NEXT:    [[TMP2414:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2414]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3562]]
+// SIMD-ONLY0:       if.end3562:
+// SIMD-ONLY0-NEXT:    [[TMP2415:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2415]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2416:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2417:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3563:%.*]] = icmp eq i32 [[TMP2416]], [[TMP2417]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3563]], label [[IF_THEN3565:%.*]], label [[IF_ELSE3566:%.*]]
+// SIMD-ONLY0:       if.then3565:
+// SIMD-ONLY0-NEXT:    [[TMP2418:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2418]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3567:%.*]]
+// SIMD-ONLY0:       if.else3566:
+// SIMD-ONLY0-NEXT:    [[TMP2419:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2419]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3567]]
+// SIMD-ONLY0:       if.end3567:
+// SIMD-ONLY0-NEXT:    [[TMP2420:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2421:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3568:%.*]] = icmp eq i32 [[TMP2420]], [[TMP2421]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3568]], label [[IF_THEN3570:%.*]], label [[IF_ELSE3571:%.*]]
+// SIMD-ONLY0:       if.then3570:
+// SIMD-ONLY0-NEXT:    [[TMP2422:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2422]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3572:%.*]]
+// SIMD-ONLY0:       if.else3571:
+// SIMD-ONLY0-NEXT:    [[TMP2423:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2423]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3572]]
+// SIMD-ONLY0:       if.end3572:
+// SIMD-ONLY0-NEXT:    [[TMP2424:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2425:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3573:%.*]] = icmp eq i32 [[TMP2424]], [[TMP2425]]
+// SIMD-ONLY0-NEXT:    [[CONV3574:%.*]] = zext i1 [[CMP3573]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3574]], ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2426:%.*]] = load i32, ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3575:%.*]] = icmp ne i32 [[TMP2426]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3575]], label [[IF_THEN3576:%.*]], label [[IF_END3577:%.*]]
+// SIMD-ONLY0:       if.then3576:
+// SIMD-ONLY0-NEXT:    [[TMP2427:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2427]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3577]]
+// SIMD-ONLY0:       if.end3577:
+// SIMD-ONLY0-NEXT:    [[TMP2428:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2429:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3578:%.*]] = icmp eq i32 [[TMP2428]], [[TMP2429]]
+// SIMD-ONLY0-NEXT:    [[CONV3579:%.*]] = zext i1 [[CMP3578]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3579]], ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2430:%.*]] = load i32, ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3580:%.*]] = icmp ne i32 [[TMP2430]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3580]], label [[IF_THEN3581:%.*]], label [[IF_END3582:%.*]]
+// SIMD-ONLY0:       if.then3581:
+// SIMD-ONLY0-NEXT:    [[TMP2431:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2431]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3582]]
+// SIMD-ONLY0:       if.end3582:
+// SIMD-ONLY0-NEXT:    [[TMP2432:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2433:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3583:%.*]] = icmp eq i32 [[TMP2432]], [[TMP2433]]
+// SIMD-ONLY0-NEXT:    [[CONV3584:%.*]] = zext i1 [[CMP3583]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3584]], ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2434:%.*]] = load i32, ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3585:%.*]] = icmp ne i32 [[TMP2434]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3585]], label [[IF_THEN3586:%.*]], label [[IF_ELSE3587:%.*]]
+// SIMD-ONLY0:       if.then3586:
+// SIMD-ONLY0-NEXT:    [[TMP2435:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2435]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3588:%.*]]
+// SIMD-ONLY0:       if.else3587:
+// SIMD-ONLY0-NEXT:    [[TMP2436:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2436]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3588]]
+// SIMD-ONLY0:       if.end3588:
+// SIMD-ONLY0-NEXT:    [[TMP2437:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2438:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3589:%.*]] = icmp eq i32 [[TMP2437]], [[TMP2438]]
+// SIMD-ONLY0-NEXT:    [[CONV3590:%.*]] = zext i1 [[CMP3589]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3590]], ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2439:%.*]] = load i32, ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3591:%.*]] = icmp ne i32 [[TMP2439]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3591]], label [[IF_THEN3592:%.*]], label [[IF_ELSE3593:%.*]]
+// SIMD-ONLY0:       if.then3592:
+// SIMD-ONLY0-NEXT:    [[TMP2440:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2440]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3594:%.*]]
+// SIMD-ONLY0:       if.else3593:
+// SIMD-ONLY0-NEXT:    [[TMP2441:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2441]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3594]]
+// SIMD-ONLY0:       if.end3594:
+// SIMD-ONLY0-NEXT:    [[TMP2442:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2442]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2443:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2444:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3595:%.*]] = icmp ugt i32 [[TMP2443]], [[TMP2444]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3595]], label [[IF_THEN3597:%.*]], label [[IF_END3598:%.*]]
+// SIMD-ONLY0:       if.then3597:
+// SIMD-ONLY0-NEXT:    [[TMP2445:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2445]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3598]]
+// SIMD-ONLY0:       if.end3598:
+// SIMD-ONLY0-NEXT:    [[TMP2446:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2446]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2447:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2448:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3599:%.*]] = icmp ugt i32 [[TMP2447]], [[TMP2448]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3599]], label [[IF_THEN3601:%.*]], label [[IF_END3602:%.*]]
+// SIMD-ONLY0:       if.then3601:
+// SIMD-ONLY0-NEXT:    [[TMP2449:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2449]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3602]]
+// SIMD-ONLY0:       if.end3602:
+// SIMD-ONLY0-NEXT:    [[TMP2450:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2450]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2451:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2452:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3603:%.*]] = icmp ult i32 [[TMP2451]], [[TMP2452]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3603]], label [[IF_THEN3605:%.*]], label [[IF_END3606:%.*]]
+// SIMD-ONLY0:       if.then3605:
+// SIMD-ONLY0-NEXT:    [[TMP2453:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2453]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3606]]
+// SIMD-ONLY0:       if.end3606:
+// SIMD-ONLY0-NEXT:    [[TMP2454:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2454]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2455:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2456:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3607:%.*]] = icmp ult i32 [[TMP2455]], [[TMP2456]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3607]], label [[IF_THEN3609:%.*]], label [[IF_END3610:%.*]]
+// SIMD-ONLY0:       if.then3609:
+// SIMD-ONLY0-NEXT:    [[TMP2457:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2457]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3610]]
+// SIMD-ONLY0:       if.end3610:
+// SIMD-ONLY0-NEXT:    [[TMP2458:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2458]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2459:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2460:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3611:%.*]] = icmp eq i32 [[TMP2459]], [[TMP2460]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3611]], label [[IF_THEN3613:%.*]], label [[IF_END3614:%.*]]
+// SIMD-ONLY0:       if.then3613:
+// SIMD-ONLY0-NEXT:    [[TMP2461:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2461]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3614]]
+// SIMD-ONLY0:       if.end3614:
+// SIMD-ONLY0-NEXT:    [[TMP2462:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2462]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2463:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2464:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3615:%.*]] = icmp eq i32 [[TMP2463]], [[TMP2464]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3615]], label [[IF_THEN3617:%.*]], label [[IF_END3618:%.*]]
+// SIMD-ONLY0:       if.then3617:
+// SIMD-ONLY0-NEXT:    [[TMP2465:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2465]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3618]]
+// SIMD-ONLY0:       if.end3618:
+// SIMD-ONLY0-NEXT:    [[TMP2466:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2467:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3619:%.*]] = icmp ugt i32 [[TMP2466]], [[TMP2467]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3619]], label [[IF_THEN3621:%.*]], label [[IF_END3622:%.*]]
+// SIMD-ONLY0:       if.then3621:
+// SIMD-ONLY0-NEXT:    [[TMP2468:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2468]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3622]]
+// SIMD-ONLY0:       if.end3622:
+// SIMD-ONLY0-NEXT:    [[TMP2469:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2469]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2470:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2471:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3623:%.*]] = icmp ugt i32 [[TMP2470]], [[TMP2471]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3623]], label [[IF_THEN3625:%.*]], label [[IF_END3626:%.*]]
+// SIMD-ONLY0:       if.then3625:
+// SIMD-ONLY0-NEXT:    [[TMP2472:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2472]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3626]]
+// SIMD-ONLY0:       if.end3626:
+// SIMD-ONLY0-NEXT:    [[TMP2473:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2473]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2474:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2475:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3627:%.*]] = icmp ult i32 [[TMP2474]], [[TMP2475]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3627]], label [[IF_THEN3629:%.*]], label [[IF_END3630:%.*]]
+// SIMD-ONLY0:       if.then3629:
+// SIMD-ONLY0-NEXT:    [[TMP2476:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2476]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3630]]
+// SIMD-ONLY0:       if.end3630:
+// SIMD-ONLY0-NEXT:    [[TMP2477:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2477]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2478:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2479:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3631:%.*]] = icmp ult i32 [[TMP2478]], [[TMP2479]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3631]], label [[IF_THEN3633:%.*]], label [[IF_END3634:%.*]]
+// SIMD-ONLY0:       if.then3633:
+// SIMD-ONLY0-NEXT:    [[TMP2480:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2480]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3634]]
+// SIMD-ONLY0:       if.end3634:
+// SIMD-ONLY0-NEXT:    [[TMP2481:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2481]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2482:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2483:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3635:%.*]] = icmp eq i32 [[TMP2482]], [[TMP2483]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3635]], label [[IF_THEN3637:%.*]], label [[IF_END3638:%.*]]
+// SIMD-ONLY0:       if.then3637:
+// SIMD-ONLY0-NEXT:    [[TMP2484:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2484]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3638]]
+// SIMD-ONLY0:       if.end3638:
+// SIMD-ONLY0-NEXT:    [[TMP2485:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2485]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2486:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2487:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3639:%.*]] = icmp eq i32 [[TMP2486]], [[TMP2487]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3639]], label [[IF_THEN3641:%.*]], label [[IF_END3642:%.*]]
+// SIMD-ONLY0:       if.then3641:
+// SIMD-ONLY0-NEXT:    [[TMP2488:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2488]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3642]]
+// SIMD-ONLY0:       if.end3642:
+// SIMD-ONLY0-NEXT:    [[TMP2489:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2489]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2490:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2491:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3643:%.*]] = icmp eq i32 [[TMP2490]], [[TMP2491]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3643]], label [[IF_THEN3645:%.*]], label [[IF_ELSE3646:%.*]]
+// SIMD-ONLY0:       if.then3645:
+// SIMD-ONLY0-NEXT:    [[TMP2492:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2492]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3647:%.*]]
+// SIMD-ONLY0:       if.else3646:
+// SIMD-ONLY0-NEXT:    [[TMP2493:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2493]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3647]]
+// SIMD-ONLY0:       if.end3647:
+// SIMD-ONLY0-NEXT:    [[TMP2494:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2495:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3648:%.*]] = icmp eq i32 [[TMP2494]], [[TMP2495]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3648]], label [[IF_THEN3650:%.*]], label [[IF_ELSE3651:%.*]]
+// SIMD-ONLY0:       if.then3650:
+// SIMD-ONLY0-NEXT:    [[TMP2496:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2496]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3652:%.*]]
+// SIMD-ONLY0:       if.else3651:
+// SIMD-ONLY0-NEXT:    [[TMP2497:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2497]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3652]]
+// SIMD-ONLY0:       if.end3652:
+// SIMD-ONLY0-NEXT:    [[TMP2498:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2499:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3653:%.*]] = icmp eq i32 [[TMP2498]], [[TMP2499]]
+// SIMD-ONLY0-NEXT:    [[CONV3654:%.*]] = zext i1 [[CMP3653]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3654]], ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2500:%.*]] = load i32, ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3655:%.*]] = icmp ne i32 [[TMP2500]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3655]], label [[IF_THEN3656:%.*]], label [[IF_END3657:%.*]]
+// SIMD-ONLY0:       if.then3656:
+// SIMD-ONLY0-NEXT:    [[TMP2501:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2501]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3657]]
+// SIMD-ONLY0:       if.end3657:
+// SIMD-ONLY0-NEXT:    [[TMP2502:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2503:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3658:%.*]] = icmp eq i32 [[TMP2502]], [[TMP2503]]
+// SIMD-ONLY0-NEXT:    [[CONV3659:%.*]] = zext i1 [[CMP3658]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3659]], ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2504:%.*]] = load i32, ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3660:%.*]] = icmp ne i32 [[TMP2504]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3660]], label [[IF_THEN3661:%.*]], label [[IF_END3662:%.*]]
+// SIMD-ONLY0:       if.then3661:
+// SIMD-ONLY0-NEXT:    [[TMP2505:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2505]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3662]]
+// SIMD-ONLY0:       if.end3662:
+// SIMD-ONLY0-NEXT:    [[TMP2506:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2507:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3663:%.*]] = icmp eq i32 [[TMP2506]], [[TMP2507]]
+// SIMD-ONLY0-NEXT:    [[CONV3664:%.*]] = zext i1 [[CMP3663]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3664]], ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2508:%.*]] = load i32, ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3665:%.*]] = icmp ne i32 [[TMP2508]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3665]], label [[IF_THEN3666:%.*]], label [[IF_ELSE3667:%.*]]
+// SIMD-ONLY0:       if.then3666:
+// SIMD-ONLY0-NEXT:    [[TMP2509:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2509]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3668:%.*]]
+// SIMD-ONLY0:       if.else3667:
+// SIMD-ONLY0-NEXT:    [[TMP2510:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2510]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3668]]
+// SIMD-ONLY0:       if.end3668:
+// SIMD-ONLY0-NEXT:    [[TMP2511:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2512:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3669:%.*]] = icmp eq i32 [[TMP2511]], [[TMP2512]]
+// SIMD-ONLY0-NEXT:    [[CONV3670:%.*]] = zext i1 [[CMP3669]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3670]], ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2513:%.*]] = load i32, ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3671:%.*]] = icmp ne i32 [[TMP2513]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3671]], label [[IF_THEN3672:%.*]], label [[IF_ELSE3673:%.*]]
+// SIMD-ONLY0:       if.then3672:
+// SIMD-ONLY0-NEXT:    [[TMP2514:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2514]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3674:%.*]]
+// SIMD-ONLY0:       if.else3673:
+// SIMD-ONLY0-NEXT:    [[TMP2515:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2515]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3674]]
+// SIMD-ONLY0:       if.end3674:
+// SIMD-ONLY0-NEXT:    [[TMP2516:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2516]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2517:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2518:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3675:%.*]] = icmp ugt i32 [[TMP2517]], [[TMP2518]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3675]], label [[IF_THEN3677:%.*]], label [[IF_END3678:%.*]]
+// SIMD-ONLY0:       if.then3677:
+// SIMD-ONLY0-NEXT:    [[TMP2519:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2519]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3678]]
+// SIMD-ONLY0:       if.end3678:
+// SIMD-ONLY0-NEXT:    [[TMP2520:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2520]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2521:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2522:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3679:%.*]] = icmp ugt i32 [[TMP2521]], [[TMP2522]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3679]], label [[IF_THEN3681:%.*]], label [[IF_END3682:%.*]]
+// SIMD-ONLY0:       if.then3681:
+// SIMD-ONLY0-NEXT:    [[TMP2523:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2523]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3682]]
+// SIMD-ONLY0:       if.end3682:
+// SIMD-ONLY0-NEXT:    [[TMP2524:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2524]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2525:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2526:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3683:%.*]] = icmp ult i32 [[TMP2525]], [[TMP2526]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3683]], label [[IF_THEN3685:%.*]], label [[IF_END3686:%.*]]
+// SIMD-ONLY0:       if.then3685:
+// SIMD-ONLY0-NEXT:    [[TMP2527:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2527]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3686]]
+// SIMD-ONLY0:       if.end3686:
+// SIMD-ONLY0-NEXT:    [[TMP2528:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2528]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2529:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2530:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3687:%.*]] = icmp ult i32 [[TMP2529]], [[TMP2530]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3687]], label [[IF_THEN3689:%.*]], label [[IF_END3690:%.*]]
+// SIMD-ONLY0:       if.then3689:
+// SIMD-ONLY0-NEXT:    [[TMP2531:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2531]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3690]]
+// SIMD-ONLY0:       if.end3690:
+// SIMD-ONLY0-NEXT:    [[TMP2532:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2532]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2533:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2534:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3691:%.*]] = icmp eq i32 [[TMP2533]], [[TMP2534]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3691]], label [[IF_THEN3693:%.*]], label [[IF_END3694:%.*]]
+// SIMD-ONLY0:       if.then3693:
+// SIMD-ONLY0-NEXT:    [[TMP2535:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2535]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3694]]
+// SIMD-ONLY0:       if.end3694:
+// SIMD-ONLY0-NEXT:    [[TMP2536:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2536]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2537:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2538:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3695:%.*]] = icmp eq i32 [[TMP2537]], [[TMP2538]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3695]], label [[IF_THEN3697:%.*]], label [[IF_END3698:%.*]]
+// SIMD-ONLY0:       if.then3697:
+// SIMD-ONLY0-NEXT:    [[TMP2539:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2539]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3698]]
+// SIMD-ONLY0:       if.end3698:
+// SIMD-ONLY0-NEXT:    [[TMP2540:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2541:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3699:%.*]] = icmp ugt i32 [[TMP2540]], [[TMP2541]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3699]], label [[IF_THEN3701:%.*]], label [[IF_END3702:%.*]]
+// SIMD-ONLY0:       if.then3701:
+// SIMD-ONLY0-NEXT:    [[TMP2542:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2542]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3702]]
+// SIMD-ONLY0:       if.end3702:
+// SIMD-ONLY0-NEXT:    [[TMP2543:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2543]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2544:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2545:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3703:%.*]] = icmp ugt i32 [[TMP2544]], [[TMP2545]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3703]], label [[IF_THEN3705:%.*]], label [[IF_END3706:%.*]]
+// SIMD-ONLY0:       if.then3705:
+// SIMD-ONLY0-NEXT:    [[TMP2546:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2546]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3706]]
+// SIMD-ONLY0:       if.end3706:
+// SIMD-ONLY0-NEXT:    [[TMP2547:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2547]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2548:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2549:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3707:%.*]] = icmp ult i32 [[TMP2548]], [[TMP2549]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3707]], label [[IF_THEN3709:%.*]], label [[IF_END3710:%.*]]
+// SIMD-ONLY0:       if.then3709:
+// SIMD-ONLY0-NEXT:    [[TMP2550:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2550]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3710]]
+// SIMD-ONLY0:       if.end3710:
+// SIMD-ONLY0-NEXT:    [[TMP2551:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2551]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2552:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2553:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3711:%.*]] = icmp ult i32 [[TMP2552]], [[TMP2553]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3711]], label [[IF_THEN3713:%.*]], label [[IF_END3714:%.*]]
+// SIMD-ONLY0:       if.then3713:
+// SIMD-ONLY0-NEXT:    [[TMP2554:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2554]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3714]]
+// SIMD-ONLY0:       if.end3714:
+// SIMD-ONLY0-NEXT:    [[TMP2555:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2555]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2556:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2557:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3715:%.*]] = icmp eq i32 [[TMP2556]], [[TMP2557]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3715]], label [[IF_THEN3717:%.*]], label [[IF_END3718:%.*]]
+// SIMD-ONLY0:       if.then3717:
+// SIMD-ONLY0-NEXT:    [[TMP2558:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2558]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3718]]
+// SIMD-ONLY0:       if.end3718:
+// SIMD-ONLY0-NEXT:    [[TMP2559:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2559]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2560:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2561:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3719:%.*]] = icmp eq i32 [[TMP2560]], [[TMP2561]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3719]], label [[IF_THEN3721:%.*]], label [[IF_END3722:%.*]]
+// SIMD-ONLY0:       if.then3721:
+// SIMD-ONLY0-NEXT:    [[TMP2562:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2562]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3722]]
+// SIMD-ONLY0:       if.end3722:
+// SIMD-ONLY0-NEXT:    [[TMP2563:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2563]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2564:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2565:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3723:%.*]] = icmp eq i32 [[TMP2564]], [[TMP2565]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3723]], label [[IF_THEN3725:%.*]], label [[IF_ELSE3726:%.*]]
+// SIMD-ONLY0:       if.then3725:
+// SIMD-ONLY0-NEXT:    [[TMP2566:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2566]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3727:%.*]]
+// SIMD-ONLY0:       if.else3726:
+// SIMD-ONLY0-NEXT:    [[TMP2567:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2567]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3727]]
+// SIMD-ONLY0:       if.end3727:
+// SIMD-ONLY0-NEXT:    [[TMP2568:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2569:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3728:%.*]] = icmp eq i32 [[TMP2568]], [[TMP2569]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3728]], label [[IF_THEN3730:%.*]], label [[IF_ELSE3731:%.*]]
+// SIMD-ONLY0:       if.then3730:
+// SIMD-ONLY0-NEXT:    [[TMP2570:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2570]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3732:%.*]]
+// SIMD-ONLY0:       if.else3731:
+// SIMD-ONLY0-NEXT:    [[TMP2571:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2571]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3732]]
+// SIMD-ONLY0:       if.end3732:
+// SIMD-ONLY0-NEXT:    [[TMP2572:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2573:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3733:%.*]] = icmp eq i32 [[TMP2572]], [[TMP2573]]
+// SIMD-ONLY0-NEXT:    [[CONV3734:%.*]] = zext i1 [[CMP3733]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3734]], ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2574:%.*]] = load i32, ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3735:%.*]] = icmp ne i32 [[TMP2574]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3735]], label [[IF_THEN3736:%.*]], label [[IF_END3737:%.*]]
+// SIMD-ONLY0:       if.then3736:
+// SIMD-ONLY0-NEXT:    [[TMP2575:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2575]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3737]]
+// SIMD-ONLY0:       if.end3737:
+// SIMD-ONLY0-NEXT:    [[TMP2576:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2577:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3738:%.*]] = icmp eq i32 [[TMP2576]], [[TMP2577]]
+// SIMD-ONLY0-NEXT:    [[CONV3739:%.*]] = zext i1 [[CMP3738]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3739]], ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2578:%.*]] = load i32, ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3740:%.*]] = icmp ne i32 [[TMP2578]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3740]], label [[IF_THEN3741:%.*]], label [[IF_END3742:%.*]]
+// SIMD-ONLY0:       if.then3741:
+// SIMD-ONLY0-NEXT:    [[TMP2579:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2579]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3742]]
+// SIMD-ONLY0:       if.end3742:
+// SIMD-ONLY0-NEXT:    [[TMP2580:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2581:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3743:%.*]] = icmp eq i32 [[TMP2580]], [[TMP2581]]
+// SIMD-ONLY0-NEXT:    [[CONV3744:%.*]] = zext i1 [[CMP3743]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3744]], ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2582:%.*]] = load i32, ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3745:%.*]] = icmp ne i32 [[TMP2582]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3745]], label [[IF_THEN3746:%.*]], label [[IF_ELSE3747:%.*]]
+// SIMD-ONLY0:       if.then3746:
+// SIMD-ONLY0-NEXT:    [[TMP2583:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2583]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3748:%.*]]
+// SIMD-ONLY0:       if.else3747:
+// SIMD-ONLY0-NEXT:    [[TMP2584:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2584]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3748]]
+// SIMD-ONLY0:       if.end3748:
+// SIMD-ONLY0-NEXT:    [[TMP2585:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2586:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3749:%.*]] = icmp eq i32 [[TMP2585]], [[TMP2586]]
+// SIMD-ONLY0-NEXT:    [[CONV3750:%.*]] = zext i1 [[CMP3749]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3750]], ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2587:%.*]] = load i32, ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3751:%.*]] = icmp ne i32 [[TMP2587]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3751]], label [[IF_THEN3752:%.*]], label [[IF_ELSE3753:%.*]]
+// SIMD-ONLY0:       if.then3752:
+// SIMD-ONLY0-NEXT:    [[TMP2588:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2588]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3754:%.*]]
+// SIMD-ONLY0:       if.else3753:
+// SIMD-ONLY0-NEXT:    [[TMP2589:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2589]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3754]]
+// SIMD-ONLY0:       if.end3754:
+// SIMD-ONLY0-NEXT:    [[TMP2590:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2590]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2591:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2592:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3755:%.*]] = icmp ugt i32 [[TMP2591]], [[TMP2592]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3755]], label [[IF_THEN3757:%.*]], label [[IF_END3758:%.*]]
+// SIMD-ONLY0:       if.then3757:
+// SIMD-ONLY0-NEXT:    [[TMP2593:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2593]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3758]]
+// SIMD-ONLY0:       if.end3758:
+// SIMD-ONLY0-NEXT:    [[TMP2594:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2594]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2595:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2596:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3759:%.*]] = icmp ugt i32 [[TMP2595]], [[TMP2596]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3759]], label [[IF_THEN3761:%.*]], label [[IF_END3762:%.*]]
+// SIMD-ONLY0:       if.then3761:
+// SIMD-ONLY0-NEXT:    [[TMP2597:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2597]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3762]]
+// SIMD-ONLY0:       if.end3762:
+// SIMD-ONLY0-NEXT:    [[TMP2598:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2598]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2599:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2600:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3763:%.*]] = icmp ult i32 [[TMP2599]], [[TMP2600]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3763]], label [[IF_THEN3765:%.*]], label [[IF_END3766:%.*]]
+// SIMD-ONLY0:       if.then3765:
+// SIMD-ONLY0-NEXT:    [[TMP2601:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2601]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3766]]
+// SIMD-ONLY0:       if.end3766:
+// SIMD-ONLY0-NEXT:    [[TMP2602:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2602]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2603:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2604:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3767:%.*]] = icmp ult i32 [[TMP2603]], [[TMP2604]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3767]], label [[IF_THEN3769:%.*]], label [[IF_END3770:%.*]]
+// SIMD-ONLY0:       if.then3769:
+// SIMD-ONLY0-NEXT:    [[TMP2605:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2605]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3770]]
+// SIMD-ONLY0:       if.end3770:
+// SIMD-ONLY0-NEXT:    [[TMP2606:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2606]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2607:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2608:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3771:%.*]] = icmp eq i32 [[TMP2607]], [[TMP2608]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3771]], label [[IF_THEN3773:%.*]], label [[IF_END3774:%.*]]
+// SIMD-ONLY0:       if.then3773:
+// SIMD-ONLY0-NEXT:    [[TMP2609:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2609]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3774]]
+// SIMD-ONLY0:       if.end3774:
+// SIMD-ONLY0-NEXT:    [[TMP2610:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2610]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2611:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2612:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3775:%.*]] = icmp eq i32 [[TMP2611]], [[TMP2612]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3775]], label [[IF_THEN3777:%.*]], label [[IF_END3778:%.*]]
+// SIMD-ONLY0:       if.then3777:
+// SIMD-ONLY0-NEXT:    [[TMP2613:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2613]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3778]]
+// SIMD-ONLY0:       if.end3778:
+// SIMD-ONLY0-NEXT:    [[TMP2614:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2615:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3779:%.*]] = icmp ugt i32 [[TMP2614]], [[TMP2615]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3779]], label [[IF_THEN3781:%.*]], label [[IF_END3782:%.*]]
+// SIMD-ONLY0:       if.then3781:
+// SIMD-ONLY0-NEXT:    [[TMP2616:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2616]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3782]]
+// SIMD-ONLY0:       if.end3782:
+// SIMD-ONLY0-NEXT:    [[TMP2617:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2617]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2618:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2619:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3783:%.*]] = icmp ugt i32 [[TMP2618]], [[TMP2619]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3783]], label [[IF_THEN3785:%.*]], label [[IF_END3786:%.*]]
+// SIMD-ONLY0:       if.then3785:
+// SIMD-ONLY0-NEXT:    [[TMP2620:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2620]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3786]]
+// SIMD-ONLY0:       if.end3786:
+// SIMD-ONLY0-NEXT:    [[TMP2621:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2621]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2622:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2623:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3787:%.*]] = icmp ult i32 [[TMP2622]], [[TMP2623]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3787]], label [[IF_THEN3789:%.*]], label [[IF_END3790:%.*]]
+// SIMD-ONLY0:       if.then3789:
+// SIMD-ONLY0-NEXT:    [[TMP2624:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2624]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3790]]
+// SIMD-ONLY0:       if.end3790:
+// SIMD-ONLY0-NEXT:    [[TMP2625:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2625]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2626:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2627:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3791:%.*]] = icmp ult i32 [[TMP2626]], [[TMP2627]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3791]], label [[IF_THEN3793:%.*]], label [[IF_END3794:%.*]]
+// SIMD-ONLY0:       if.then3793:
+// SIMD-ONLY0-NEXT:    [[TMP2628:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2628]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3794]]
+// SIMD-ONLY0:       if.end3794:
+// SIMD-ONLY0-NEXT:    [[TMP2629:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2629]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2630:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2631:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3795:%.*]] = icmp eq i32 [[TMP2630]], [[TMP2631]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3795]], label [[IF_THEN3797:%.*]], label [[IF_END3798:%.*]]
+// SIMD-ONLY0:       if.then3797:
+// SIMD-ONLY0-NEXT:    [[TMP2632:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2632]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3798]]
+// SIMD-ONLY0:       if.end3798:
+// SIMD-ONLY0-NEXT:    [[TMP2633:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2633]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2634:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2635:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3799:%.*]] = icmp eq i32 [[TMP2634]], [[TMP2635]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3799]], label [[IF_THEN3801:%.*]], label [[IF_END3802:%.*]]
+// SIMD-ONLY0:       if.then3801:
+// SIMD-ONLY0-NEXT:    [[TMP2636:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2636]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3802]]
+// SIMD-ONLY0:       if.end3802:
+// SIMD-ONLY0-NEXT:    [[TMP2637:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2637]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2638:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2639:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3803:%.*]] = icmp eq i32 [[TMP2638]], [[TMP2639]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3803]], label [[IF_THEN3805:%.*]], label [[IF_ELSE3806:%.*]]
+// SIMD-ONLY0:       if.then3805:
+// SIMD-ONLY0-NEXT:    [[TMP2640:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2640]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3807:%.*]]
+// SIMD-ONLY0:       if.else3806:
+// SIMD-ONLY0-NEXT:    [[TMP2641:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2641]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3807]]
+// SIMD-ONLY0:       if.end3807:
+// SIMD-ONLY0-NEXT:    [[TMP2642:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2643:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3808:%.*]] = icmp eq i32 [[TMP2642]], [[TMP2643]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3808]], label [[IF_THEN3810:%.*]], label [[IF_ELSE3811:%.*]]
+// SIMD-ONLY0:       if.then3810:
+// SIMD-ONLY0-NEXT:    [[TMP2644:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2644]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3812:%.*]]
+// SIMD-ONLY0:       if.else3811:
+// SIMD-ONLY0-NEXT:    [[TMP2645:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2645]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3812]]
+// SIMD-ONLY0:       if.end3812:
+// SIMD-ONLY0-NEXT:    [[TMP2646:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2647:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3813:%.*]] = icmp eq i32 [[TMP2646]], [[TMP2647]]
+// SIMD-ONLY0-NEXT:    [[CONV3814:%.*]] = zext i1 [[CMP3813]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3814]], ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2648:%.*]] = load i32, ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3815:%.*]] = icmp ne i32 [[TMP2648]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3815]], label [[IF_THEN3816:%.*]], label [[IF_END3817:%.*]]
+// SIMD-ONLY0:       if.then3816:
+// SIMD-ONLY0-NEXT:    [[TMP2649:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2649]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3817]]
+// SIMD-ONLY0:       if.end3817:
+// SIMD-ONLY0-NEXT:    [[TMP2650:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2651:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3818:%.*]] = icmp eq i32 [[TMP2650]], [[TMP2651]]
+// SIMD-ONLY0-NEXT:    [[CONV3819:%.*]] = zext i1 [[CMP3818]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3819]], ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2652:%.*]] = load i32, ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3820:%.*]] = icmp ne i32 [[TMP2652]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3820]], label [[IF_THEN3821:%.*]], label [[IF_END3822:%.*]]
+// SIMD-ONLY0:       if.then3821:
+// SIMD-ONLY0-NEXT:    [[TMP2653:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2653]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3822]]
+// SIMD-ONLY0:       if.end3822:
+// SIMD-ONLY0-NEXT:    [[TMP2654:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2655:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3823:%.*]] = icmp eq i32 [[TMP2654]], [[TMP2655]]
+// SIMD-ONLY0-NEXT:    [[CONV3824:%.*]] = zext i1 [[CMP3823]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3824]], ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2656:%.*]] = load i32, ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3825:%.*]] = icmp ne i32 [[TMP2656]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3825]], label [[IF_THEN3826:%.*]], label [[IF_ELSE3827:%.*]]
+// SIMD-ONLY0:       if.then3826:
+// SIMD-ONLY0-NEXT:    [[TMP2657:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2657]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3828:%.*]]
+// SIMD-ONLY0:       if.else3827:
+// SIMD-ONLY0-NEXT:    [[TMP2658:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2658]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3828]]
+// SIMD-ONLY0:       if.end3828:
+// SIMD-ONLY0-NEXT:    [[TMP2659:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2660:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP3829:%.*]] = icmp eq i32 [[TMP2659]], [[TMP2660]]
+// SIMD-ONLY0-NEXT:    [[CONV3830:%.*]] = zext i1 [[CMP3829]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV3830]], ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2661:%.*]] = load i32, ptr [[UIR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL3831:%.*]] = icmp ne i32 [[TMP2661]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3831]], label [[IF_THEN3832:%.*]], label [[IF_ELSE3833:%.*]]
+// SIMD-ONLY0:       if.then3832:
+// SIMD-ONLY0-NEXT:    [[TMP2662:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2662]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3834:%.*]]
+// SIMD-ONLY0:       if.else3833:
+// SIMD-ONLY0-NEXT:    [[TMP2663:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2663]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END3834]]
+// SIMD-ONLY0:       if.end3834:
+// SIMD-ONLY0-NEXT:    [[TMP2664:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2664]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2665:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2666:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3835:%.*]] = icmp sgt i64 [[TMP2665]], [[TMP2666]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3835]], label [[IF_THEN3837:%.*]], label [[IF_END3838:%.*]]
+// SIMD-ONLY0:       if.then3837:
+// SIMD-ONLY0-NEXT:    [[TMP2667:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2667]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3838]]
+// SIMD-ONLY0:       if.end3838:
+// SIMD-ONLY0-NEXT:    [[TMP2668:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2668]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2669:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2670:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3839:%.*]] = icmp sgt i64 [[TMP2669]], [[TMP2670]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3839]], label [[IF_THEN3841:%.*]], label [[IF_END3842:%.*]]
+// SIMD-ONLY0:       if.then3841:
+// SIMD-ONLY0-NEXT:    [[TMP2671:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2671]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3842]]
+// SIMD-ONLY0:       if.end3842:
+// SIMD-ONLY0-NEXT:    [[TMP2672:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2672]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2673:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2674:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3843:%.*]] = icmp slt i64 [[TMP2673]], [[TMP2674]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3843]], label [[IF_THEN3845:%.*]], label [[IF_END3846:%.*]]
+// SIMD-ONLY0:       if.then3845:
+// SIMD-ONLY0-NEXT:    [[TMP2675:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2675]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3846]]
+// SIMD-ONLY0:       if.end3846:
+// SIMD-ONLY0-NEXT:    [[TMP2676:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2676]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2677:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2678:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3847:%.*]] = icmp slt i64 [[TMP2677]], [[TMP2678]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3847]], label [[IF_THEN3849:%.*]], label [[IF_END3850:%.*]]
+// SIMD-ONLY0:       if.then3849:
+// SIMD-ONLY0-NEXT:    [[TMP2679:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2679]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3850]]
+// SIMD-ONLY0:       if.end3850:
+// SIMD-ONLY0-NEXT:    [[TMP2680:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2680]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2681:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2682:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3851:%.*]] = icmp eq i64 [[TMP2681]], [[TMP2682]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3851]], label [[IF_THEN3853:%.*]], label [[IF_END3854:%.*]]
+// SIMD-ONLY0:       if.then3853:
+// SIMD-ONLY0-NEXT:    [[TMP2683:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2683]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3854]]
+// SIMD-ONLY0:       if.end3854:
+// SIMD-ONLY0-NEXT:    [[TMP2684:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2684]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2685:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2686:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3855:%.*]] = icmp eq i64 [[TMP2685]], [[TMP2686]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3855]], label [[IF_THEN3857:%.*]], label [[IF_END3858:%.*]]
+// SIMD-ONLY0:       if.then3857:
+// SIMD-ONLY0-NEXT:    [[TMP2687:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2687]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3858]]
+// SIMD-ONLY0:       if.end3858:
+// SIMD-ONLY0-NEXT:    [[TMP2688:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2689:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3859:%.*]] = icmp sgt i64 [[TMP2688]], [[TMP2689]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3859]], label [[IF_THEN3861:%.*]], label [[IF_END3862:%.*]]
+// SIMD-ONLY0:       if.then3861:
+// SIMD-ONLY0-NEXT:    [[TMP2690:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2690]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3862]]
+// SIMD-ONLY0:       if.end3862:
+// SIMD-ONLY0-NEXT:    [[TMP2691:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2691]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2692:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2693:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3863:%.*]] = icmp sgt i64 [[TMP2692]], [[TMP2693]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3863]], label [[IF_THEN3865:%.*]], label [[IF_END3866:%.*]]
+// SIMD-ONLY0:       if.then3865:
+// SIMD-ONLY0-NEXT:    [[TMP2694:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2694]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3866]]
+// SIMD-ONLY0:       if.end3866:
+// SIMD-ONLY0-NEXT:    [[TMP2695:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2695]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2696:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2697:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3867:%.*]] = icmp slt i64 [[TMP2696]], [[TMP2697]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3867]], label [[IF_THEN3869:%.*]], label [[IF_END3870:%.*]]
+// SIMD-ONLY0:       if.then3869:
+// SIMD-ONLY0-NEXT:    [[TMP2698:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2698]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3870]]
+// SIMD-ONLY0:       if.end3870:
+// SIMD-ONLY0-NEXT:    [[TMP2699:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2699]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2700:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2701:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3871:%.*]] = icmp slt i64 [[TMP2700]], [[TMP2701]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3871]], label [[IF_THEN3873:%.*]], label [[IF_END3874:%.*]]
+// SIMD-ONLY0:       if.then3873:
+// SIMD-ONLY0-NEXT:    [[TMP2702:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2702]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3874]]
+// SIMD-ONLY0:       if.end3874:
+// SIMD-ONLY0-NEXT:    [[TMP2703:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2703]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2704:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2705:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3875:%.*]] = icmp eq i64 [[TMP2704]], [[TMP2705]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3875]], label [[IF_THEN3877:%.*]], label [[IF_END3878:%.*]]
+// SIMD-ONLY0:       if.then3877:
+// SIMD-ONLY0-NEXT:    [[TMP2706:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2706]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3878]]
+// SIMD-ONLY0:       if.end3878:
+// SIMD-ONLY0-NEXT:    [[TMP2707:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2707]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2708:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2709:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3879:%.*]] = icmp eq i64 [[TMP2708]], [[TMP2709]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3879]], label [[IF_THEN3881:%.*]], label [[IF_END3882:%.*]]
+// SIMD-ONLY0:       if.then3881:
+// SIMD-ONLY0-NEXT:    [[TMP2710:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2710]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3882]]
+// SIMD-ONLY0:       if.end3882:
+// SIMD-ONLY0-NEXT:    [[TMP2711:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2711]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2712:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2713:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3883:%.*]] = icmp eq i64 [[TMP2712]], [[TMP2713]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3883]], label [[IF_THEN3885:%.*]], label [[IF_ELSE3886:%.*]]
+// SIMD-ONLY0:       if.then3885:
+// SIMD-ONLY0-NEXT:    [[TMP2714:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2714]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3887:%.*]]
+// SIMD-ONLY0:       if.else3886:
+// SIMD-ONLY0-NEXT:    [[TMP2715:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2715]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3887]]
+// SIMD-ONLY0:       if.end3887:
+// SIMD-ONLY0-NEXT:    [[TMP2716:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2717:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3888:%.*]] = icmp eq i64 [[TMP2716]], [[TMP2717]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3888]], label [[IF_THEN3890:%.*]], label [[IF_ELSE3891:%.*]]
+// SIMD-ONLY0:       if.then3890:
+// SIMD-ONLY0-NEXT:    [[TMP2718:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2718]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3892:%.*]]
+// SIMD-ONLY0:       if.else3891:
+// SIMD-ONLY0-NEXT:    [[TMP2719:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2719]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3892]]
+// SIMD-ONLY0:       if.end3892:
+// SIMD-ONLY0-NEXT:    [[TMP2720:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2721:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3893:%.*]] = icmp eq i64 [[TMP2720]], [[TMP2721]]
+// SIMD-ONLY0-NEXT:    [[CONV3894:%.*]] = zext i1 [[CMP3893]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV3895:%.*]] = sext i32 [[CONV3894]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV3895]], ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2722:%.*]] = load i64, ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL3896:%.*]] = icmp ne i64 [[TMP2722]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3896]], label [[IF_THEN3897:%.*]], label [[IF_END3898:%.*]]
+// SIMD-ONLY0:       if.then3897:
+// SIMD-ONLY0-NEXT:    [[TMP2723:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2723]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3898]]
+// SIMD-ONLY0:       if.end3898:
+// SIMD-ONLY0-NEXT:    [[TMP2724:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2725:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3899:%.*]] = icmp eq i64 [[TMP2724]], [[TMP2725]]
+// SIMD-ONLY0-NEXT:    [[CONV3900:%.*]] = zext i1 [[CMP3899]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV3901:%.*]] = sext i32 [[CONV3900]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV3901]], ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2726:%.*]] = load i64, ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL3902:%.*]] = icmp ne i64 [[TMP2726]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3902]], label [[IF_THEN3903:%.*]], label [[IF_END3904:%.*]]
+// SIMD-ONLY0:       if.then3903:
+// SIMD-ONLY0-NEXT:    [[TMP2727:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2727]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3904]]
+// SIMD-ONLY0:       if.end3904:
+// SIMD-ONLY0-NEXT:    [[TMP2728:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2729:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3905:%.*]] = icmp eq i64 [[TMP2728]], [[TMP2729]]
+// SIMD-ONLY0-NEXT:    [[CONV3906:%.*]] = zext i1 [[CMP3905]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV3907:%.*]] = sext i32 [[CONV3906]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV3907]], ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2730:%.*]] = load i64, ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL3908:%.*]] = icmp ne i64 [[TMP2730]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3908]], label [[IF_THEN3909:%.*]], label [[IF_ELSE3910:%.*]]
+// SIMD-ONLY0:       if.then3909:
+// SIMD-ONLY0-NEXT:    [[TMP2731:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2731]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3911:%.*]]
+// SIMD-ONLY0:       if.else3910:
+// SIMD-ONLY0-NEXT:    [[TMP2732:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2732]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3911]]
+// SIMD-ONLY0:       if.end3911:
+// SIMD-ONLY0-NEXT:    [[TMP2733:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2734:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3912:%.*]] = icmp eq i64 [[TMP2733]], [[TMP2734]]
+// SIMD-ONLY0-NEXT:    [[CONV3913:%.*]] = zext i1 [[CMP3912]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV3914:%.*]] = sext i32 [[CONV3913]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV3914]], ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2735:%.*]] = load i64, ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL3915:%.*]] = icmp ne i64 [[TMP2735]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3915]], label [[IF_THEN3916:%.*]], label [[IF_ELSE3917:%.*]]
+// SIMD-ONLY0:       if.then3916:
+// SIMD-ONLY0-NEXT:    [[TMP2736:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2736]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3918:%.*]]
+// SIMD-ONLY0:       if.else3917:
+// SIMD-ONLY0-NEXT:    [[TMP2737:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2737]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3918]]
+// SIMD-ONLY0:       if.end3918:
+// SIMD-ONLY0-NEXT:    [[TMP2738:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2738]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2739:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2740:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3919:%.*]] = icmp sgt i64 [[TMP2739]], [[TMP2740]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3919]], label [[IF_THEN3921:%.*]], label [[IF_END3922:%.*]]
+// SIMD-ONLY0:       if.then3921:
+// SIMD-ONLY0-NEXT:    [[TMP2741:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2741]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3922]]
+// SIMD-ONLY0:       if.end3922:
+// SIMD-ONLY0-NEXT:    [[TMP2742:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2742]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2743:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2744:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3923:%.*]] = icmp sgt i64 [[TMP2743]], [[TMP2744]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3923]], label [[IF_THEN3925:%.*]], label [[IF_END3926:%.*]]
+// SIMD-ONLY0:       if.then3925:
+// SIMD-ONLY0-NEXT:    [[TMP2745:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2745]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3926]]
+// SIMD-ONLY0:       if.end3926:
+// SIMD-ONLY0-NEXT:    [[TMP2746:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2746]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2747:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2748:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3927:%.*]] = icmp slt i64 [[TMP2747]], [[TMP2748]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3927]], label [[IF_THEN3929:%.*]], label [[IF_END3930:%.*]]
+// SIMD-ONLY0:       if.then3929:
+// SIMD-ONLY0-NEXT:    [[TMP2749:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2749]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3930]]
+// SIMD-ONLY0:       if.end3930:
+// SIMD-ONLY0-NEXT:    [[TMP2750:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2750]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2751:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2752:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3931:%.*]] = icmp slt i64 [[TMP2751]], [[TMP2752]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3931]], label [[IF_THEN3933:%.*]], label [[IF_END3934:%.*]]
+// SIMD-ONLY0:       if.then3933:
+// SIMD-ONLY0-NEXT:    [[TMP2753:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2753]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3934]]
+// SIMD-ONLY0:       if.end3934:
+// SIMD-ONLY0-NEXT:    [[TMP2754:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2754]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2755:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2756:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3935:%.*]] = icmp eq i64 [[TMP2755]], [[TMP2756]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3935]], label [[IF_THEN3937:%.*]], label [[IF_END3938:%.*]]
+// SIMD-ONLY0:       if.then3937:
+// SIMD-ONLY0-NEXT:    [[TMP2757:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2757]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3938]]
+// SIMD-ONLY0:       if.end3938:
+// SIMD-ONLY0-NEXT:    [[TMP2758:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2758]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2759:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2760:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3939:%.*]] = icmp eq i64 [[TMP2759]], [[TMP2760]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3939]], label [[IF_THEN3941:%.*]], label [[IF_END3942:%.*]]
+// SIMD-ONLY0:       if.then3941:
+// SIMD-ONLY0-NEXT:    [[TMP2761:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2761]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3942]]
+// SIMD-ONLY0:       if.end3942:
+// SIMD-ONLY0-NEXT:    [[TMP2762:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2763:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3943:%.*]] = icmp sgt i64 [[TMP2762]], [[TMP2763]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3943]], label [[IF_THEN3945:%.*]], label [[IF_END3946:%.*]]
+// SIMD-ONLY0:       if.then3945:
+// SIMD-ONLY0-NEXT:    [[TMP2764:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2764]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3946]]
+// SIMD-ONLY0:       if.end3946:
+// SIMD-ONLY0-NEXT:    [[TMP2765:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2765]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2766:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2767:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3947:%.*]] = icmp sgt i64 [[TMP2766]], [[TMP2767]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3947]], label [[IF_THEN3949:%.*]], label [[IF_END3950:%.*]]
+// SIMD-ONLY0:       if.then3949:
+// SIMD-ONLY0-NEXT:    [[TMP2768:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2768]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3950]]
+// SIMD-ONLY0:       if.end3950:
+// SIMD-ONLY0-NEXT:    [[TMP2769:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2769]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2770:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2771:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3951:%.*]] = icmp slt i64 [[TMP2770]], [[TMP2771]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3951]], label [[IF_THEN3953:%.*]], label [[IF_END3954:%.*]]
+// SIMD-ONLY0:       if.then3953:
+// SIMD-ONLY0-NEXT:    [[TMP2772:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2772]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3954]]
+// SIMD-ONLY0:       if.end3954:
+// SIMD-ONLY0-NEXT:    [[TMP2773:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2773]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2774:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2775:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3955:%.*]] = icmp slt i64 [[TMP2774]], [[TMP2775]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3955]], label [[IF_THEN3957:%.*]], label [[IF_END3958:%.*]]
+// SIMD-ONLY0:       if.then3957:
+// SIMD-ONLY0-NEXT:    [[TMP2776:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2776]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3958]]
+// SIMD-ONLY0:       if.end3958:
+// SIMD-ONLY0-NEXT:    [[TMP2777:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2777]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2778:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2779:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3959:%.*]] = icmp eq i64 [[TMP2778]], [[TMP2779]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3959]], label [[IF_THEN3961:%.*]], label [[IF_END3962:%.*]]
+// SIMD-ONLY0:       if.then3961:
+// SIMD-ONLY0-NEXT:    [[TMP2780:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2780]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3962]]
+// SIMD-ONLY0:       if.end3962:
+// SIMD-ONLY0-NEXT:    [[TMP2781:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2781]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2782:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2783:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3963:%.*]] = icmp eq i64 [[TMP2782]], [[TMP2783]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3963]], label [[IF_THEN3965:%.*]], label [[IF_END3966:%.*]]
+// SIMD-ONLY0:       if.then3965:
+// SIMD-ONLY0-NEXT:    [[TMP2784:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2784]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3966]]
+// SIMD-ONLY0:       if.end3966:
+// SIMD-ONLY0-NEXT:    [[TMP2785:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2785]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2786:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2787:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3967:%.*]] = icmp eq i64 [[TMP2786]], [[TMP2787]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3967]], label [[IF_THEN3969:%.*]], label [[IF_ELSE3970:%.*]]
+// SIMD-ONLY0:       if.then3969:
+// SIMD-ONLY0-NEXT:    [[TMP2788:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2788]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3971:%.*]]
+// SIMD-ONLY0:       if.else3970:
+// SIMD-ONLY0-NEXT:    [[TMP2789:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2789]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3971]]
+// SIMD-ONLY0:       if.end3971:
+// SIMD-ONLY0-NEXT:    [[TMP2790:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2791:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3972:%.*]] = icmp eq i64 [[TMP2790]], [[TMP2791]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP3972]], label [[IF_THEN3974:%.*]], label [[IF_ELSE3975:%.*]]
+// SIMD-ONLY0:       if.then3974:
+// SIMD-ONLY0-NEXT:    [[TMP2792:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2792]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3976:%.*]]
+// SIMD-ONLY0:       if.else3975:
+// SIMD-ONLY0-NEXT:    [[TMP2793:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2793]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3976]]
+// SIMD-ONLY0:       if.end3976:
+// SIMD-ONLY0-NEXT:    [[TMP2794:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2795:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3977:%.*]] = icmp eq i64 [[TMP2794]], [[TMP2795]]
+// SIMD-ONLY0-NEXT:    [[CONV3978:%.*]] = zext i1 [[CMP3977]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV3979:%.*]] = sext i32 [[CONV3978]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV3979]], ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2796:%.*]] = load i64, ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL3980:%.*]] = icmp ne i64 [[TMP2796]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3980]], label [[IF_THEN3981:%.*]], label [[IF_END3982:%.*]]
+// SIMD-ONLY0:       if.then3981:
+// SIMD-ONLY0-NEXT:    [[TMP2797:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2797]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3982]]
+// SIMD-ONLY0:       if.end3982:
+// SIMD-ONLY0-NEXT:    [[TMP2798:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2799:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3983:%.*]] = icmp eq i64 [[TMP2798]], [[TMP2799]]
+// SIMD-ONLY0-NEXT:    [[CONV3984:%.*]] = zext i1 [[CMP3983]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV3985:%.*]] = sext i32 [[CONV3984]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV3985]], ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2800:%.*]] = load i64, ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL3986:%.*]] = icmp ne i64 [[TMP2800]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3986]], label [[IF_THEN3987:%.*]], label [[IF_END3988:%.*]]
+// SIMD-ONLY0:       if.then3987:
+// SIMD-ONLY0-NEXT:    [[TMP2801:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2801]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3988]]
+// SIMD-ONLY0:       if.end3988:
+// SIMD-ONLY0-NEXT:    [[TMP2802:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2803:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3989:%.*]] = icmp eq i64 [[TMP2802]], [[TMP2803]]
+// SIMD-ONLY0-NEXT:    [[CONV3990:%.*]] = zext i1 [[CMP3989]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV3991:%.*]] = sext i32 [[CONV3990]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV3991]], ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2804:%.*]] = load i64, ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL3992:%.*]] = icmp ne i64 [[TMP2804]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3992]], label [[IF_THEN3993:%.*]], label [[IF_ELSE3994:%.*]]
+// SIMD-ONLY0:       if.then3993:
+// SIMD-ONLY0-NEXT:    [[TMP2805:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2805]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3995:%.*]]
+// SIMD-ONLY0:       if.else3994:
+// SIMD-ONLY0-NEXT:    [[TMP2806:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2806]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END3995]]
+// SIMD-ONLY0:       if.end3995:
+// SIMD-ONLY0-NEXT:    [[TMP2807:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2808:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP3996:%.*]] = icmp eq i64 [[TMP2807]], [[TMP2808]]
+// SIMD-ONLY0-NEXT:    [[CONV3997:%.*]] = zext i1 [[CMP3996]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV3998:%.*]] = sext i32 [[CONV3997]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV3998]], ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2809:%.*]] = load i64, ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL3999:%.*]] = icmp ne i64 [[TMP2809]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL3999]], label [[IF_THEN4000:%.*]], label [[IF_ELSE4001:%.*]]
+// SIMD-ONLY0:       if.then4000:
+// SIMD-ONLY0-NEXT:    [[TMP2810:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2810]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4002:%.*]]
+// SIMD-ONLY0:       if.else4001:
+// SIMD-ONLY0-NEXT:    [[TMP2811:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2811]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4002]]
+// SIMD-ONLY0:       if.end4002:
+// SIMD-ONLY0-NEXT:    [[TMP2812:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2812]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2813:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2814:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4003:%.*]] = icmp sgt i64 [[TMP2813]], [[TMP2814]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4003]], label [[IF_THEN4005:%.*]], label [[IF_END4006:%.*]]
+// SIMD-ONLY0:       if.then4005:
+// SIMD-ONLY0-NEXT:    [[TMP2815:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2815]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4006]]
+// SIMD-ONLY0:       if.end4006:
+// SIMD-ONLY0-NEXT:    [[TMP2816:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2816]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2817:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2818:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4007:%.*]] = icmp sgt i64 [[TMP2817]], [[TMP2818]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4007]], label [[IF_THEN4009:%.*]], label [[IF_END4010:%.*]]
+// SIMD-ONLY0:       if.then4009:
+// SIMD-ONLY0-NEXT:    [[TMP2819:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2819]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4010]]
+// SIMD-ONLY0:       if.end4010:
+// SIMD-ONLY0-NEXT:    [[TMP2820:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2820]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2821:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2822:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4011:%.*]] = icmp slt i64 [[TMP2821]], [[TMP2822]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4011]], label [[IF_THEN4013:%.*]], label [[IF_END4014:%.*]]
+// SIMD-ONLY0:       if.then4013:
+// SIMD-ONLY0-NEXT:    [[TMP2823:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2823]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4014]]
+// SIMD-ONLY0:       if.end4014:
+// SIMD-ONLY0-NEXT:    [[TMP2824:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2824]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2825:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2826:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4015:%.*]] = icmp slt i64 [[TMP2825]], [[TMP2826]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4015]], label [[IF_THEN4017:%.*]], label [[IF_END4018:%.*]]
+// SIMD-ONLY0:       if.then4017:
+// SIMD-ONLY0-NEXT:    [[TMP2827:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2827]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4018]]
+// SIMD-ONLY0:       if.end4018:
+// SIMD-ONLY0-NEXT:    [[TMP2828:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2828]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2829:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2830:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4019:%.*]] = icmp eq i64 [[TMP2829]], [[TMP2830]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4019]], label [[IF_THEN4021:%.*]], label [[IF_END4022:%.*]]
+// SIMD-ONLY0:       if.then4021:
+// SIMD-ONLY0-NEXT:    [[TMP2831:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2831]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4022]]
+// SIMD-ONLY0:       if.end4022:
+// SIMD-ONLY0-NEXT:    [[TMP2832:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2832]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2833:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2834:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4023:%.*]] = icmp eq i64 [[TMP2833]], [[TMP2834]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4023]], label [[IF_THEN4025:%.*]], label [[IF_END4026:%.*]]
+// SIMD-ONLY0:       if.then4025:
+// SIMD-ONLY0-NEXT:    [[TMP2835:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2835]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4026]]
+// SIMD-ONLY0:       if.end4026:
+// SIMD-ONLY0-NEXT:    [[TMP2836:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2837:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4027:%.*]] = icmp sgt i64 [[TMP2836]], [[TMP2837]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4027]], label [[IF_THEN4029:%.*]], label [[IF_END4030:%.*]]
+// SIMD-ONLY0:       if.then4029:
+// SIMD-ONLY0-NEXT:    [[TMP2838:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2838]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4030]]
+// SIMD-ONLY0:       if.end4030:
+// SIMD-ONLY0-NEXT:    [[TMP2839:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2839]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2840:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2841:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4031:%.*]] = icmp sgt i64 [[TMP2840]], [[TMP2841]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4031]], label [[IF_THEN4033:%.*]], label [[IF_END4034:%.*]]
+// SIMD-ONLY0:       if.then4033:
+// SIMD-ONLY0-NEXT:    [[TMP2842:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2842]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4034]]
+// SIMD-ONLY0:       if.end4034:
+// SIMD-ONLY0-NEXT:    [[TMP2843:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2843]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2844:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2845:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4035:%.*]] = icmp slt i64 [[TMP2844]], [[TMP2845]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4035]], label [[IF_THEN4037:%.*]], label [[IF_END4038:%.*]]
+// SIMD-ONLY0:       if.then4037:
+// SIMD-ONLY0-NEXT:    [[TMP2846:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2846]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4038]]
+// SIMD-ONLY0:       if.end4038:
+// SIMD-ONLY0-NEXT:    [[TMP2847:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2847]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2848:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2849:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4039:%.*]] = icmp slt i64 [[TMP2848]], [[TMP2849]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4039]], label [[IF_THEN4041:%.*]], label [[IF_END4042:%.*]]
+// SIMD-ONLY0:       if.then4041:
+// SIMD-ONLY0-NEXT:    [[TMP2850:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2850]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4042]]
+// SIMD-ONLY0:       if.end4042:
+// SIMD-ONLY0-NEXT:    [[TMP2851:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2851]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2852:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2853:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4043:%.*]] = icmp eq i64 [[TMP2852]], [[TMP2853]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4043]], label [[IF_THEN4045:%.*]], label [[IF_END4046:%.*]]
+// SIMD-ONLY0:       if.then4045:
+// SIMD-ONLY0-NEXT:    [[TMP2854:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2854]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4046]]
+// SIMD-ONLY0:       if.end4046:
+// SIMD-ONLY0-NEXT:    [[TMP2855:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2855]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2856:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2857:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4047:%.*]] = icmp eq i64 [[TMP2856]], [[TMP2857]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4047]], label [[IF_THEN4049:%.*]], label [[IF_END4050:%.*]]
+// SIMD-ONLY0:       if.then4049:
+// SIMD-ONLY0-NEXT:    [[TMP2858:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2858]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4050]]
+// SIMD-ONLY0:       if.end4050:
+// SIMD-ONLY0-NEXT:    [[TMP2859:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2859]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2860:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2861:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4051:%.*]] = icmp eq i64 [[TMP2860]], [[TMP2861]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4051]], label [[IF_THEN4053:%.*]], label [[IF_ELSE4054:%.*]]
+// SIMD-ONLY0:       if.then4053:
+// SIMD-ONLY0-NEXT:    [[TMP2862:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2862]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4055:%.*]]
+// SIMD-ONLY0:       if.else4054:
+// SIMD-ONLY0-NEXT:    [[TMP2863:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2863]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4055]]
+// SIMD-ONLY0:       if.end4055:
+// SIMD-ONLY0-NEXT:    [[TMP2864:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2865:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4056:%.*]] = icmp eq i64 [[TMP2864]], [[TMP2865]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4056]], label [[IF_THEN4058:%.*]], label [[IF_ELSE4059:%.*]]
+// SIMD-ONLY0:       if.then4058:
+// SIMD-ONLY0-NEXT:    [[TMP2866:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2866]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4060:%.*]]
+// SIMD-ONLY0:       if.else4059:
+// SIMD-ONLY0-NEXT:    [[TMP2867:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2867]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4060]]
+// SIMD-ONLY0:       if.end4060:
+// SIMD-ONLY0-NEXT:    [[TMP2868:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2869:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4061:%.*]] = icmp eq i64 [[TMP2868]], [[TMP2869]]
+// SIMD-ONLY0-NEXT:    [[CONV4062:%.*]] = zext i1 [[CMP4061]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4063:%.*]] = sext i32 [[CONV4062]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4063]], ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2870:%.*]] = load i64, ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4064:%.*]] = icmp ne i64 [[TMP2870]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4064]], label [[IF_THEN4065:%.*]], label [[IF_END4066:%.*]]
+// SIMD-ONLY0:       if.then4065:
+// SIMD-ONLY0-NEXT:    [[TMP2871:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2871]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4066]]
+// SIMD-ONLY0:       if.end4066:
+// SIMD-ONLY0-NEXT:    [[TMP2872:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2873:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4067:%.*]] = icmp eq i64 [[TMP2872]], [[TMP2873]]
+// SIMD-ONLY0-NEXT:    [[CONV4068:%.*]] = zext i1 [[CMP4067]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4069:%.*]] = sext i32 [[CONV4068]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4069]], ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2874:%.*]] = load i64, ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4070:%.*]] = icmp ne i64 [[TMP2874]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4070]], label [[IF_THEN4071:%.*]], label [[IF_END4072:%.*]]
+// SIMD-ONLY0:       if.then4071:
+// SIMD-ONLY0-NEXT:    [[TMP2875:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2875]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4072]]
+// SIMD-ONLY0:       if.end4072:
+// SIMD-ONLY0-NEXT:    [[TMP2876:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2877:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4073:%.*]] = icmp eq i64 [[TMP2876]], [[TMP2877]]
+// SIMD-ONLY0-NEXT:    [[CONV4074:%.*]] = zext i1 [[CMP4073]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4075:%.*]] = sext i32 [[CONV4074]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4075]], ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2878:%.*]] = load i64, ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4076:%.*]] = icmp ne i64 [[TMP2878]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4076]], label [[IF_THEN4077:%.*]], label [[IF_ELSE4078:%.*]]
+// SIMD-ONLY0:       if.then4077:
+// SIMD-ONLY0-NEXT:    [[TMP2879:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2879]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4079:%.*]]
+// SIMD-ONLY0:       if.else4078:
+// SIMD-ONLY0-NEXT:    [[TMP2880:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2880]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4079]]
+// SIMD-ONLY0:       if.end4079:
+// SIMD-ONLY0-NEXT:    [[TMP2881:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2882:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4080:%.*]] = icmp eq i64 [[TMP2881]], [[TMP2882]]
+// SIMD-ONLY0-NEXT:    [[CONV4081:%.*]] = zext i1 [[CMP4080]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4082:%.*]] = sext i32 [[CONV4081]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4082]], ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2883:%.*]] = load i64, ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4083:%.*]] = icmp ne i64 [[TMP2883]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4083]], label [[IF_THEN4084:%.*]], label [[IF_ELSE4085:%.*]]
+// SIMD-ONLY0:       if.then4084:
+// SIMD-ONLY0-NEXT:    [[TMP2884:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2884]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4086:%.*]]
+// SIMD-ONLY0:       if.else4085:
+// SIMD-ONLY0-NEXT:    [[TMP2885:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2885]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4086]]
+// SIMD-ONLY0:       if.end4086:
+// SIMD-ONLY0-NEXT:    [[TMP2886:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2886]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2887:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2888:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4087:%.*]] = icmp sgt i64 [[TMP2887]], [[TMP2888]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4087]], label [[IF_THEN4089:%.*]], label [[IF_END4090:%.*]]
+// SIMD-ONLY0:       if.then4089:
+// SIMD-ONLY0-NEXT:    [[TMP2889:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2889]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4090]]
+// SIMD-ONLY0:       if.end4090:
+// SIMD-ONLY0-NEXT:    [[TMP2890:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2890]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2891:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2892:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4091:%.*]] = icmp sgt i64 [[TMP2891]], [[TMP2892]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4091]], label [[IF_THEN4093:%.*]], label [[IF_END4094:%.*]]
+// SIMD-ONLY0:       if.then4093:
+// SIMD-ONLY0-NEXT:    [[TMP2893:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2893]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4094]]
+// SIMD-ONLY0:       if.end4094:
+// SIMD-ONLY0-NEXT:    [[TMP2894:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2894]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2895:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2896:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4095:%.*]] = icmp slt i64 [[TMP2895]], [[TMP2896]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4095]], label [[IF_THEN4097:%.*]], label [[IF_END4098:%.*]]
+// SIMD-ONLY0:       if.then4097:
+// SIMD-ONLY0-NEXT:    [[TMP2897:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2897]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4098]]
+// SIMD-ONLY0:       if.end4098:
+// SIMD-ONLY0-NEXT:    [[TMP2898:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2898]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2899:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2900:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4099:%.*]] = icmp slt i64 [[TMP2899]], [[TMP2900]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4099]], label [[IF_THEN4101:%.*]], label [[IF_END4102:%.*]]
+// SIMD-ONLY0:       if.then4101:
+// SIMD-ONLY0-NEXT:    [[TMP2901:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2901]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4102]]
+// SIMD-ONLY0:       if.end4102:
+// SIMD-ONLY0-NEXT:    [[TMP2902:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2902]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2903:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2904:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4103:%.*]] = icmp eq i64 [[TMP2903]], [[TMP2904]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4103]], label [[IF_THEN4105:%.*]], label [[IF_END4106:%.*]]
+// SIMD-ONLY0:       if.then4105:
+// SIMD-ONLY0-NEXT:    [[TMP2905:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2905]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4106]]
+// SIMD-ONLY0:       if.end4106:
+// SIMD-ONLY0-NEXT:    [[TMP2906:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2906]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2907:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2908:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4107:%.*]] = icmp eq i64 [[TMP2907]], [[TMP2908]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4107]], label [[IF_THEN4109:%.*]], label [[IF_END4110:%.*]]
+// SIMD-ONLY0:       if.then4109:
+// SIMD-ONLY0-NEXT:    [[TMP2909:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2909]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4110]]
+// SIMD-ONLY0:       if.end4110:
+// SIMD-ONLY0-NEXT:    [[TMP2910:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2911:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4111:%.*]] = icmp sgt i64 [[TMP2910]], [[TMP2911]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4111]], label [[IF_THEN4113:%.*]], label [[IF_END4114:%.*]]
+// SIMD-ONLY0:       if.then4113:
+// SIMD-ONLY0-NEXT:    [[TMP2912:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2912]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4114]]
+// SIMD-ONLY0:       if.end4114:
+// SIMD-ONLY0-NEXT:    [[TMP2913:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2913]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2914:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2915:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4115:%.*]] = icmp sgt i64 [[TMP2914]], [[TMP2915]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4115]], label [[IF_THEN4117:%.*]], label [[IF_END4118:%.*]]
+// SIMD-ONLY0:       if.then4117:
+// SIMD-ONLY0-NEXT:    [[TMP2916:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2916]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4118]]
+// SIMD-ONLY0:       if.end4118:
+// SIMD-ONLY0-NEXT:    [[TMP2917:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2917]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2918:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2919:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4119:%.*]] = icmp slt i64 [[TMP2918]], [[TMP2919]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4119]], label [[IF_THEN4121:%.*]], label [[IF_END4122:%.*]]
+// SIMD-ONLY0:       if.then4121:
+// SIMD-ONLY0-NEXT:    [[TMP2920:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2920]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4122]]
+// SIMD-ONLY0:       if.end4122:
+// SIMD-ONLY0-NEXT:    [[TMP2921:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2921]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2922:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2923:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4123:%.*]] = icmp slt i64 [[TMP2922]], [[TMP2923]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4123]], label [[IF_THEN4125:%.*]], label [[IF_END4126:%.*]]
+// SIMD-ONLY0:       if.then4125:
+// SIMD-ONLY0-NEXT:    [[TMP2924:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2924]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4126]]
+// SIMD-ONLY0:       if.end4126:
+// SIMD-ONLY0-NEXT:    [[TMP2925:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2925]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2926:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2927:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4127:%.*]] = icmp eq i64 [[TMP2926]], [[TMP2927]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4127]], label [[IF_THEN4129:%.*]], label [[IF_END4130:%.*]]
+// SIMD-ONLY0:       if.then4129:
+// SIMD-ONLY0-NEXT:    [[TMP2928:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2928]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4130]]
+// SIMD-ONLY0:       if.end4130:
+// SIMD-ONLY0-NEXT:    [[TMP2929:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2929]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2930:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2931:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4131:%.*]] = icmp eq i64 [[TMP2930]], [[TMP2931]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4131]], label [[IF_THEN4133:%.*]], label [[IF_END4134:%.*]]
+// SIMD-ONLY0:       if.then4133:
+// SIMD-ONLY0-NEXT:    [[TMP2932:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2932]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4134]]
+// SIMD-ONLY0:       if.end4134:
+// SIMD-ONLY0-NEXT:    [[TMP2933:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2933]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2934:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2935:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4135:%.*]] = icmp eq i64 [[TMP2934]], [[TMP2935]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4135]], label [[IF_THEN4137:%.*]], label [[IF_ELSE4138:%.*]]
+// SIMD-ONLY0:       if.then4137:
+// SIMD-ONLY0-NEXT:    [[TMP2936:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2936]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4139:%.*]]
+// SIMD-ONLY0:       if.else4138:
+// SIMD-ONLY0-NEXT:    [[TMP2937:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2937]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4139]]
+// SIMD-ONLY0:       if.end4139:
+// SIMD-ONLY0-NEXT:    [[TMP2938:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2939:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4140:%.*]] = icmp eq i64 [[TMP2938]], [[TMP2939]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4140]], label [[IF_THEN4142:%.*]], label [[IF_ELSE4143:%.*]]
+// SIMD-ONLY0:       if.then4142:
+// SIMD-ONLY0-NEXT:    [[TMP2940:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2940]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4144:%.*]]
+// SIMD-ONLY0:       if.else4143:
+// SIMD-ONLY0-NEXT:    [[TMP2941:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2941]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4144]]
+// SIMD-ONLY0:       if.end4144:
+// SIMD-ONLY0-NEXT:    [[TMP2942:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2943:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4145:%.*]] = icmp eq i64 [[TMP2942]], [[TMP2943]]
+// SIMD-ONLY0-NEXT:    [[CONV4146:%.*]] = zext i1 [[CMP4145]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4147:%.*]] = sext i32 [[CONV4146]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4147]], ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2944:%.*]] = load i64, ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4148:%.*]] = icmp ne i64 [[TMP2944]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4148]], label [[IF_THEN4149:%.*]], label [[IF_END4150:%.*]]
+// SIMD-ONLY0:       if.then4149:
+// SIMD-ONLY0-NEXT:    [[TMP2945:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2945]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4150]]
+// SIMD-ONLY0:       if.end4150:
+// SIMD-ONLY0-NEXT:    [[TMP2946:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2947:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4151:%.*]] = icmp eq i64 [[TMP2946]], [[TMP2947]]
+// SIMD-ONLY0-NEXT:    [[CONV4152:%.*]] = zext i1 [[CMP4151]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4153:%.*]] = sext i32 [[CONV4152]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4153]], ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2948:%.*]] = load i64, ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4154:%.*]] = icmp ne i64 [[TMP2948]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4154]], label [[IF_THEN4155:%.*]], label [[IF_END4156:%.*]]
+// SIMD-ONLY0:       if.then4155:
+// SIMD-ONLY0-NEXT:    [[TMP2949:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2949]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4156]]
+// SIMD-ONLY0:       if.end4156:
+// SIMD-ONLY0-NEXT:    [[TMP2950:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2951:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4157:%.*]] = icmp eq i64 [[TMP2950]], [[TMP2951]]
+// SIMD-ONLY0-NEXT:    [[CONV4158:%.*]] = zext i1 [[CMP4157]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4159:%.*]] = sext i32 [[CONV4158]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4159]], ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2952:%.*]] = load i64, ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4160:%.*]] = icmp ne i64 [[TMP2952]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4160]], label [[IF_THEN4161:%.*]], label [[IF_ELSE4162:%.*]]
+// SIMD-ONLY0:       if.then4161:
+// SIMD-ONLY0-NEXT:    [[TMP2953:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2953]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4163:%.*]]
+// SIMD-ONLY0:       if.else4162:
+// SIMD-ONLY0-NEXT:    [[TMP2954:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2954]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4163]]
+// SIMD-ONLY0:       if.end4163:
+// SIMD-ONLY0-NEXT:    [[TMP2955:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2956:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4164:%.*]] = icmp eq i64 [[TMP2955]], [[TMP2956]]
+// SIMD-ONLY0-NEXT:    [[CONV4165:%.*]] = zext i1 [[CMP4164]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4166:%.*]] = sext i32 [[CONV4165]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4166]], ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2957:%.*]] = load i64, ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4167:%.*]] = icmp ne i64 [[TMP2957]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4167]], label [[IF_THEN4168:%.*]], label [[IF_ELSE4169:%.*]]
+// SIMD-ONLY0:       if.then4168:
+// SIMD-ONLY0-NEXT:    [[TMP2958:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2958]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4170:%.*]]
+// SIMD-ONLY0:       if.else4169:
+// SIMD-ONLY0-NEXT:    [[TMP2959:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2959]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4170]]
+// SIMD-ONLY0:       if.end4170:
+// SIMD-ONLY0-NEXT:    [[TMP2960:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2960]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2961:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2962:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4171:%.*]] = icmp sgt i64 [[TMP2961]], [[TMP2962]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4171]], label [[IF_THEN4173:%.*]], label [[IF_END4174:%.*]]
+// SIMD-ONLY0:       if.then4173:
+// SIMD-ONLY0-NEXT:    [[TMP2963:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2963]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4174]]
+// SIMD-ONLY0:       if.end4174:
+// SIMD-ONLY0-NEXT:    [[TMP2964:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2964]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2965:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2966:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4175:%.*]] = icmp sgt i64 [[TMP2965]], [[TMP2966]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4175]], label [[IF_THEN4177:%.*]], label [[IF_END4178:%.*]]
+// SIMD-ONLY0:       if.then4177:
+// SIMD-ONLY0-NEXT:    [[TMP2967:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2967]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4178]]
+// SIMD-ONLY0:       if.end4178:
+// SIMD-ONLY0-NEXT:    [[TMP2968:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2968]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2969:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2970:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4179:%.*]] = icmp slt i64 [[TMP2969]], [[TMP2970]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4179]], label [[IF_THEN4181:%.*]], label [[IF_END4182:%.*]]
+// SIMD-ONLY0:       if.then4181:
+// SIMD-ONLY0-NEXT:    [[TMP2971:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2971]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4182]]
+// SIMD-ONLY0:       if.end4182:
+// SIMD-ONLY0-NEXT:    [[TMP2972:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2972]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2973:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2974:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4183:%.*]] = icmp slt i64 [[TMP2973]], [[TMP2974]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4183]], label [[IF_THEN4185:%.*]], label [[IF_END4186:%.*]]
+// SIMD-ONLY0:       if.then4185:
+// SIMD-ONLY0-NEXT:    [[TMP2975:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2975]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4186]]
+// SIMD-ONLY0:       if.end4186:
+// SIMD-ONLY0-NEXT:    [[TMP2976:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2976]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2977:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2978:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4187:%.*]] = icmp eq i64 [[TMP2977]], [[TMP2978]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4187]], label [[IF_THEN4189:%.*]], label [[IF_END4190:%.*]]
+// SIMD-ONLY0:       if.then4189:
+// SIMD-ONLY0-NEXT:    [[TMP2979:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2979]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4190]]
+// SIMD-ONLY0:       if.end4190:
+// SIMD-ONLY0-NEXT:    [[TMP2980:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2980]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2981:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2982:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4191:%.*]] = icmp eq i64 [[TMP2981]], [[TMP2982]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4191]], label [[IF_THEN4193:%.*]], label [[IF_END4194:%.*]]
+// SIMD-ONLY0:       if.then4193:
+// SIMD-ONLY0-NEXT:    [[TMP2983:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2983]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4194]]
+// SIMD-ONLY0:       if.end4194:
+// SIMD-ONLY0-NEXT:    [[TMP2984:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2985:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4195:%.*]] = icmp sgt i64 [[TMP2984]], [[TMP2985]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4195]], label [[IF_THEN4197:%.*]], label [[IF_END4198:%.*]]
+// SIMD-ONLY0:       if.then4197:
+// SIMD-ONLY0-NEXT:    [[TMP2986:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2986]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4198]]
+// SIMD-ONLY0:       if.end4198:
+// SIMD-ONLY0-NEXT:    [[TMP2987:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2987]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2988:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2989:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4199:%.*]] = icmp sgt i64 [[TMP2988]], [[TMP2989]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4199]], label [[IF_THEN4201:%.*]], label [[IF_END4202:%.*]]
+// SIMD-ONLY0:       if.then4201:
+// SIMD-ONLY0-NEXT:    [[TMP2990:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2990]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4202]]
+// SIMD-ONLY0:       if.end4202:
+// SIMD-ONLY0-NEXT:    [[TMP2991:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2991]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2992:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2993:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4203:%.*]] = icmp slt i64 [[TMP2992]], [[TMP2993]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4203]], label [[IF_THEN4205:%.*]], label [[IF_END4206:%.*]]
+// SIMD-ONLY0:       if.then4205:
+// SIMD-ONLY0-NEXT:    [[TMP2994:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2994]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4206]]
+// SIMD-ONLY0:       if.end4206:
+// SIMD-ONLY0-NEXT:    [[TMP2995:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2995]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2996:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2997:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4207:%.*]] = icmp slt i64 [[TMP2996]], [[TMP2997]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4207]], label [[IF_THEN4209:%.*]], label [[IF_END4210:%.*]]
+// SIMD-ONLY0:       if.then4209:
+// SIMD-ONLY0-NEXT:    [[TMP2998:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2998]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4210]]
+// SIMD-ONLY0:       if.end4210:
+// SIMD-ONLY0-NEXT:    [[TMP2999:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP2999]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3000:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3001:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4211:%.*]] = icmp eq i64 [[TMP3000]], [[TMP3001]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4211]], label [[IF_THEN4213:%.*]], label [[IF_END4214:%.*]]
+// SIMD-ONLY0:       if.then4213:
+// SIMD-ONLY0-NEXT:    [[TMP3002:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3002]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4214]]
+// SIMD-ONLY0:       if.end4214:
+// SIMD-ONLY0-NEXT:    [[TMP3003:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3003]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3004:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3005:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4215:%.*]] = icmp eq i64 [[TMP3004]], [[TMP3005]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4215]], label [[IF_THEN4217:%.*]], label [[IF_END4218:%.*]]
+// SIMD-ONLY0:       if.then4217:
+// SIMD-ONLY0-NEXT:    [[TMP3006:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3006]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4218]]
+// SIMD-ONLY0:       if.end4218:
+// SIMD-ONLY0-NEXT:    [[TMP3007:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3007]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3008:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3009:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4219:%.*]] = icmp eq i64 [[TMP3008]], [[TMP3009]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4219]], label [[IF_THEN4221:%.*]], label [[IF_ELSE4222:%.*]]
+// SIMD-ONLY0:       if.then4221:
+// SIMD-ONLY0-NEXT:    [[TMP3010:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3010]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4223:%.*]]
+// SIMD-ONLY0:       if.else4222:
+// SIMD-ONLY0-NEXT:    [[TMP3011:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3011]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4223]]
+// SIMD-ONLY0:       if.end4223:
+// SIMD-ONLY0-NEXT:    [[TMP3012:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3013:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4224:%.*]] = icmp eq i64 [[TMP3012]], [[TMP3013]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4224]], label [[IF_THEN4226:%.*]], label [[IF_ELSE4227:%.*]]
+// SIMD-ONLY0:       if.then4226:
+// SIMD-ONLY0-NEXT:    [[TMP3014:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3014]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4228:%.*]]
+// SIMD-ONLY0:       if.else4227:
+// SIMD-ONLY0-NEXT:    [[TMP3015:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3015]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4228]]
+// SIMD-ONLY0:       if.end4228:
+// SIMD-ONLY0-NEXT:    [[TMP3016:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3017:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4229:%.*]] = icmp eq i64 [[TMP3016]], [[TMP3017]]
+// SIMD-ONLY0-NEXT:    [[CONV4230:%.*]] = zext i1 [[CMP4229]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4231:%.*]] = sext i32 [[CONV4230]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4231]], ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3018:%.*]] = load i64, ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4232:%.*]] = icmp ne i64 [[TMP3018]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4232]], label [[IF_THEN4233:%.*]], label [[IF_END4234:%.*]]
+// SIMD-ONLY0:       if.then4233:
+// SIMD-ONLY0-NEXT:    [[TMP3019:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3019]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4234]]
+// SIMD-ONLY0:       if.end4234:
+// SIMD-ONLY0-NEXT:    [[TMP3020:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3021:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4235:%.*]] = icmp eq i64 [[TMP3020]], [[TMP3021]]
+// SIMD-ONLY0-NEXT:    [[CONV4236:%.*]] = zext i1 [[CMP4235]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4237:%.*]] = sext i32 [[CONV4236]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4237]], ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3022:%.*]] = load i64, ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4238:%.*]] = icmp ne i64 [[TMP3022]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4238]], label [[IF_THEN4239:%.*]], label [[IF_END4240:%.*]]
+// SIMD-ONLY0:       if.then4239:
+// SIMD-ONLY0-NEXT:    [[TMP3023:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3023]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4240]]
+// SIMD-ONLY0:       if.end4240:
+// SIMD-ONLY0-NEXT:    [[TMP3024:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3025:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4241:%.*]] = icmp eq i64 [[TMP3024]], [[TMP3025]]
+// SIMD-ONLY0-NEXT:    [[CONV4242:%.*]] = zext i1 [[CMP4241]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4243:%.*]] = sext i32 [[CONV4242]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4243]], ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3026:%.*]] = load i64, ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4244:%.*]] = icmp ne i64 [[TMP3026]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4244]], label [[IF_THEN4245:%.*]], label [[IF_ELSE4246:%.*]]
+// SIMD-ONLY0:       if.then4245:
+// SIMD-ONLY0-NEXT:    [[TMP3027:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3027]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4247:%.*]]
+// SIMD-ONLY0:       if.else4246:
+// SIMD-ONLY0-NEXT:    [[TMP3028:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3028]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4247]]
+// SIMD-ONLY0:       if.end4247:
+// SIMD-ONLY0-NEXT:    [[TMP3029:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3030:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4248:%.*]] = icmp eq i64 [[TMP3029]], [[TMP3030]]
+// SIMD-ONLY0-NEXT:    [[CONV4249:%.*]] = zext i1 [[CMP4248]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4250:%.*]] = sext i32 [[CONV4249]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4250]], ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3031:%.*]] = load i64, ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4251:%.*]] = icmp ne i64 [[TMP3031]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4251]], label [[IF_THEN4252:%.*]], label [[IF_ELSE4253:%.*]]
+// SIMD-ONLY0:       if.then4252:
+// SIMD-ONLY0-NEXT:    [[TMP3032:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3032]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4254:%.*]]
+// SIMD-ONLY0:       if.else4253:
+// SIMD-ONLY0-NEXT:    [[TMP3033:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3033]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4254]]
+// SIMD-ONLY0:       if.end4254:
+// SIMD-ONLY0-NEXT:    [[TMP3034:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3034]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3035:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3036:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4255:%.*]] = icmp sgt i64 [[TMP3035]], [[TMP3036]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4255]], label [[IF_THEN4257:%.*]], label [[IF_END4258:%.*]]
+// SIMD-ONLY0:       if.then4257:
+// SIMD-ONLY0-NEXT:    [[TMP3037:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3037]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4258]]
+// SIMD-ONLY0:       if.end4258:
+// SIMD-ONLY0-NEXT:    [[TMP3038:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3038]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3039:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3040:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4259:%.*]] = icmp sgt i64 [[TMP3039]], [[TMP3040]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4259]], label [[IF_THEN4261:%.*]], label [[IF_END4262:%.*]]
+// SIMD-ONLY0:       if.then4261:
+// SIMD-ONLY0-NEXT:    [[TMP3041:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3041]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4262]]
+// SIMD-ONLY0:       if.end4262:
+// SIMD-ONLY0-NEXT:    [[TMP3042:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3042]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3043:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3044:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4263:%.*]] = icmp slt i64 [[TMP3043]], [[TMP3044]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4263]], label [[IF_THEN4265:%.*]], label [[IF_END4266:%.*]]
+// SIMD-ONLY0:       if.then4265:
+// SIMD-ONLY0-NEXT:    [[TMP3045:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3045]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4266]]
+// SIMD-ONLY0:       if.end4266:
+// SIMD-ONLY0-NEXT:    [[TMP3046:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3046]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3047:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3048:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4267:%.*]] = icmp slt i64 [[TMP3047]], [[TMP3048]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4267]], label [[IF_THEN4269:%.*]], label [[IF_END4270:%.*]]
+// SIMD-ONLY0:       if.then4269:
+// SIMD-ONLY0-NEXT:    [[TMP3049:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3049]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4270]]
+// SIMD-ONLY0:       if.end4270:
+// SIMD-ONLY0-NEXT:    [[TMP3050:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3050]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3051:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3052:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4271:%.*]] = icmp eq i64 [[TMP3051]], [[TMP3052]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4271]], label [[IF_THEN4273:%.*]], label [[IF_END4274:%.*]]
+// SIMD-ONLY0:       if.then4273:
+// SIMD-ONLY0-NEXT:    [[TMP3053:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3053]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4274]]
+// SIMD-ONLY0:       if.end4274:
+// SIMD-ONLY0-NEXT:    [[TMP3054:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3054]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3055:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3056:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4275:%.*]] = icmp eq i64 [[TMP3055]], [[TMP3056]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4275]], label [[IF_THEN4277:%.*]], label [[IF_END4278:%.*]]
+// SIMD-ONLY0:       if.then4277:
+// SIMD-ONLY0-NEXT:    [[TMP3057:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3057]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4278]]
+// SIMD-ONLY0:       if.end4278:
+// SIMD-ONLY0-NEXT:    [[TMP3058:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3059:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4279:%.*]] = icmp sgt i64 [[TMP3058]], [[TMP3059]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4279]], label [[IF_THEN4281:%.*]], label [[IF_END4282:%.*]]
+// SIMD-ONLY0:       if.then4281:
+// SIMD-ONLY0-NEXT:    [[TMP3060:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3060]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4282]]
+// SIMD-ONLY0:       if.end4282:
+// SIMD-ONLY0-NEXT:    [[TMP3061:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3061]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3062:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3063:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4283:%.*]] = icmp sgt i64 [[TMP3062]], [[TMP3063]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4283]], label [[IF_THEN4285:%.*]], label [[IF_END4286:%.*]]
+// SIMD-ONLY0:       if.then4285:
+// SIMD-ONLY0-NEXT:    [[TMP3064:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3064]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4286]]
+// SIMD-ONLY0:       if.end4286:
+// SIMD-ONLY0-NEXT:    [[TMP3065:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3065]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3066:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3067:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4287:%.*]] = icmp slt i64 [[TMP3066]], [[TMP3067]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4287]], label [[IF_THEN4289:%.*]], label [[IF_END4290:%.*]]
+// SIMD-ONLY0:       if.then4289:
+// SIMD-ONLY0-NEXT:    [[TMP3068:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3068]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4290]]
+// SIMD-ONLY0:       if.end4290:
+// SIMD-ONLY0-NEXT:    [[TMP3069:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3069]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3070:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3071:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4291:%.*]] = icmp slt i64 [[TMP3070]], [[TMP3071]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4291]], label [[IF_THEN4293:%.*]], label [[IF_END4294:%.*]]
+// SIMD-ONLY0:       if.then4293:
+// SIMD-ONLY0-NEXT:    [[TMP3072:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3072]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4294]]
+// SIMD-ONLY0:       if.end4294:
+// SIMD-ONLY0-NEXT:    [[TMP3073:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3073]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3074:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3075:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4295:%.*]] = icmp eq i64 [[TMP3074]], [[TMP3075]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4295]], label [[IF_THEN4297:%.*]], label [[IF_END4298:%.*]]
+// SIMD-ONLY0:       if.then4297:
+// SIMD-ONLY0-NEXT:    [[TMP3076:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3076]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4298]]
+// SIMD-ONLY0:       if.end4298:
+// SIMD-ONLY0-NEXT:    [[TMP3077:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3077]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3078:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3079:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4299:%.*]] = icmp eq i64 [[TMP3078]], [[TMP3079]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4299]], label [[IF_THEN4301:%.*]], label [[IF_END4302:%.*]]
+// SIMD-ONLY0:       if.then4301:
+// SIMD-ONLY0-NEXT:    [[TMP3080:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3080]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4302]]
+// SIMD-ONLY0:       if.end4302:
+// SIMD-ONLY0-NEXT:    [[TMP3081:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3081]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3082:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3083:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4303:%.*]] = icmp eq i64 [[TMP3082]], [[TMP3083]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4303]], label [[IF_THEN4305:%.*]], label [[IF_ELSE4306:%.*]]
+// SIMD-ONLY0:       if.then4305:
+// SIMD-ONLY0-NEXT:    [[TMP3084:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3084]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4307:%.*]]
+// SIMD-ONLY0:       if.else4306:
+// SIMD-ONLY0-NEXT:    [[TMP3085:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3085]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4307]]
+// SIMD-ONLY0:       if.end4307:
+// SIMD-ONLY0-NEXT:    [[TMP3086:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3087:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4308:%.*]] = icmp eq i64 [[TMP3086]], [[TMP3087]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4308]], label [[IF_THEN4310:%.*]], label [[IF_ELSE4311:%.*]]
+// SIMD-ONLY0:       if.then4310:
+// SIMD-ONLY0-NEXT:    [[TMP3088:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3088]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4312:%.*]]
+// SIMD-ONLY0:       if.else4311:
+// SIMD-ONLY0-NEXT:    [[TMP3089:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3089]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4312]]
+// SIMD-ONLY0:       if.end4312:
+// SIMD-ONLY0-NEXT:    [[TMP3090:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3091:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4313:%.*]] = icmp eq i64 [[TMP3090]], [[TMP3091]]
+// SIMD-ONLY0-NEXT:    [[CONV4314:%.*]] = zext i1 [[CMP4313]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4315:%.*]] = sext i32 [[CONV4314]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4315]], ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3092:%.*]] = load i64, ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4316:%.*]] = icmp ne i64 [[TMP3092]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4316]], label [[IF_THEN4317:%.*]], label [[IF_END4318:%.*]]
+// SIMD-ONLY0:       if.then4317:
+// SIMD-ONLY0-NEXT:    [[TMP3093:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3093]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4318]]
+// SIMD-ONLY0:       if.end4318:
+// SIMD-ONLY0-NEXT:    [[TMP3094:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3095:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4319:%.*]] = icmp eq i64 [[TMP3094]], [[TMP3095]]
+// SIMD-ONLY0-NEXT:    [[CONV4320:%.*]] = zext i1 [[CMP4319]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4321:%.*]] = sext i32 [[CONV4320]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4321]], ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3096:%.*]] = load i64, ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4322:%.*]] = icmp ne i64 [[TMP3096]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4322]], label [[IF_THEN4323:%.*]], label [[IF_END4324:%.*]]
+// SIMD-ONLY0:       if.then4323:
+// SIMD-ONLY0-NEXT:    [[TMP3097:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3097]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4324]]
+// SIMD-ONLY0:       if.end4324:
+// SIMD-ONLY0-NEXT:    [[TMP3098:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3099:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4325:%.*]] = icmp eq i64 [[TMP3098]], [[TMP3099]]
+// SIMD-ONLY0-NEXT:    [[CONV4326:%.*]] = zext i1 [[CMP4325]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4327:%.*]] = sext i32 [[CONV4326]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4327]], ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3100:%.*]] = load i64, ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4328:%.*]] = icmp ne i64 [[TMP3100]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4328]], label [[IF_THEN4329:%.*]], label [[IF_ELSE4330:%.*]]
+// SIMD-ONLY0:       if.then4329:
+// SIMD-ONLY0-NEXT:    [[TMP3101:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3101]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4331:%.*]]
+// SIMD-ONLY0:       if.else4330:
+// SIMD-ONLY0-NEXT:    [[TMP3102:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3102]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4331]]
+// SIMD-ONLY0:       if.end4331:
+// SIMD-ONLY0-NEXT:    [[TMP3103:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3104:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4332:%.*]] = icmp eq i64 [[TMP3103]], [[TMP3104]]
+// SIMD-ONLY0-NEXT:    [[CONV4333:%.*]] = zext i1 [[CMP4332]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4334:%.*]] = sext i32 [[CONV4333]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4334]], ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3105:%.*]] = load i64, ptr [[LR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4335:%.*]] = icmp ne i64 [[TMP3105]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4335]], label [[IF_THEN4336:%.*]], label [[IF_ELSE4337:%.*]]
+// SIMD-ONLY0:       if.then4336:
+// SIMD-ONLY0-NEXT:    [[TMP3106:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3106]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4338:%.*]]
+// SIMD-ONLY0:       if.else4337:
+// SIMD-ONLY0-NEXT:    [[TMP3107:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3107]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4338]]
+// SIMD-ONLY0:       if.end4338:
+// SIMD-ONLY0-NEXT:    [[TMP3108:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3108]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3109:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3110:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4339:%.*]] = icmp ugt i64 [[TMP3109]], [[TMP3110]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4339]], label [[IF_THEN4341:%.*]], label [[IF_END4342:%.*]]
+// SIMD-ONLY0:       if.then4341:
+// SIMD-ONLY0-NEXT:    [[TMP3111:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3111]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4342]]
+// SIMD-ONLY0:       if.end4342:
+// SIMD-ONLY0-NEXT:    [[TMP3112:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3112]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3113:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3114:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4343:%.*]] = icmp ugt i64 [[TMP3113]], [[TMP3114]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4343]], label [[IF_THEN4345:%.*]], label [[IF_END4346:%.*]]
+// SIMD-ONLY0:       if.then4345:
+// SIMD-ONLY0-NEXT:    [[TMP3115:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3115]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4346]]
+// SIMD-ONLY0:       if.end4346:
+// SIMD-ONLY0-NEXT:    [[TMP3116:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3116]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3117:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3118:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4347:%.*]] = icmp ult i64 [[TMP3117]], [[TMP3118]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4347]], label [[IF_THEN4349:%.*]], label [[IF_END4350:%.*]]
+// SIMD-ONLY0:       if.then4349:
+// SIMD-ONLY0-NEXT:    [[TMP3119:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3119]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4350]]
+// SIMD-ONLY0:       if.end4350:
+// SIMD-ONLY0-NEXT:    [[TMP3120:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3120]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3121:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3122:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4351:%.*]] = icmp ult i64 [[TMP3121]], [[TMP3122]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4351]], label [[IF_THEN4353:%.*]], label [[IF_END4354:%.*]]
+// SIMD-ONLY0:       if.then4353:
+// SIMD-ONLY0-NEXT:    [[TMP3123:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3123]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4354]]
+// SIMD-ONLY0:       if.end4354:
+// SIMD-ONLY0-NEXT:    [[TMP3124:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3124]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3125:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3126:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4355:%.*]] = icmp eq i64 [[TMP3125]], [[TMP3126]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4355]], label [[IF_THEN4357:%.*]], label [[IF_END4358:%.*]]
+// SIMD-ONLY0:       if.then4357:
+// SIMD-ONLY0-NEXT:    [[TMP3127:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3127]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4358]]
+// SIMD-ONLY0:       if.end4358:
+// SIMD-ONLY0-NEXT:    [[TMP3128:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3128]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3129:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3130:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4359:%.*]] = icmp eq i64 [[TMP3129]], [[TMP3130]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4359]], label [[IF_THEN4361:%.*]], label [[IF_END4362:%.*]]
+// SIMD-ONLY0:       if.then4361:
+// SIMD-ONLY0-NEXT:    [[TMP3131:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3131]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4362]]
+// SIMD-ONLY0:       if.end4362:
+// SIMD-ONLY0-NEXT:    [[TMP3132:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3133:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4363:%.*]] = icmp ugt i64 [[TMP3132]], [[TMP3133]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4363]], label [[IF_THEN4365:%.*]], label [[IF_END4366:%.*]]
+// SIMD-ONLY0:       if.then4365:
+// SIMD-ONLY0-NEXT:    [[TMP3134:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3134]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4366]]
+// SIMD-ONLY0:       if.end4366:
+// SIMD-ONLY0-NEXT:    [[TMP3135:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3135]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3136:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3137:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4367:%.*]] = icmp ugt i64 [[TMP3136]], [[TMP3137]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4367]], label [[IF_THEN4369:%.*]], label [[IF_END4370:%.*]]
+// SIMD-ONLY0:       if.then4369:
+// SIMD-ONLY0-NEXT:    [[TMP3138:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3138]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4370]]
+// SIMD-ONLY0:       if.end4370:
+// SIMD-ONLY0-NEXT:    [[TMP3139:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3139]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3140:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3141:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4371:%.*]] = icmp ult i64 [[TMP3140]], [[TMP3141]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4371]], label [[IF_THEN4373:%.*]], label [[IF_END4374:%.*]]
+// SIMD-ONLY0:       if.then4373:
+// SIMD-ONLY0-NEXT:    [[TMP3142:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3142]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4374]]
+// SIMD-ONLY0:       if.end4374:
+// SIMD-ONLY0-NEXT:    [[TMP3143:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3143]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3144:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3145:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4375:%.*]] = icmp ult i64 [[TMP3144]], [[TMP3145]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4375]], label [[IF_THEN4377:%.*]], label [[IF_END4378:%.*]]
+// SIMD-ONLY0:       if.then4377:
+// SIMD-ONLY0-NEXT:    [[TMP3146:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3146]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4378]]
+// SIMD-ONLY0:       if.end4378:
+// SIMD-ONLY0-NEXT:    [[TMP3147:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3147]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3148:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3149:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4379:%.*]] = icmp eq i64 [[TMP3148]], [[TMP3149]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4379]], label [[IF_THEN4381:%.*]], label [[IF_END4382:%.*]]
+// SIMD-ONLY0:       if.then4381:
+// SIMD-ONLY0-NEXT:    [[TMP3150:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3150]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4382]]
+// SIMD-ONLY0:       if.end4382:
+// SIMD-ONLY0-NEXT:    [[TMP3151:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3151]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3152:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3153:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4383:%.*]] = icmp eq i64 [[TMP3152]], [[TMP3153]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4383]], label [[IF_THEN4385:%.*]], label [[IF_END4386:%.*]]
+// SIMD-ONLY0:       if.then4385:
+// SIMD-ONLY0-NEXT:    [[TMP3154:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3154]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4386]]
+// SIMD-ONLY0:       if.end4386:
+// SIMD-ONLY0-NEXT:    [[TMP3155:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3155]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3156:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3157:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4387:%.*]] = icmp eq i64 [[TMP3156]], [[TMP3157]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4387]], label [[IF_THEN4389:%.*]], label [[IF_ELSE4390:%.*]]
+// SIMD-ONLY0:       if.then4389:
+// SIMD-ONLY0-NEXT:    [[TMP3158:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3158]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4391:%.*]]
+// SIMD-ONLY0:       if.else4390:
+// SIMD-ONLY0-NEXT:    [[TMP3159:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3159]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4391]]
+// SIMD-ONLY0:       if.end4391:
+// SIMD-ONLY0-NEXT:    [[TMP3160:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3161:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4392:%.*]] = icmp eq i64 [[TMP3160]], [[TMP3161]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4392]], label [[IF_THEN4394:%.*]], label [[IF_ELSE4395:%.*]]
+// SIMD-ONLY0:       if.then4394:
+// SIMD-ONLY0-NEXT:    [[TMP3162:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3162]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4396:%.*]]
+// SIMD-ONLY0:       if.else4395:
+// SIMD-ONLY0-NEXT:    [[TMP3163:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3163]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4396]]
+// SIMD-ONLY0:       if.end4396:
+// SIMD-ONLY0-NEXT:    [[TMP3164:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3165:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4397:%.*]] = icmp eq i64 [[TMP3164]], [[TMP3165]]
+// SIMD-ONLY0-NEXT:    [[CONV4398:%.*]] = zext i1 [[CMP4397]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4399:%.*]] = sext i32 [[CONV4398]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4399]], ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3166:%.*]] = load i64, ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4400:%.*]] = icmp ne i64 [[TMP3166]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4400]], label [[IF_THEN4401:%.*]], label [[IF_END4402:%.*]]
+// SIMD-ONLY0:       if.then4401:
+// SIMD-ONLY0-NEXT:    [[TMP3167:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3167]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4402]]
+// SIMD-ONLY0:       if.end4402:
+// SIMD-ONLY0-NEXT:    [[TMP3168:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3169:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4403:%.*]] = icmp eq i64 [[TMP3168]], [[TMP3169]]
+// SIMD-ONLY0-NEXT:    [[CONV4404:%.*]] = zext i1 [[CMP4403]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4405:%.*]] = sext i32 [[CONV4404]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4405]], ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3170:%.*]] = load i64, ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4406:%.*]] = icmp ne i64 [[TMP3170]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4406]], label [[IF_THEN4407:%.*]], label [[IF_END4408:%.*]]
+// SIMD-ONLY0:       if.then4407:
+// SIMD-ONLY0-NEXT:    [[TMP3171:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3171]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4408]]
+// SIMD-ONLY0:       if.end4408:
+// SIMD-ONLY0-NEXT:    [[TMP3172:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3173:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4409:%.*]] = icmp eq i64 [[TMP3172]], [[TMP3173]]
+// SIMD-ONLY0-NEXT:    [[CONV4410:%.*]] = zext i1 [[CMP4409]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4411:%.*]] = sext i32 [[CONV4410]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4411]], ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3174:%.*]] = load i64, ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4412:%.*]] = icmp ne i64 [[TMP3174]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4412]], label [[IF_THEN4413:%.*]], label [[IF_ELSE4414:%.*]]
+// SIMD-ONLY0:       if.then4413:
+// SIMD-ONLY0-NEXT:    [[TMP3175:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3175]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4415:%.*]]
+// SIMD-ONLY0:       if.else4414:
+// SIMD-ONLY0-NEXT:    [[TMP3176:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3176]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4415]]
+// SIMD-ONLY0:       if.end4415:
+// SIMD-ONLY0-NEXT:    [[TMP3177:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3178:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4416:%.*]] = icmp eq i64 [[TMP3177]], [[TMP3178]]
+// SIMD-ONLY0-NEXT:    [[CONV4417:%.*]] = zext i1 [[CMP4416]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4418:%.*]] = sext i32 [[CONV4417]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4418]], ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3179:%.*]] = load i64, ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4419:%.*]] = icmp ne i64 [[TMP3179]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4419]], label [[IF_THEN4420:%.*]], label [[IF_ELSE4421:%.*]]
+// SIMD-ONLY0:       if.then4420:
+// SIMD-ONLY0-NEXT:    [[TMP3180:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3180]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4422:%.*]]
+// SIMD-ONLY0:       if.else4421:
+// SIMD-ONLY0-NEXT:    [[TMP3181:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3181]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4422]]
+// SIMD-ONLY0:       if.end4422:
+// SIMD-ONLY0-NEXT:    [[TMP3182:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3182]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3183:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3184:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4423:%.*]] = icmp ugt i64 [[TMP3183]], [[TMP3184]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4423]], label [[IF_THEN4425:%.*]], label [[IF_END4426:%.*]]
+// SIMD-ONLY0:       if.then4425:
+// SIMD-ONLY0-NEXT:    [[TMP3185:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3185]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4426]]
+// SIMD-ONLY0:       if.end4426:
+// SIMD-ONLY0-NEXT:    [[TMP3186:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3186]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3187:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3188:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4427:%.*]] = icmp ugt i64 [[TMP3187]], [[TMP3188]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4427]], label [[IF_THEN4429:%.*]], label [[IF_END4430:%.*]]
+// SIMD-ONLY0:       if.then4429:
+// SIMD-ONLY0-NEXT:    [[TMP3189:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3189]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4430]]
+// SIMD-ONLY0:       if.end4430:
+// SIMD-ONLY0-NEXT:    [[TMP3190:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3190]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3191:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3192:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4431:%.*]] = icmp ult i64 [[TMP3191]], [[TMP3192]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4431]], label [[IF_THEN4433:%.*]], label [[IF_END4434:%.*]]
+// SIMD-ONLY0:       if.then4433:
+// SIMD-ONLY0-NEXT:    [[TMP3193:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3193]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4434]]
+// SIMD-ONLY0:       if.end4434:
+// SIMD-ONLY0-NEXT:    [[TMP3194:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3194]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3195:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3196:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4435:%.*]] = icmp ult i64 [[TMP3195]], [[TMP3196]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4435]], label [[IF_THEN4437:%.*]], label [[IF_END4438:%.*]]
+// SIMD-ONLY0:       if.then4437:
+// SIMD-ONLY0-NEXT:    [[TMP3197:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3197]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4438]]
+// SIMD-ONLY0:       if.end4438:
+// SIMD-ONLY0-NEXT:    [[TMP3198:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3198]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3199:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3200:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4439:%.*]] = icmp eq i64 [[TMP3199]], [[TMP3200]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4439]], label [[IF_THEN4441:%.*]], label [[IF_END4442:%.*]]
+// SIMD-ONLY0:       if.then4441:
+// SIMD-ONLY0-NEXT:    [[TMP3201:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3201]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4442]]
+// SIMD-ONLY0:       if.end4442:
+// SIMD-ONLY0-NEXT:    [[TMP3202:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3202]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3203:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3204:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4443:%.*]] = icmp eq i64 [[TMP3203]], [[TMP3204]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4443]], label [[IF_THEN4445:%.*]], label [[IF_END4446:%.*]]
+// SIMD-ONLY0:       if.then4445:
+// SIMD-ONLY0-NEXT:    [[TMP3205:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3205]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4446]]
+// SIMD-ONLY0:       if.end4446:
+// SIMD-ONLY0-NEXT:    [[TMP3206:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3207:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4447:%.*]] = icmp ugt i64 [[TMP3206]], [[TMP3207]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4447]], label [[IF_THEN4449:%.*]], label [[IF_END4450:%.*]]
+// SIMD-ONLY0:       if.then4449:
+// SIMD-ONLY0-NEXT:    [[TMP3208:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3208]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4450]]
+// SIMD-ONLY0:       if.end4450:
+// SIMD-ONLY0-NEXT:    [[TMP3209:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3209]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3210:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3211:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4451:%.*]] = icmp ugt i64 [[TMP3210]], [[TMP3211]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4451]], label [[IF_THEN4453:%.*]], label [[IF_END4454:%.*]]
+// SIMD-ONLY0:       if.then4453:
+// SIMD-ONLY0-NEXT:    [[TMP3212:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3212]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4454]]
+// SIMD-ONLY0:       if.end4454:
+// SIMD-ONLY0-NEXT:    [[TMP3213:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3213]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3214:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3215:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4455:%.*]] = icmp ult i64 [[TMP3214]], [[TMP3215]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4455]], label [[IF_THEN4457:%.*]], label [[IF_END4458:%.*]]
+// SIMD-ONLY0:       if.then4457:
+// SIMD-ONLY0-NEXT:    [[TMP3216:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3216]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4458]]
+// SIMD-ONLY0:       if.end4458:
+// SIMD-ONLY0-NEXT:    [[TMP3217:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3217]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3218:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3219:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4459:%.*]] = icmp ult i64 [[TMP3218]], [[TMP3219]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4459]], label [[IF_THEN4461:%.*]], label [[IF_END4462:%.*]]
+// SIMD-ONLY0:       if.then4461:
+// SIMD-ONLY0-NEXT:    [[TMP3220:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3220]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4462]]
+// SIMD-ONLY0:       if.end4462:
+// SIMD-ONLY0-NEXT:    [[TMP3221:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3221]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3222:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3223:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4463:%.*]] = icmp eq i64 [[TMP3222]], [[TMP3223]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4463]], label [[IF_THEN4465:%.*]], label [[IF_END4466:%.*]]
+// SIMD-ONLY0:       if.then4465:
+// SIMD-ONLY0-NEXT:    [[TMP3224:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3224]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4466]]
+// SIMD-ONLY0:       if.end4466:
+// SIMD-ONLY0-NEXT:    [[TMP3225:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3225]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3226:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3227:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4467:%.*]] = icmp eq i64 [[TMP3226]], [[TMP3227]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4467]], label [[IF_THEN4469:%.*]], label [[IF_END4470:%.*]]
+// SIMD-ONLY0:       if.then4469:
+// SIMD-ONLY0-NEXT:    [[TMP3228:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3228]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4470]]
+// SIMD-ONLY0:       if.end4470:
+// SIMD-ONLY0-NEXT:    [[TMP3229:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3229]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3230:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3231:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4471:%.*]] = icmp eq i64 [[TMP3230]], [[TMP3231]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4471]], label [[IF_THEN4473:%.*]], label [[IF_ELSE4474:%.*]]
+// SIMD-ONLY0:       if.then4473:
+// SIMD-ONLY0-NEXT:    [[TMP3232:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3232]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4475:%.*]]
+// SIMD-ONLY0:       if.else4474:
+// SIMD-ONLY0-NEXT:    [[TMP3233:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3233]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4475]]
+// SIMD-ONLY0:       if.end4475:
+// SIMD-ONLY0-NEXT:    [[TMP3234:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3235:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4476:%.*]] = icmp eq i64 [[TMP3234]], [[TMP3235]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4476]], label [[IF_THEN4478:%.*]], label [[IF_ELSE4479:%.*]]
+// SIMD-ONLY0:       if.then4478:
+// SIMD-ONLY0-NEXT:    [[TMP3236:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3236]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4480:%.*]]
+// SIMD-ONLY0:       if.else4479:
+// SIMD-ONLY0-NEXT:    [[TMP3237:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3237]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4480]]
+// SIMD-ONLY0:       if.end4480:
+// SIMD-ONLY0-NEXT:    [[TMP3238:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3239:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4481:%.*]] = icmp eq i64 [[TMP3238]], [[TMP3239]]
+// SIMD-ONLY0-NEXT:    [[CONV4482:%.*]] = zext i1 [[CMP4481]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4483:%.*]] = sext i32 [[CONV4482]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4483]], ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3240:%.*]] = load i64, ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4484:%.*]] = icmp ne i64 [[TMP3240]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4484]], label [[IF_THEN4485:%.*]], label [[IF_END4486:%.*]]
+// SIMD-ONLY0:       if.then4485:
+// SIMD-ONLY0-NEXT:    [[TMP3241:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3241]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4486]]
+// SIMD-ONLY0:       if.end4486:
+// SIMD-ONLY0-NEXT:    [[TMP3242:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3243:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4487:%.*]] = icmp eq i64 [[TMP3242]], [[TMP3243]]
+// SIMD-ONLY0-NEXT:    [[CONV4488:%.*]] = zext i1 [[CMP4487]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4489:%.*]] = sext i32 [[CONV4488]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4489]], ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3244:%.*]] = load i64, ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4490:%.*]] = icmp ne i64 [[TMP3244]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4490]], label [[IF_THEN4491:%.*]], label [[IF_END4492:%.*]]
+// SIMD-ONLY0:       if.then4491:
+// SIMD-ONLY0-NEXT:    [[TMP3245:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3245]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4492]]
+// SIMD-ONLY0:       if.end4492:
+// SIMD-ONLY0-NEXT:    [[TMP3246:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3247:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4493:%.*]] = icmp eq i64 [[TMP3246]], [[TMP3247]]
+// SIMD-ONLY0-NEXT:    [[CONV4494:%.*]] = zext i1 [[CMP4493]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4495:%.*]] = sext i32 [[CONV4494]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4495]], ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3248:%.*]] = load i64, ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4496:%.*]] = icmp ne i64 [[TMP3248]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4496]], label [[IF_THEN4497:%.*]], label [[IF_ELSE4498:%.*]]
+// SIMD-ONLY0:       if.then4497:
+// SIMD-ONLY0-NEXT:    [[TMP3249:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3249]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4499:%.*]]
+// SIMD-ONLY0:       if.else4498:
+// SIMD-ONLY0-NEXT:    [[TMP3250:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3250]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4499]]
+// SIMD-ONLY0:       if.end4499:
+// SIMD-ONLY0-NEXT:    [[TMP3251:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3252:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4500:%.*]] = icmp eq i64 [[TMP3251]], [[TMP3252]]
+// SIMD-ONLY0-NEXT:    [[CONV4501:%.*]] = zext i1 [[CMP4500]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4502:%.*]] = sext i32 [[CONV4501]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4502]], ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3253:%.*]] = load i64, ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4503:%.*]] = icmp ne i64 [[TMP3253]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4503]], label [[IF_THEN4504:%.*]], label [[IF_ELSE4505:%.*]]
+// SIMD-ONLY0:       if.then4504:
+// SIMD-ONLY0-NEXT:    [[TMP3254:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3254]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4506:%.*]]
+// SIMD-ONLY0:       if.else4505:
+// SIMD-ONLY0-NEXT:    [[TMP3255:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3255]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4506]]
+// SIMD-ONLY0:       if.end4506:
+// SIMD-ONLY0-NEXT:    [[TMP3256:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3256]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3257:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3258:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4507:%.*]] = icmp ugt i64 [[TMP3257]], [[TMP3258]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4507]], label [[IF_THEN4509:%.*]], label [[IF_END4510:%.*]]
+// SIMD-ONLY0:       if.then4509:
+// SIMD-ONLY0-NEXT:    [[TMP3259:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3259]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4510]]
+// SIMD-ONLY0:       if.end4510:
+// SIMD-ONLY0-NEXT:    [[TMP3260:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3260]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3261:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3262:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4511:%.*]] = icmp ugt i64 [[TMP3261]], [[TMP3262]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4511]], label [[IF_THEN4513:%.*]], label [[IF_END4514:%.*]]
+// SIMD-ONLY0:       if.then4513:
+// SIMD-ONLY0-NEXT:    [[TMP3263:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3263]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4514]]
+// SIMD-ONLY0:       if.end4514:
+// SIMD-ONLY0-NEXT:    [[TMP3264:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3264]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3265:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3266:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4515:%.*]] = icmp ult i64 [[TMP3265]], [[TMP3266]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4515]], label [[IF_THEN4517:%.*]], label [[IF_END4518:%.*]]
+// SIMD-ONLY0:       if.then4517:
+// SIMD-ONLY0-NEXT:    [[TMP3267:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3267]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4518]]
+// SIMD-ONLY0:       if.end4518:
+// SIMD-ONLY0-NEXT:    [[TMP3268:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3268]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3269:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3270:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4519:%.*]] = icmp ult i64 [[TMP3269]], [[TMP3270]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4519]], label [[IF_THEN4521:%.*]], label [[IF_END4522:%.*]]
+// SIMD-ONLY0:       if.then4521:
+// SIMD-ONLY0-NEXT:    [[TMP3271:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3271]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4522]]
+// SIMD-ONLY0:       if.end4522:
+// SIMD-ONLY0-NEXT:    [[TMP3272:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3272]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3273:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3274:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4523:%.*]] = icmp eq i64 [[TMP3273]], [[TMP3274]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4523]], label [[IF_THEN4525:%.*]], label [[IF_END4526:%.*]]
+// SIMD-ONLY0:       if.then4525:
+// SIMD-ONLY0-NEXT:    [[TMP3275:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3275]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4526]]
+// SIMD-ONLY0:       if.end4526:
+// SIMD-ONLY0-NEXT:    [[TMP3276:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3276]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3277:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3278:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4527:%.*]] = icmp eq i64 [[TMP3277]], [[TMP3278]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4527]], label [[IF_THEN4529:%.*]], label [[IF_END4530:%.*]]
+// SIMD-ONLY0:       if.then4529:
+// SIMD-ONLY0-NEXT:    [[TMP3279:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3279]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4530]]
+// SIMD-ONLY0:       if.end4530:
+// SIMD-ONLY0-NEXT:    [[TMP3280:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3281:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4531:%.*]] = icmp ugt i64 [[TMP3280]], [[TMP3281]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4531]], label [[IF_THEN4533:%.*]], label [[IF_END4534:%.*]]
+// SIMD-ONLY0:       if.then4533:
+// SIMD-ONLY0-NEXT:    [[TMP3282:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3282]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4534]]
+// SIMD-ONLY0:       if.end4534:
+// SIMD-ONLY0-NEXT:    [[TMP3283:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3283]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3284:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3285:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4535:%.*]] = icmp ugt i64 [[TMP3284]], [[TMP3285]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4535]], label [[IF_THEN4537:%.*]], label [[IF_END4538:%.*]]
+// SIMD-ONLY0:       if.then4537:
+// SIMD-ONLY0-NEXT:    [[TMP3286:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3286]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4538]]
+// SIMD-ONLY0:       if.end4538:
+// SIMD-ONLY0-NEXT:    [[TMP3287:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3287]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3288:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3289:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4539:%.*]] = icmp ult i64 [[TMP3288]], [[TMP3289]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4539]], label [[IF_THEN4541:%.*]], label [[IF_END4542:%.*]]
+// SIMD-ONLY0:       if.then4541:
+// SIMD-ONLY0-NEXT:    [[TMP3290:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3290]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4542]]
+// SIMD-ONLY0:       if.end4542:
+// SIMD-ONLY0-NEXT:    [[TMP3291:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3291]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3292:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3293:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4543:%.*]] = icmp ult i64 [[TMP3292]], [[TMP3293]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4543]], label [[IF_THEN4545:%.*]], label [[IF_END4546:%.*]]
+// SIMD-ONLY0:       if.then4545:
+// SIMD-ONLY0-NEXT:    [[TMP3294:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3294]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4546]]
+// SIMD-ONLY0:       if.end4546:
+// SIMD-ONLY0-NEXT:    [[TMP3295:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3295]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3296:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3297:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4547:%.*]] = icmp eq i64 [[TMP3296]], [[TMP3297]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4547]], label [[IF_THEN4549:%.*]], label [[IF_END4550:%.*]]
+// SIMD-ONLY0:       if.then4549:
+// SIMD-ONLY0-NEXT:    [[TMP3298:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3298]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4550]]
+// SIMD-ONLY0:       if.end4550:
+// SIMD-ONLY0-NEXT:    [[TMP3299:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3299]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3300:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3301:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4551:%.*]] = icmp eq i64 [[TMP3300]], [[TMP3301]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4551]], label [[IF_THEN4553:%.*]], label [[IF_END4554:%.*]]
+// SIMD-ONLY0:       if.then4553:
+// SIMD-ONLY0-NEXT:    [[TMP3302:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3302]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4554]]
+// SIMD-ONLY0:       if.end4554:
+// SIMD-ONLY0-NEXT:    [[TMP3303:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3303]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3304:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3305:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4555:%.*]] = icmp eq i64 [[TMP3304]], [[TMP3305]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4555]], label [[IF_THEN4557:%.*]], label [[IF_ELSE4558:%.*]]
+// SIMD-ONLY0:       if.then4557:
+// SIMD-ONLY0-NEXT:    [[TMP3306:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3306]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4559:%.*]]
+// SIMD-ONLY0:       if.else4558:
+// SIMD-ONLY0-NEXT:    [[TMP3307:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3307]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4559]]
+// SIMD-ONLY0:       if.end4559:
+// SIMD-ONLY0-NEXT:    [[TMP3308:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3309:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4560:%.*]] = icmp eq i64 [[TMP3308]], [[TMP3309]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4560]], label [[IF_THEN4562:%.*]], label [[IF_ELSE4563:%.*]]
+// SIMD-ONLY0:       if.then4562:
+// SIMD-ONLY0-NEXT:    [[TMP3310:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3310]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4564:%.*]]
+// SIMD-ONLY0:       if.else4563:
+// SIMD-ONLY0-NEXT:    [[TMP3311:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3311]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4564]]
+// SIMD-ONLY0:       if.end4564:
+// SIMD-ONLY0-NEXT:    [[TMP3312:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3313:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4565:%.*]] = icmp eq i64 [[TMP3312]], [[TMP3313]]
+// SIMD-ONLY0-NEXT:    [[CONV4566:%.*]] = zext i1 [[CMP4565]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4567:%.*]] = sext i32 [[CONV4566]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4567]], ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3314:%.*]] = load i64, ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4568:%.*]] = icmp ne i64 [[TMP3314]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4568]], label [[IF_THEN4569:%.*]], label [[IF_END4570:%.*]]
+// SIMD-ONLY0:       if.then4569:
+// SIMD-ONLY0-NEXT:    [[TMP3315:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3315]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4570]]
+// SIMD-ONLY0:       if.end4570:
+// SIMD-ONLY0-NEXT:    [[TMP3316:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3317:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4571:%.*]] = icmp eq i64 [[TMP3316]], [[TMP3317]]
+// SIMD-ONLY0-NEXT:    [[CONV4572:%.*]] = zext i1 [[CMP4571]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4573:%.*]] = sext i32 [[CONV4572]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4573]], ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3318:%.*]] = load i64, ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4574:%.*]] = icmp ne i64 [[TMP3318]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4574]], label [[IF_THEN4575:%.*]], label [[IF_END4576:%.*]]
+// SIMD-ONLY0:       if.then4575:
+// SIMD-ONLY0-NEXT:    [[TMP3319:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3319]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4576]]
+// SIMD-ONLY0:       if.end4576:
+// SIMD-ONLY0-NEXT:    [[TMP3320:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3321:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4577:%.*]] = icmp eq i64 [[TMP3320]], [[TMP3321]]
+// SIMD-ONLY0-NEXT:    [[CONV4578:%.*]] = zext i1 [[CMP4577]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4579:%.*]] = sext i32 [[CONV4578]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4579]], ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3322:%.*]] = load i64, ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4580:%.*]] = icmp ne i64 [[TMP3322]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4580]], label [[IF_THEN4581:%.*]], label [[IF_ELSE4582:%.*]]
+// SIMD-ONLY0:       if.then4581:
+// SIMD-ONLY0-NEXT:    [[TMP3323:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3323]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4583:%.*]]
+// SIMD-ONLY0:       if.else4582:
+// SIMD-ONLY0-NEXT:    [[TMP3324:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3324]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4583]]
+// SIMD-ONLY0:       if.end4583:
+// SIMD-ONLY0-NEXT:    [[TMP3325:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3326:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4584:%.*]] = icmp eq i64 [[TMP3325]], [[TMP3326]]
+// SIMD-ONLY0-NEXT:    [[CONV4585:%.*]] = zext i1 [[CMP4584]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4586:%.*]] = sext i32 [[CONV4585]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4586]], ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3327:%.*]] = load i64, ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4587:%.*]] = icmp ne i64 [[TMP3327]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4587]], label [[IF_THEN4588:%.*]], label [[IF_ELSE4589:%.*]]
+// SIMD-ONLY0:       if.then4588:
+// SIMD-ONLY0-NEXT:    [[TMP3328:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3328]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4590:%.*]]
+// SIMD-ONLY0:       if.else4589:
+// SIMD-ONLY0-NEXT:    [[TMP3329:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3329]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4590]]
+// SIMD-ONLY0:       if.end4590:
+// SIMD-ONLY0-NEXT:    [[TMP3330:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3330]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3331:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3332:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4591:%.*]] = icmp ugt i64 [[TMP3331]], [[TMP3332]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4591]], label [[IF_THEN4593:%.*]], label [[IF_END4594:%.*]]
+// SIMD-ONLY0:       if.then4593:
+// SIMD-ONLY0-NEXT:    [[TMP3333:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3333]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4594]]
+// SIMD-ONLY0:       if.end4594:
+// SIMD-ONLY0-NEXT:    [[TMP3334:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3334]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3335:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3336:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4595:%.*]] = icmp ugt i64 [[TMP3335]], [[TMP3336]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4595]], label [[IF_THEN4597:%.*]], label [[IF_END4598:%.*]]
+// SIMD-ONLY0:       if.then4597:
+// SIMD-ONLY0-NEXT:    [[TMP3337:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3337]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4598]]
+// SIMD-ONLY0:       if.end4598:
+// SIMD-ONLY0-NEXT:    [[TMP3338:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3338]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3339:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3340:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4599:%.*]] = icmp ult i64 [[TMP3339]], [[TMP3340]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4599]], label [[IF_THEN4601:%.*]], label [[IF_END4602:%.*]]
+// SIMD-ONLY0:       if.then4601:
+// SIMD-ONLY0-NEXT:    [[TMP3341:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3341]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4602]]
+// SIMD-ONLY0:       if.end4602:
+// SIMD-ONLY0-NEXT:    [[TMP3342:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3342]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3343:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3344:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4603:%.*]] = icmp ult i64 [[TMP3343]], [[TMP3344]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4603]], label [[IF_THEN4605:%.*]], label [[IF_END4606:%.*]]
+// SIMD-ONLY0:       if.then4605:
+// SIMD-ONLY0-NEXT:    [[TMP3345:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3345]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4606]]
+// SIMD-ONLY0:       if.end4606:
+// SIMD-ONLY0-NEXT:    [[TMP3346:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3346]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3347:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3348:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4607:%.*]] = icmp eq i64 [[TMP3347]], [[TMP3348]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4607]], label [[IF_THEN4609:%.*]], label [[IF_END4610:%.*]]
+// SIMD-ONLY0:       if.then4609:
+// SIMD-ONLY0-NEXT:    [[TMP3349:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3349]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4610]]
+// SIMD-ONLY0:       if.end4610:
+// SIMD-ONLY0-NEXT:    [[TMP3350:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3350]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3351:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3352:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4611:%.*]] = icmp eq i64 [[TMP3351]], [[TMP3352]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4611]], label [[IF_THEN4613:%.*]], label [[IF_END4614:%.*]]
+// SIMD-ONLY0:       if.then4613:
+// SIMD-ONLY0-NEXT:    [[TMP3353:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3353]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4614]]
+// SIMD-ONLY0:       if.end4614:
+// SIMD-ONLY0-NEXT:    [[TMP3354:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3355:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4615:%.*]] = icmp ugt i64 [[TMP3354]], [[TMP3355]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4615]], label [[IF_THEN4617:%.*]], label [[IF_END4618:%.*]]
+// SIMD-ONLY0:       if.then4617:
+// SIMD-ONLY0-NEXT:    [[TMP3356:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3356]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4618]]
+// SIMD-ONLY0:       if.end4618:
+// SIMD-ONLY0-NEXT:    [[TMP3357:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3357]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3358:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3359:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4619:%.*]] = icmp ugt i64 [[TMP3358]], [[TMP3359]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4619]], label [[IF_THEN4621:%.*]], label [[IF_END4622:%.*]]
+// SIMD-ONLY0:       if.then4621:
+// SIMD-ONLY0-NEXT:    [[TMP3360:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3360]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4622]]
+// SIMD-ONLY0:       if.end4622:
+// SIMD-ONLY0-NEXT:    [[TMP3361:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3361]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3362:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3363:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4623:%.*]] = icmp ult i64 [[TMP3362]], [[TMP3363]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4623]], label [[IF_THEN4625:%.*]], label [[IF_END4626:%.*]]
+// SIMD-ONLY0:       if.then4625:
+// SIMD-ONLY0-NEXT:    [[TMP3364:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3364]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4626]]
+// SIMD-ONLY0:       if.end4626:
+// SIMD-ONLY0-NEXT:    [[TMP3365:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3365]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3366:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3367:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4627:%.*]] = icmp ult i64 [[TMP3366]], [[TMP3367]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4627]], label [[IF_THEN4629:%.*]], label [[IF_END4630:%.*]]
+// SIMD-ONLY0:       if.then4629:
+// SIMD-ONLY0-NEXT:    [[TMP3368:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3368]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4630]]
+// SIMD-ONLY0:       if.end4630:
+// SIMD-ONLY0-NEXT:    [[TMP3369:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3369]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3370:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3371:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4631:%.*]] = icmp eq i64 [[TMP3370]], [[TMP3371]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4631]], label [[IF_THEN4633:%.*]], label [[IF_END4634:%.*]]
+// SIMD-ONLY0:       if.then4633:
+// SIMD-ONLY0-NEXT:    [[TMP3372:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3372]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4634]]
+// SIMD-ONLY0:       if.end4634:
+// SIMD-ONLY0-NEXT:    [[TMP3373:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3373]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3374:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3375:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4635:%.*]] = icmp eq i64 [[TMP3374]], [[TMP3375]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4635]], label [[IF_THEN4637:%.*]], label [[IF_END4638:%.*]]
+// SIMD-ONLY0:       if.then4637:
+// SIMD-ONLY0-NEXT:    [[TMP3376:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3376]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4638]]
+// SIMD-ONLY0:       if.end4638:
+// SIMD-ONLY0-NEXT:    [[TMP3377:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3377]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3378:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3379:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4639:%.*]] = icmp eq i64 [[TMP3378]], [[TMP3379]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4639]], label [[IF_THEN4641:%.*]], label [[IF_ELSE4642:%.*]]
+// SIMD-ONLY0:       if.then4641:
+// SIMD-ONLY0-NEXT:    [[TMP3380:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3380]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4643:%.*]]
+// SIMD-ONLY0:       if.else4642:
+// SIMD-ONLY0-NEXT:    [[TMP3381:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3381]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4643]]
+// SIMD-ONLY0:       if.end4643:
+// SIMD-ONLY0-NEXT:    [[TMP3382:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3383:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4644:%.*]] = icmp eq i64 [[TMP3382]], [[TMP3383]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4644]], label [[IF_THEN4646:%.*]], label [[IF_ELSE4647:%.*]]
+// SIMD-ONLY0:       if.then4646:
+// SIMD-ONLY0-NEXT:    [[TMP3384:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3384]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4648:%.*]]
+// SIMD-ONLY0:       if.else4647:
+// SIMD-ONLY0-NEXT:    [[TMP3385:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3385]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4648]]
+// SIMD-ONLY0:       if.end4648:
+// SIMD-ONLY0-NEXT:    [[TMP3386:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3387:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4649:%.*]] = icmp eq i64 [[TMP3386]], [[TMP3387]]
+// SIMD-ONLY0-NEXT:    [[CONV4650:%.*]] = zext i1 [[CMP4649]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4651:%.*]] = sext i32 [[CONV4650]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4651]], ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3388:%.*]] = load i64, ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4652:%.*]] = icmp ne i64 [[TMP3388]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4652]], label [[IF_THEN4653:%.*]], label [[IF_END4654:%.*]]
+// SIMD-ONLY0:       if.then4653:
+// SIMD-ONLY0-NEXT:    [[TMP3389:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3389]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4654]]
+// SIMD-ONLY0:       if.end4654:
+// SIMD-ONLY0-NEXT:    [[TMP3390:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3391:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4655:%.*]] = icmp eq i64 [[TMP3390]], [[TMP3391]]
+// SIMD-ONLY0-NEXT:    [[CONV4656:%.*]] = zext i1 [[CMP4655]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4657:%.*]] = sext i32 [[CONV4656]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4657]], ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3392:%.*]] = load i64, ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4658:%.*]] = icmp ne i64 [[TMP3392]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4658]], label [[IF_THEN4659:%.*]], label [[IF_END4660:%.*]]
+// SIMD-ONLY0:       if.then4659:
+// SIMD-ONLY0-NEXT:    [[TMP3393:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3393]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4660]]
+// SIMD-ONLY0:       if.end4660:
+// SIMD-ONLY0-NEXT:    [[TMP3394:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3395:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4661:%.*]] = icmp eq i64 [[TMP3394]], [[TMP3395]]
+// SIMD-ONLY0-NEXT:    [[CONV4662:%.*]] = zext i1 [[CMP4661]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4663:%.*]] = sext i32 [[CONV4662]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4663]], ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3396:%.*]] = load i64, ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4664:%.*]] = icmp ne i64 [[TMP3396]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4664]], label [[IF_THEN4665:%.*]], label [[IF_ELSE4666:%.*]]
+// SIMD-ONLY0:       if.then4665:
+// SIMD-ONLY0-NEXT:    [[TMP3397:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3397]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4667:%.*]]
+// SIMD-ONLY0:       if.else4666:
+// SIMD-ONLY0-NEXT:    [[TMP3398:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3398]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4667]]
+// SIMD-ONLY0:       if.end4667:
+// SIMD-ONLY0-NEXT:    [[TMP3399:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3400:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4668:%.*]] = icmp eq i64 [[TMP3399]], [[TMP3400]]
+// SIMD-ONLY0-NEXT:    [[CONV4669:%.*]] = zext i1 [[CMP4668]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4670:%.*]] = sext i32 [[CONV4669]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4670]], ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3401:%.*]] = load i64, ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4671:%.*]] = icmp ne i64 [[TMP3401]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4671]], label [[IF_THEN4672:%.*]], label [[IF_ELSE4673:%.*]]
+// SIMD-ONLY0:       if.then4672:
+// SIMD-ONLY0-NEXT:    [[TMP3402:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3402]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4674:%.*]]
+// SIMD-ONLY0:       if.else4673:
+// SIMD-ONLY0-NEXT:    [[TMP3403:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3403]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4674]]
+// SIMD-ONLY0:       if.end4674:
+// SIMD-ONLY0-NEXT:    [[TMP3404:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3404]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3405:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3406:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4675:%.*]] = icmp ugt i64 [[TMP3405]], [[TMP3406]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4675]], label [[IF_THEN4677:%.*]], label [[IF_END4678:%.*]]
+// SIMD-ONLY0:       if.then4677:
+// SIMD-ONLY0-NEXT:    [[TMP3407:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3407]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4678]]
+// SIMD-ONLY0:       if.end4678:
+// SIMD-ONLY0-NEXT:    [[TMP3408:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3408]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3409:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3410:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4679:%.*]] = icmp ugt i64 [[TMP3409]], [[TMP3410]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4679]], label [[IF_THEN4681:%.*]], label [[IF_END4682:%.*]]
+// SIMD-ONLY0:       if.then4681:
+// SIMD-ONLY0-NEXT:    [[TMP3411:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3411]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4682]]
+// SIMD-ONLY0:       if.end4682:
+// SIMD-ONLY0-NEXT:    [[TMP3412:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3412]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3413:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3414:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4683:%.*]] = icmp ult i64 [[TMP3413]], [[TMP3414]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4683]], label [[IF_THEN4685:%.*]], label [[IF_END4686:%.*]]
+// SIMD-ONLY0:       if.then4685:
+// SIMD-ONLY0-NEXT:    [[TMP3415:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3415]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4686]]
+// SIMD-ONLY0:       if.end4686:
+// SIMD-ONLY0-NEXT:    [[TMP3416:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3416]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3417:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3418:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4687:%.*]] = icmp ult i64 [[TMP3417]], [[TMP3418]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4687]], label [[IF_THEN4689:%.*]], label [[IF_END4690:%.*]]
+// SIMD-ONLY0:       if.then4689:
+// SIMD-ONLY0-NEXT:    [[TMP3419:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3419]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4690]]
+// SIMD-ONLY0:       if.end4690:
+// SIMD-ONLY0-NEXT:    [[TMP3420:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3420]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3421:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3422:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4691:%.*]] = icmp eq i64 [[TMP3421]], [[TMP3422]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4691]], label [[IF_THEN4693:%.*]], label [[IF_END4694:%.*]]
+// SIMD-ONLY0:       if.then4693:
+// SIMD-ONLY0-NEXT:    [[TMP3423:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3423]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4694]]
+// SIMD-ONLY0:       if.end4694:
+// SIMD-ONLY0-NEXT:    [[TMP3424:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3424]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3425:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3426:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4695:%.*]] = icmp eq i64 [[TMP3425]], [[TMP3426]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4695]], label [[IF_THEN4697:%.*]], label [[IF_END4698:%.*]]
+// SIMD-ONLY0:       if.then4697:
+// SIMD-ONLY0-NEXT:    [[TMP3427:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3427]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4698]]
+// SIMD-ONLY0:       if.end4698:
+// SIMD-ONLY0-NEXT:    [[TMP3428:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3429:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4699:%.*]] = icmp ugt i64 [[TMP3428]], [[TMP3429]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4699]], label [[IF_THEN4701:%.*]], label [[IF_END4702:%.*]]
+// SIMD-ONLY0:       if.then4701:
+// SIMD-ONLY0-NEXT:    [[TMP3430:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3430]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4702]]
+// SIMD-ONLY0:       if.end4702:
+// SIMD-ONLY0-NEXT:    [[TMP3431:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3431]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3432:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3433:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4703:%.*]] = icmp ugt i64 [[TMP3432]], [[TMP3433]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4703]], label [[IF_THEN4705:%.*]], label [[IF_END4706:%.*]]
+// SIMD-ONLY0:       if.then4705:
+// SIMD-ONLY0-NEXT:    [[TMP3434:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3434]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4706]]
+// SIMD-ONLY0:       if.end4706:
+// SIMD-ONLY0-NEXT:    [[TMP3435:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3435]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3436:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3437:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4707:%.*]] = icmp ult i64 [[TMP3436]], [[TMP3437]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4707]], label [[IF_THEN4709:%.*]], label [[IF_END4710:%.*]]
+// SIMD-ONLY0:       if.then4709:
+// SIMD-ONLY0-NEXT:    [[TMP3438:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3438]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4710]]
+// SIMD-ONLY0:       if.end4710:
+// SIMD-ONLY0-NEXT:    [[TMP3439:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3439]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3440:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3441:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4711:%.*]] = icmp ult i64 [[TMP3440]], [[TMP3441]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4711]], label [[IF_THEN4713:%.*]], label [[IF_END4714:%.*]]
+// SIMD-ONLY0:       if.then4713:
+// SIMD-ONLY0-NEXT:    [[TMP3442:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3442]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4714]]
+// SIMD-ONLY0:       if.end4714:
+// SIMD-ONLY0-NEXT:    [[TMP3443:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3443]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3444:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3445:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4715:%.*]] = icmp eq i64 [[TMP3444]], [[TMP3445]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4715]], label [[IF_THEN4717:%.*]], label [[IF_END4718:%.*]]
+// SIMD-ONLY0:       if.then4717:
+// SIMD-ONLY0-NEXT:    [[TMP3446:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3446]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4718]]
+// SIMD-ONLY0:       if.end4718:
+// SIMD-ONLY0-NEXT:    [[TMP3447:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3447]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3448:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3449:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4719:%.*]] = icmp eq i64 [[TMP3448]], [[TMP3449]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4719]], label [[IF_THEN4721:%.*]], label [[IF_END4722:%.*]]
+// SIMD-ONLY0:       if.then4721:
+// SIMD-ONLY0-NEXT:    [[TMP3450:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3450]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4722]]
+// SIMD-ONLY0:       if.end4722:
+// SIMD-ONLY0-NEXT:    [[TMP3451:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3451]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3452:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3453:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4723:%.*]] = icmp eq i64 [[TMP3452]], [[TMP3453]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4723]], label [[IF_THEN4725:%.*]], label [[IF_ELSE4726:%.*]]
+// SIMD-ONLY0:       if.then4725:
+// SIMD-ONLY0-NEXT:    [[TMP3454:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3454]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4727:%.*]]
+// SIMD-ONLY0:       if.else4726:
+// SIMD-ONLY0-NEXT:    [[TMP3455:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3455]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4727]]
+// SIMD-ONLY0:       if.end4727:
+// SIMD-ONLY0-NEXT:    [[TMP3456:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3457:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4728:%.*]] = icmp eq i64 [[TMP3456]], [[TMP3457]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4728]], label [[IF_THEN4730:%.*]], label [[IF_ELSE4731:%.*]]
+// SIMD-ONLY0:       if.then4730:
+// SIMD-ONLY0-NEXT:    [[TMP3458:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3458]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4732:%.*]]
+// SIMD-ONLY0:       if.else4731:
+// SIMD-ONLY0-NEXT:    [[TMP3459:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3459]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4732]]
+// SIMD-ONLY0:       if.end4732:
+// SIMD-ONLY0-NEXT:    [[TMP3460:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3461:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4733:%.*]] = icmp eq i64 [[TMP3460]], [[TMP3461]]
+// SIMD-ONLY0-NEXT:    [[CONV4734:%.*]] = zext i1 [[CMP4733]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4735:%.*]] = sext i32 [[CONV4734]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4735]], ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3462:%.*]] = load i64, ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4736:%.*]] = icmp ne i64 [[TMP3462]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4736]], label [[IF_THEN4737:%.*]], label [[IF_END4738:%.*]]
+// SIMD-ONLY0:       if.then4737:
+// SIMD-ONLY0-NEXT:    [[TMP3463:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3463]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4738]]
+// SIMD-ONLY0:       if.end4738:
+// SIMD-ONLY0-NEXT:    [[TMP3464:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3465:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4739:%.*]] = icmp eq i64 [[TMP3464]], [[TMP3465]]
+// SIMD-ONLY0-NEXT:    [[CONV4740:%.*]] = zext i1 [[CMP4739]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4741:%.*]] = sext i32 [[CONV4740]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4741]], ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3466:%.*]] = load i64, ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4742:%.*]] = icmp ne i64 [[TMP3466]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4742]], label [[IF_THEN4743:%.*]], label [[IF_END4744:%.*]]
+// SIMD-ONLY0:       if.then4743:
+// SIMD-ONLY0-NEXT:    [[TMP3467:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3467]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4744]]
+// SIMD-ONLY0:       if.end4744:
+// SIMD-ONLY0-NEXT:    [[TMP3468:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3469:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4745:%.*]] = icmp eq i64 [[TMP3468]], [[TMP3469]]
+// SIMD-ONLY0-NEXT:    [[CONV4746:%.*]] = zext i1 [[CMP4745]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4747:%.*]] = sext i32 [[CONV4746]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4747]], ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3470:%.*]] = load i64, ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4748:%.*]] = icmp ne i64 [[TMP3470]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4748]], label [[IF_THEN4749:%.*]], label [[IF_ELSE4750:%.*]]
+// SIMD-ONLY0:       if.then4749:
+// SIMD-ONLY0-NEXT:    [[TMP3471:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3471]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4751:%.*]]
+// SIMD-ONLY0:       if.else4750:
+// SIMD-ONLY0-NEXT:    [[TMP3472:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3472]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4751]]
+// SIMD-ONLY0:       if.end4751:
+// SIMD-ONLY0-NEXT:    [[TMP3473:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3474:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4752:%.*]] = icmp eq i64 [[TMP3473]], [[TMP3474]]
+// SIMD-ONLY0-NEXT:    [[CONV4753:%.*]] = zext i1 [[CMP4752]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4754:%.*]] = sext i32 [[CONV4753]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4754]], ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3475:%.*]] = load i64, ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4755:%.*]] = icmp ne i64 [[TMP3475]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4755]], label [[IF_THEN4756:%.*]], label [[IF_ELSE4757:%.*]]
+// SIMD-ONLY0:       if.then4756:
+// SIMD-ONLY0-NEXT:    [[TMP3476:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3476]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4758:%.*]]
+// SIMD-ONLY0:       if.else4757:
+// SIMD-ONLY0-NEXT:    [[TMP3477:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3477]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4758]]
+// SIMD-ONLY0:       if.end4758:
+// SIMD-ONLY0-NEXT:    [[TMP3478:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3478]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3479:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3480:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4759:%.*]] = icmp ugt i64 [[TMP3479]], [[TMP3480]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4759]], label [[IF_THEN4761:%.*]], label [[IF_END4762:%.*]]
+// SIMD-ONLY0:       if.then4761:
+// SIMD-ONLY0-NEXT:    [[TMP3481:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3481]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4762]]
+// SIMD-ONLY0:       if.end4762:
+// SIMD-ONLY0-NEXT:    [[TMP3482:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3482]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3483:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3484:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4763:%.*]] = icmp ugt i64 [[TMP3483]], [[TMP3484]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4763]], label [[IF_THEN4765:%.*]], label [[IF_END4766:%.*]]
+// SIMD-ONLY0:       if.then4765:
+// SIMD-ONLY0-NEXT:    [[TMP3485:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3485]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4766]]
+// SIMD-ONLY0:       if.end4766:
+// SIMD-ONLY0-NEXT:    [[TMP3486:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3486]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3487:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3488:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4767:%.*]] = icmp ult i64 [[TMP3487]], [[TMP3488]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4767]], label [[IF_THEN4769:%.*]], label [[IF_END4770:%.*]]
+// SIMD-ONLY0:       if.then4769:
+// SIMD-ONLY0-NEXT:    [[TMP3489:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3489]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4770]]
+// SIMD-ONLY0:       if.end4770:
+// SIMD-ONLY0-NEXT:    [[TMP3490:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3490]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3491:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3492:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4771:%.*]] = icmp ult i64 [[TMP3491]], [[TMP3492]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4771]], label [[IF_THEN4773:%.*]], label [[IF_END4774:%.*]]
+// SIMD-ONLY0:       if.then4773:
+// SIMD-ONLY0-NEXT:    [[TMP3493:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3493]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4774]]
+// SIMD-ONLY0:       if.end4774:
+// SIMD-ONLY0-NEXT:    [[TMP3494:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3494]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3495:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3496:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4775:%.*]] = icmp eq i64 [[TMP3495]], [[TMP3496]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4775]], label [[IF_THEN4777:%.*]], label [[IF_END4778:%.*]]
+// SIMD-ONLY0:       if.then4777:
+// SIMD-ONLY0-NEXT:    [[TMP3497:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3497]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4778]]
+// SIMD-ONLY0:       if.end4778:
+// SIMD-ONLY0-NEXT:    [[TMP3498:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3498]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3499:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3500:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4779:%.*]] = icmp eq i64 [[TMP3499]], [[TMP3500]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4779]], label [[IF_THEN4781:%.*]], label [[IF_END4782:%.*]]
+// SIMD-ONLY0:       if.then4781:
+// SIMD-ONLY0-NEXT:    [[TMP3501:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3501]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4782]]
+// SIMD-ONLY0:       if.end4782:
+// SIMD-ONLY0-NEXT:    [[TMP3502:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3503:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4783:%.*]] = icmp ugt i64 [[TMP3502]], [[TMP3503]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4783]], label [[IF_THEN4785:%.*]], label [[IF_END4786:%.*]]
+// SIMD-ONLY0:       if.then4785:
+// SIMD-ONLY0-NEXT:    [[TMP3504:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3504]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4786]]
+// SIMD-ONLY0:       if.end4786:
+// SIMD-ONLY0-NEXT:    [[TMP3505:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3505]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3506:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3507:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4787:%.*]] = icmp ugt i64 [[TMP3506]], [[TMP3507]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4787]], label [[IF_THEN4789:%.*]], label [[IF_END4790:%.*]]
+// SIMD-ONLY0:       if.then4789:
+// SIMD-ONLY0-NEXT:    [[TMP3508:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3508]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4790]]
+// SIMD-ONLY0:       if.end4790:
+// SIMD-ONLY0-NEXT:    [[TMP3509:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3509]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3510:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3511:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4791:%.*]] = icmp ult i64 [[TMP3510]], [[TMP3511]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4791]], label [[IF_THEN4793:%.*]], label [[IF_END4794:%.*]]
+// SIMD-ONLY0:       if.then4793:
+// SIMD-ONLY0-NEXT:    [[TMP3512:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3512]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4794]]
+// SIMD-ONLY0:       if.end4794:
+// SIMD-ONLY0-NEXT:    [[TMP3513:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3513]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3514:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3515:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4795:%.*]] = icmp ult i64 [[TMP3514]], [[TMP3515]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4795]], label [[IF_THEN4797:%.*]], label [[IF_END4798:%.*]]
+// SIMD-ONLY0:       if.then4797:
+// SIMD-ONLY0-NEXT:    [[TMP3516:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3516]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4798]]
+// SIMD-ONLY0:       if.end4798:
+// SIMD-ONLY0-NEXT:    [[TMP3517:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3517]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3518:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3519:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4799:%.*]] = icmp eq i64 [[TMP3518]], [[TMP3519]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4799]], label [[IF_THEN4801:%.*]], label [[IF_END4802:%.*]]
+// SIMD-ONLY0:       if.then4801:
+// SIMD-ONLY0-NEXT:    [[TMP3520:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3520]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4802]]
+// SIMD-ONLY0:       if.end4802:
+// SIMD-ONLY0-NEXT:    [[TMP3521:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3521]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3522:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3523:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4803:%.*]] = icmp eq i64 [[TMP3522]], [[TMP3523]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4803]], label [[IF_THEN4805:%.*]], label [[IF_END4806:%.*]]
+// SIMD-ONLY0:       if.then4805:
+// SIMD-ONLY0-NEXT:    [[TMP3524:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3524]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4806]]
+// SIMD-ONLY0:       if.end4806:
+// SIMD-ONLY0-NEXT:    [[TMP3525:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3525]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3526:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3527:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4807:%.*]] = icmp eq i64 [[TMP3526]], [[TMP3527]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4807]], label [[IF_THEN4809:%.*]], label [[IF_ELSE4810:%.*]]
+// SIMD-ONLY0:       if.then4809:
+// SIMD-ONLY0-NEXT:    [[TMP3528:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3528]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4811:%.*]]
+// SIMD-ONLY0:       if.else4810:
+// SIMD-ONLY0-NEXT:    [[TMP3529:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3529]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4811]]
+// SIMD-ONLY0:       if.end4811:
+// SIMD-ONLY0-NEXT:    [[TMP3530:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3531:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4812:%.*]] = icmp eq i64 [[TMP3530]], [[TMP3531]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4812]], label [[IF_THEN4814:%.*]], label [[IF_ELSE4815:%.*]]
+// SIMD-ONLY0:       if.then4814:
+// SIMD-ONLY0-NEXT:    [[TMP3532:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3532]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4816:%.*]]
+// SIMD-ONLY0:       if.else4815:
+// SIMD-ONLY0-NEXT:    [[TMP3533:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3533]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4816]]
+// SIMD-ONLY0:       if.end4816:
+// SIMD-ONLY0-NEXT:    [[TMP3534:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3535:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4817:%.*]] = icmp eq i64 [[TMP3534]], [[TMP3535]]
+// SIMD-ONLY0-NEXT:    [[CONV4818:%.*]] = zext i1 [[CMP4817]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4819:%.*]] = sext i32 [[CONV4818]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4819]], ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3536:%.*]] = load i64, ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4820:%.*]] = icmp ne i64 [[TMP3536]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4820]], label [[IF_THEN4821:%.*]], label [[IF_END4822:%.*]]
+// SIMD-ONLY0:       if.then4821:
+// SIMD-ONLY0-NEXT:    [[TMP3537:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3537]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4822]]
+// SIMD-ONLY0:       if.end4822:
+// SIMD-ONLY0-NEXT:    [[TMP3538:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3539:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4823:%.*]] = icmp eq i64 [[TMP3538]], [[TMP3539]]
+// SIMD-ONLY0-NEXT:    [[CONV4824:%.*]] = zext i1 [[CMP4823]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4825:%.*]] = sext i32 [[CONV4824]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4825]], ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3540:%.*]] = load i64, ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4826:%.*]] = icmp ne i64 [[TMP3540]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4826]], label [[IF_THEN4827:%.*]], label [[IF_END4828:%.*]]
+// SIMD-ONLY0:       if.then4827:
+// SIMD-ONLY0-NEXT:    [[TMP3541:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3541]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4828]]
+// SIMD-ONLY0:       if.end4828:
+// SIMD-ONLY0-NEXT:    [[TMP3542:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3543:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4829:%.*]] = icmp eq i64 [[TMP3542]], [[TMP3543]]
+// SIMD-ONLY0-NEXT:    [[CONV4830:%.*]] = zext i1 [[CMP4829]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4831:%.*]] = sext i32 [[CONV4830]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4831]], ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3544:%.*]] = load i64, ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4832:%.*]] = icmp ne i64 [[TMP3544]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4832]], label [[IF_THEN4833:%.*]], label [[IF_ELSE4834:%.*]]
+// SIMD-ONLY0:       if.then4833:
+// SIMD-ONLY0-NEXT:    [[TMP3545:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3545]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4835:%.*]]
+// SIMD-ONLY0:       if.else4834:
+// SIMD-ONLY0-NEXT:    [[TMP3546:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3546]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4835]]
+// SIMD-ONLY0:       if.end4835:
+// SIMD-ONLY0-NEXT:    [[TMP3547:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3548:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4836:%.*]] = icmp eq i64 [[TMP3547]], [[TMP3548]]
+// SIMD-ONLY0-NEXT:    [[CONV4837:%.*]] = zext i1 [[CMP4836]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4838:%.*]] = sext i32 [[CONV4837]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4838]], ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3549:%.*]] = load i64, ptr [[ULR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4839:%.*]] = icmp ne i64 [[TMP3549]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4839]], label [[IF_THEN4840:%.*]], label [[IF_ELSE4841:%.*]]
+// SIMD-ONLY0:       if.then4840:
+// SIMD-ONLY0-NEXT:    [[TMP3550:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3550]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4842:%.*]]
+// SIMD-ONLY0:       if.else4841:
+// SIMD-ONLY0-NEXT:    [[TMP3551:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3551]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4842]]
+// SIMD-ONLY0:       if.end4842:
+// SIMD-ONLY0-NEXT:    [[TMP3552:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3552]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3553:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3554:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4843:%.*]] = icmp sgt i64 [[TMP3553]], [[TMP3554]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4843]], label [[IF_THEN4845:%.*]], label [[IF_END4846:%.*]]
+// SIMD-ONLY0:       if.then4845:
+// SIMD-ONLY0-NEXT:    [[TMP3555:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3555]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4846]]
+// SIMD-ONLY0:       if.end4846:
+// SIMD-ONLY0-NEXT:    [[TMP3556:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3556]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3557:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3558:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4847:%.*]] = icmp sgt i64 [[TMP3557]], [[TMP3558]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4847]], label [[IF_THEN4849:%.*]], label [[IF_END4850:%.*]]
+// SIMD-ONLY0:       if.then4849:
+// SIMD-ONLY0-NEXT:    [[TMP3559:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3559]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4850]]
+// SIMD-ONLY0:       if.end4850:
+// SIMD-ONLY0-NEXT:    [[TMP3560:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3560]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3561:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3562:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4851:%.*]] = icmp slt i64 [[TMP3561]], [[TMP3562]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4851]], label [[IF_THEN4853:%.*]], label [[IF_END4854:%.*]]
+// SIMD-ONLY0:       if.then4853:
+// SIMD-ONLY0-NEXT:    [[TMP3563:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3563]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4854]]
+// SIMD-ONLY0:       if.end4854:
+// SIMD-ONLY0-NEXT:    [[TMP3564:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3564]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3565:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3566:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4855:%.*]] = icmp slt i64 [[TMP3565]], [[TMP3566]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4855]], label [[IF_THEN4857:%.*]], label [[IF_END4858:%.*]]
+// SIMD-ONLY0:       if.then4857:
+// SIMD-ONLY0-NEXT:    [[TMP3567:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3567]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4858]]
+// SIMD-ONLY0:       if.end4858:
+// SIMD-ONLY0-NEXT:    [[TMP3568:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3568]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3569:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3570:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4859:%.*]] = icmp eq i64 [[TMP3569]], [[TMP3570]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4859]], label [[IF_THEN4861:%.*]], label [[IF_END4862:%.*]]
+// SIMD-ONLY0:       if.then4861:
+// SIMD-ONLY0-NEXT:    [[TMP3571:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3571]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4862]]
+// SIMD-ONLY0:       if.end4862:
+// SIMD-ONLY0-NEXT:    [[TMP3572:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3572]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3573:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3574:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4863:%.*]] = icmp eq i64 [[TMP3573]], [[TMP3574]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4863]], label [[IF_THEN4865:%.*]], label [[IF_END4866:%.*]]
+// SIMD-ONLY0:       if.then4865:
+// SIMD-ONLY0-NEXT:    [[TMP3575:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3575]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4866]]
+// SIMD-ONLY0:       if.end4866:
+// SIMD-ONLY0-NEXT:    [[TMP3576:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3577:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4867:%.*]] = icmp sgt i64 [[TMP3576]], [[TMP3577]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4867]], label [[IF_THEN4869:%.*]], label [[IF_END4870:%.*]]
+// SIMD-ONLY0:       if.then4869:
+// SIMD-ONLY0-NEXT:    [[TMP3578:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3578]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4870]]
+// SIMD-ONLY0:       if.end4870:
+// SIMD-ONLY0-NEXT:    [[TMP3579:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3579]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3580:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3581:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4871:%.*]] = icmp sgt i64 [[TMP3580]], [[TMP3581]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4871]], label [[IF_THEN4873:%.*]], label [[IF_END4874:%.*]]
+// SIMD-ONLY0:       if.then4873:
+// SIMD-ONLY0-NEXT:    [[TMP3582:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3582]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4874]]
+// SIMD-ONLY0:       if.end4874:
+// SIMD-ONLY0-NEXT:    [[TMP3583:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3583]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3584:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3585:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4875:%.*]] = icmp slt i64 [[TMP3584]], [[TMP3585]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4875]], label [[IF_THEN4877:%.*]], label [[IF_END4878:%.*]]
+// SIMD-ONLY0:       if.then4877:
+// SIMD-ONLY0-NEXT:    [[TMP3586:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3586]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4878]]
+// SIMD-ONLY0:       if.end4878:
+// SIMD-ONLY0-NEXT:    [[TMP3587:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3587]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3588:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3589:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4879:%.*]] = icmp slt i64 [[TMP3588]], [[TMP3589]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4879]], label [[IF_THEN4881:%.*]], label [[IF_END4882:%.*]]
+// SIMD-ONLY0:       if.then4881:
+// SIMD-ONLY0-NEXT:    [[TMP3590:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3590]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4882]]
+// SIMD-ONLY0:       if.end4882:
+// SIMD-ONLY0-NEXT:    [[TMP3591:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3591]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3592:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3593:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4883:%.*]] = icmp eq i64 [[TMP3592]], [[TMP3593]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4883]], label [[IF_THEN4885:%.*]], label [[IF_END4886:%.*]]
+// SIMD-ONLY0:       if.then4885:
+// SIMD-ONLY0-NEXT:    [[TMP3594:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3594]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4886]]
+// SIMD-ONLY0:       if.end4886:
+// SIMD-ONLY0-NEXT:    [[TMP3595:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3595]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3596:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3597:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4887:%.*]] = icmp eq i64 [[TMP3596]], [[TMP3597]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4887]], label [[IF_THEN4889:%.*]], label [[IF_END4890:%.*]]
+// SIMD-ONLY0:       if.then4889:
+// SIMD-ONLY0-NEXT:    [[TMP3598:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3598]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4890]]
+// SIMD-ONLY0:       if.end4890:
+// SIMD-ONLY0-NEXT:    [[TMP3599:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3599]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3600:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3601:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4891:%.*]] = icmp eq i64 [[TMP3600]], [[TMP3601]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4891]], label [[IF_THEN4893:%.*]], label [[IF_ELSE4894:%.*]]
+// SIMD-ONLY0:       if.then4893:
+// SIMD-ONLY0-NEXT:    [[TMP3602:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3602]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4895:%.*]]
+// SIMD-ONLY0:       if.else4894:
+// SIMD-ONLY0-NEXT:    [[TMP3603:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3603]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4895]]
+// SIMD-ONLY0:       if.end4895:
+// SIMD-ONLY0-NEXT:    [[TMP3604:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3605:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4896:%.*]] = icmp eq i64 [[TMP3604]], [[TMP3605]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4896]], label [[IF_THEN4898:%.*]], label [[IF_ELSE4899:%.*]]
+// SIMD-ONLY0:       if.then4898:
+// SIMD-ONLY0-NEXT:    [[TMP3606:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3606]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4900:%.*]]
+// SIMD-ONLY0:       if.else4899:
+// SIMD-ONLY0-NEXT:    [[TMP3607:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3607]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4900]]
+// SIMD-ONLY0:       if.end4900:
+// SIMD-ONLY0-NEXT:    [[TMP3608:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3609:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4901:%.*]] = icmp eq i64 [[TMP3608]], [[TMP3609]]
+// SIMD-ONLY0-NEXT:    [[CONV4902:%.*]] = zext i1 [[CMP4901]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4903:%.*]] = sext i32 [[CONV4902]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4903]], ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3610:%.*]] = load i64, ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4904:%.*]] = icmp ne i64 [[TMP3610]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4904]], label [[IF_THEN4905:%.*]], label [[IF_END4906:%.*]]
+// SIMD-ONLY0:       if.then4905:
+// SIMD-ONLY0-NEXT:    [[TMP3611:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3611]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4906]]
+// SIMD-ONLY0:       if.end4906:
+// SIMD-ONLY0-NEXT:    [[TMP3612:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3613:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4907:%.*]] = icmp eq i64 [[TMP3612]], [[TMP3613]]
+// SIMD-ONLY0-NEXT:    [[CONV4908:%.*]] = zext i1 [[CMP4907]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4909:%.*]] = sext i32 [[CONV4908]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4909]], ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3614:%.*]] = load i64, ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4910:%.*]] = icmp ne i64 [[TMP3614]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4910]], label [[IF_THEN4911:%.*]], label [[IF_END4912:%.*]]
+// SIMD-ONLY0:       if.then4911:
+// SIMD-ONLY0-NEXT:    [[TMP3615:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3615]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4912]]
+// SIMD-ONLY0:       if.end4912:
+// SIMD-ONLY0-NEXT:    [[TMP3616:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3617:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4913:%.*]] = icmp eq i64 [[TMP3616]], [[TMP3617]]
+// SIMD-ONLY0-NEXT:    [[CONV4914:%.*]] = zext i1 [[CMP4913]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4915:%.*]] = sext i32 [[CONV4914]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4915]], ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3618:%.*]] = load i64, ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4916:%.*]] = icmp ne i64 [[TMP3618]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4916]], label [[IF_THEN4917:%.*]], label [[IF_ELSE4918:%.*]]
+// SIMD-ONLY0:       if.then4917:
+// SIMD-ONLY0-NEXT:    [[TMP3619:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3619]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4919:%.*]]
+// SIMD-ONLY0:       if.else4918:
+// SIMD-ONLY0-NEXT:    [[TMP3620:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3620]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4919]]
+// SIMD-ONLY0:       if.end4919:
+// SIMD-ONLY0-NEXT:    [[TMP3621:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3622:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4920:%.*]] = icmp eq i64 [[TMP3621]], [[TMP3622]]
+// SIMD-ONLY0-NEXT:    [[CONV4921:%.*]] = zext i1 [[CMP4920]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4922:%.*]] = sext i32 [[CONV4921]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4922]], ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3623:%.*]] = load i64, ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4923:%.*]] = icmp ne i64 [[TMP3623]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4923]], label [[IF_THEN4924:%.*]], label [[IF_ELSE4925:%.*]]
+// SIMD-ONLY0:       if.then4924:
+// SIMD-ONLY0-NEXT:    [[TMP3624:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3624]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4926:%.*]]
+// SIMD-ONLY0:       if.else4925:
+// SIMD-ONLY0-NEXT:    [[TMP3625:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3625]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4926]]
+// SIMD-ONLY0:       if.end4926:
+// SIMD-ONLY0-NEXT:    [[TMP3626:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3626]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3627:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3628:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4927:%.*]] = icmp sgt i64 [[TMP3627]], [[TMP3628]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4927]], label [[IF_THEN4929:%.*]], label [[IF_END4930:%.*]]
+// SIMD-ONLY0:       if.then4929:
+// SIMD-ONLY0-NEXT:    [[TMP3629:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3629]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4930]]
+// SIMD-ONLY0:       if.end4930:
+// SIMD-ONLY0-NEXT:    [[TMP3630:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3630]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3631:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3632:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4931:%.*]] = icmp sgt i64 [[TMP3631]], [[TMP3632]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4931]], label [[IF_THEN4933:%.*]], label [[IF_END4934:%.*]]
+// SIMD-ONLY0:       if.then4933:
+// SIMD-ONLY0-NEXT:    [[TMP3633:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3633]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4934]]
+// SIMD-ONLY0:       if.end4934:
+// SIMD-ONLY0-NEXT:    [[TMP3634:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3634]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3635:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3636:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4935:%.*]] = icmp slt i64 [[TMP3635]], [[TMP3636]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4935]], label [[IF_THEN4937:%.*]], label [[IF_END4938:%.*]]
+// SIMD-ONLY0:       if.then4937:
+// SIMD-ONLY0-NEXT:    [[TMP3637:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3637]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4938]]
+// SIMD-ONLY0:       if.end4938:
+// SIMD-ONLY0-NEXT:    [[TMP3638:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3638]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3639:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3640:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4939:%.*]] = icmp slt i64 [[TMP3639]], [[TMP3640]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4939]], label [[IF_THEN4941:%.*]], label [[IF_END4942:%.*]]
+// SIMD-ONLY0:       if.then4941:
+// SIMD-ONLY0-NEXT:    [[TMP3641:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3641]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4942]]
+// SIMD-ONLY0:       if.end4942:
+// SIMD-ONLY0-NEXT:    [[TMP3642:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3642]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3643:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3644:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4943:%.*]] = icmp eq i64 [[TMP3643]], [[TMP3644]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4943]], label [[IF_THEN4945:%.*]], label [[IF_END4946:%.*]]
+// SIMD-ONLY0:       if.then4945:
+// SIMD-ONLY0-NEXT:    [[TMP3645:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3645]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4946]]
+// SIMD-ONLY0:       if.end4946:
+// SIMD-ONLY0-NEXT:    [[TMP3646:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3646]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3647:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3648:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4947:%.*]] = icmp eq i64 [[TMP3647]], [[TMP3648]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4947]], label [[IF_THEN4949:%.*]], label [[IF_END4950:%.*]]
+// SIMD-ONLY0:       if.then4949:
+// SIMD-ONLY0-NEXT:    [[TMP3649:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3649]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4950]]
+// SIMD-ONLY0:       if.end4950:
+// SIMD-ONLY0-NEXT:    [[TMP3650:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3651:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4951:%.*]] = icmp sgt i64 [[TMP3650]], [[TMP3651]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4951]], label [[IF_THEN4953:%.*]], label [[IF_END4954:%.*]]
+// SIMD-ONLY0:       if.then4953:
+// SIMD-ONLY0-NEXT:    [[TMP3652:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3652]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4954]]
+// SIMD-ONLY0:       if.end4954:
+// SIMD-ONLY0-NEXT:    [[TMP3653:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3653]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3654:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3655:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4955:%.*]] = icmp sgt i64 [[TMP3654]], [[TMP3655]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4955]], label [[IF_THEN4957:%.*]], label [[IF_END4958:%.*]]
+// SIMD-ONLY0:       if.then4957:
+// SIMD-ONLY0-NEXT:    [[TMP3656:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3656]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4958]]
+// SIMD-ONLY0:       if.end4958:
+// SIMD-ONLY0-NEXT:    [[TMP3657:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3657]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3658:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3659:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4959:%.*]] = icmp slt i64 [[TMP3658]], [[TMP3659]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4959]], label [[IF_THEN4961:%.*]], label [[IF_END4962:%.*]]
+// SIMD-ONLY0:       if.then4961:
+// SIMD-ONLY0-NEXT:    [[TMP3660:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3660]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4962]]
+// SIMD-ONLY0:       if.end4962:
+// SIMD-ONLY0-NEXT:    [[TMP3661:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3661]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3662:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3663:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4963:%.*]] = icmp slt i64 [[TMP3662]], [[TMP3663]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4963]], label [[IF_THEN4965:%.*]], label [[IF_END4966:%.*]]
+// SIMD-ONLY0:       if.then4965:
+// SIMD-ONLY0-NEXT:    [[TMP3664:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3664]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4966]]
+// SIMD-ONLY0:       if.end4966:
+// SIMD-ONLY0-NEXT:    [[TMP3665:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3665]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3666:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3667:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4967:%.*]] = icmp eq i64 [[TMP3666]], [[TMP3667]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4967]], label [[IF_THEN4969:%.*]], label [[IF_END4970:%.*]]
+// SIMD-ONLY0:       if.then4969:
+// SIMD-ONLY0-NEXT:    [[TMP3668:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3668]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4970]]
+// SIMD-ONLY0:       if.end4970:
+// SIMD-ONLY0-NEXT:    [[TMP3669:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3669]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3670:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3671:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4971:%.*]] = icmp eq i64 [[TMP3670]], [[TMP3671]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4971]], label [[IF_THEN4973:%.*]], label [[IF_END4974:%.*]]
+// SIMD-ONLY0:       if.then4973:
+// SIMD-ONLY0-NEXT:    [[TMP3672:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3672]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4974]]
+// SIMD-ONLY0:       if.end4974:
+// SIMD-ONLY0-NEXT:    [[TMP3673:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3673]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3674:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3675:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4975:%.*]] = icmp eq i64 [[TMP3674]], [[TMP3675]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4975]], label [[IF_THEN4977:%.*]], label [[IF_ELSE4978:%.*]]
+// SIMD-ONLY0:       if.then4977:
+// SIMD-ONLY0-NEXT:    [[TMP3676:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3676]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4979:%.*]]
+// SIMD-ONLY0:       if.else4978:
+// SIMD-ONLY0-NEXT:    [[TMP3677:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3677]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4979]]
+// SIMD-ONLY0:       if.end4979:
+// SIMD-ONLY0-NEXT:    [[TMP3678:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3679:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4980:%.*]] = icmp eq i64 [[TMP3678]], [[TMP3679]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP4980]], label [[IF_THEN4982:%.*]], label [[IF_ELSE4983:%.*]]
+// SIMD-ONLY0:       if.then4982:
+// SIMD-ONLY0-NEXT:    [[TMP3680:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3680]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4984:%.*]]
+// SIMD-ONLY0:       if.else4983:
+// SIMD-ONLY0-NEXT:    [[TMP3681:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3681]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4984]]
+// SIMD-ONLY0:       if.end4984:
+// SIMD-ONLY0-NEXT:    [[TMP3682:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3683:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4985:%.*]] = icmp eq i64 [[TMP3682]], [[TMP3683]]
+// SIMD-ONLY0-NEXT:    [[CONV4986:%.*]] = zext i1 [[CMP4985]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4987:%.*]] = sext i32 [[CONV4986]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4987]], ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3684:%.*]] = load i64, ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4988:%.*]] = icmp ne i64 [[TMP3684]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4988]], label [[IF_THEN4989:%.*]], label [[IF_END4990:%.*]]
+// SIMD-ONLY0:       if.then4989:
+// SIMD-ONLY0-NEXT:    [[TMP3685:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3685]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4990]]
+// SIMD-ONLY0:       if.end4990:
+// SIMD-ONLY0-NEXT:    [[TMP3686:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3687:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4991:%.*]] = icmp eq i64 [[TMP3686]], [[TMP3687]]
+// SIMD-ONLY0-NEXT:    [[CONV4992:%.*]] = zext i1 [[CMP4991]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4993:%.*]] = sext i32 [[CONV4992]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4993]], ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3688:%.*]] = load i64, ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL4994:%.*]] = icmp ne i64 [[TMP3688]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL4994]], label [[IF_THEN4995:%.*]], label [[IF_END4996:%.*]]
+// SIMD-ONLY0:       if.then4995:
+// SIMD-ONLY0-NEXT:    [[TMP3689:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3689]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END4996]]
+// SIMD-ONLY0:       if.end4996:
+// SIMD-ONLY0-NEXT:    [[TMP3690:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3691:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP4997:%.*]] = icmp eq i64 [[TMP3690]], [[TMP3691]]
+// SIMD-ONLY0-NEXT:    [[CONV4998:%.*]] = zext i1 [[CMP4997]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV4999:%.*]] = sext i32 [[CONV4998]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV4999]], ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3692:%.*]] = load i64, ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5000:%.*]] = icmp ne i64 [[TMP3692]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5000]], label [[IF_THEN5001:%.*]], label [[IF_ELSE5002:%.*]]
+// SIMD-ONLY0:       if.then5001:
+// SIMD-ONLY0-NEXT:    [[TMP3693:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3693]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5003:%.*]]
+// SIMD-ONLY0:       if.else5002:
+// SIMD-ONLY0-NEXT:    [[TMP3694:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3694]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5003]]
+// SIMD-ONLY0:       if.end5003:
+// SIMD-ONLY0-NEXT:    [[TMP3695:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3696:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5004:%.*]] = icmp eq i64 [[TMP3695]], [[TMP3696]]
+// SIMD-ONLY0-NEXT:    [[CONV5005:%.*]] = zext i1 [[CMP5004]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5006:%.*]] = sext i32 [[CONV5005]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5006]], ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3697:%.*]] = load i64, ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5007:%.*]] = icmp ne i64 [[TMP3697]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5007]], label [[IF_THEN5008:%.*]], label [[IF_ELSE5009:%.*]]
+// SIMD-ONLY0:       if.then5008:
+// SIMD-ONLY0-NEXT:    [[TMP3698:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3698]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5010:%.*]]
+// SIMD-ONLY0:       if.else5009:
+// SIMD-ONLY0-NEXT:    [[TMP3699:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3699]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5010]]
+// SIMD-ONLY0:       if.end5010:
+// SIMD-ONLY0-NEXT:    [[TMP3700:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3700]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3701:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3702:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5011:%.*]] = icmp sgt i64 [[TMP3701]], [[TMP3702]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5011]], label [[IF_THEN5013:%.*]], label [[IF_END5014:%.*]]
+// SIMD-ONLY0:       if.then5013:
+// SIMD-ONLY0-NEXT:    [[TMP3703:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3703]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5014]]
+// SIMD-ONLY0:       if.end5014:
+// SIMD-ONLY0-NEXT:    [[TMP3704:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3704]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3705:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3706:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5015:%.*]] = icmp sgt i64 [[TMP3705]], [[TMP3706]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5015]], label [[IF_THEN5017:%.*]], label [[IF_END5018:%.*]]
+// SIMD-ONLY0:       if.then5017:
+// SIMD-ONLY0-NEXT:    [[TMP3707:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3707]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5018]]
+// SIMD-ONLY0:       if.end5018:
+// SIMD-ONLY0-NEXT:    [[TMP3708:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3708]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3709:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3710:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5019:%.*]] = icmp slt i64 [[TMP3709]], [[TMP3710]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5019]], label [[IF_THEN5021:%.*]], label [[IF_END5022:%.*]]
+// SIMD-ONLY0:       if.then5021:
+// SIMD-ONLY0-NEXT:    [[TMP3711:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3711]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5022]]
+// SIMD-ONLY0:       if.end5022:
+// SIMD-ONLY0-NEXT:    [[TMP3712:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3712]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3713:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3714:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5023:%.*]] = icmp slt i64 [[TMP3713]], [[TMP3714]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5023]], label [[IF_THEN5025:%.*]], label [[IF_END5026:%.*]]
+// SIMD-ONLY0:       if.then5025:
+// SIMD-ONLY0-NEXT:    [[TMP3715:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3715]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5026]]
+// SIMD-ONLY0:       if.end5026:
+// SIMD-ONLY0-NEXT:    [[TMP3716:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3716]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3717:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3718:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5027:%.*]] = icmp eq i64 [[TMP3717]], [[TMP3718]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5027]], label [[IF_THEN5029:%.*]], label [[IF_END5030:%.*]]
+// SIMD-ONLY0:       if.then5029:
+// SIMD-ONLY0-NEXT:    [[TMP3719:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3719]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5030]]
+// SIMD-ONLY0:       if.end5030:
+// SIMD-ONLY0-NEXT:    [[TMP3720:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3720]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3721:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3722:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5031:%.*]] = icmp eq i64 [[TMP3721]], [[TMP3722]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5031]], label [[IF_THEN5033:%.*]], label [[IF_END5034:%.*]]
+// SIMD-ONLY0:       if.then5033:
+// SIMD-ONLY0-NEXT:    [[TMP3723:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3723]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5034]]
+// SIMD-ONLY0:       if.end5034:
+// SIMD-ONLY0-NEXT:    [[TMP3724:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3725:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5035:%.*]] = icmp sgt i64 [[TMP3724]], [[TMP3725]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5035]], label [[IF_THEN5037:%.*]], label [[IF_END5038:%.*]]
+// SIMD-ONLY0:       if.then5037:
+// SIMD-ONLY0-NEXT:    [[TMP3726:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3726]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5038]]
+// SIMD-ONLY0:       if.end5038:
+// SIMD-ONLY0-NEXT:    [[TMP3727:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3727]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3728:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3729:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5039:%.*]] = icmp sgt i64 [[TMP3728]], [[TMP3729]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5039]], label [[IF_THEN5041:%.*]], label [[IF_END5042:%.*]]
+// SIMD-ONLY0:       if.then5041:
+// SIMD-ONLY0-NEXT:    [[TMP3730:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3730]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5042]]
+// SIMD-ONLY0:       if.end5042:
+// SIMD-ONLY0-NEXT:    [[TMP3731:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3731]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3732:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3733:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5043:%.*]] = icmp slt i64 [[TMP3732]], [[TMP3733]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5043]], label [[IF_THEN5045:%.*]], label [[IF_END5046:%.*]]
+// SIMD-ONLY0:       if.then5045:
+// SIMD-ONLY0-NEXT:    [[TMP3734:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3734]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5046]]
+// SIMD-ONLY0:       if.end5046:
+// SIMD-ONLY0-NEXT:    [[TMP3735:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3735]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3736:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3737:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5047:%.*]] = icmp slt i64 [[TMP3736]], [[TMP3737]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5047]], label [[IF_THEN5049:%.*]], label [[IF_END5050:%.*]]
+// SIMD-ONLY0:       if.then5049:
+// SIMD-ONLY0-NEXT:    [[TMP3738:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3738]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5050]]
+// SIMD-ONLY0:       if.end5050:
+// SIMD-ONLY0-NEXT:    [[TMP3739:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3739]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3740:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3741:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5051:%.*]] = icmp eq i64 [[TMP3740]], [[TMP3741]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5051]], label [[IF_THEN5053:%.*]], label [[IF_END5054:%.*]]
+// SIMD-ONLY0:       if.then5053:
+// SIMD-ONLY0-NEXT:    [[TMP3742:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3742]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5054]]
+// SIMD-ONLY0:       if.end5054:
+// SIMD-ONLY0-NEXT:    [[TMP3743:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3743]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3744:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3745:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5055:%.*]] = icmp eq i64 [[TMP3744]], [[TMP3745]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5055]], label [[IF_THEN5057:%.*]], label [[IF_END5058:%.*]]
+// SIMD-ONLY0:       if.then5057:
+// SIMD-ONLY0-NEXT:    [[TMP3746:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3746]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5058]]
+// SIMD-ONLY0:       if.end5058:
+// SIMD-ONLY0-NEXT:    [[TMP3747:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3747]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3748:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3749:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5059:%.*]] = icmp eq i64 [[TMP3748]], [[TMP3749]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5059]], label [[IF_THEN5061:%.*]], label [[IF_ELSE5062:%.*]]
+// SIMD-ONLY0:       if.then5061:
+// SIMD-ONLY0-NEXT:    [[TMP3750:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3750]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5063:%.*]]
+// SIMD-ONLY0:       if.else5062:
+// SIMD-ONLY0-NEXT:    [[TMP3751:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3751]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5063]]
+// SIMD-ONLY0:       if.end5063:
+// SIMD-ONLY0-NEXT:    [[TMP3752:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3753:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5064:%.*]] = icmp eq i64 [[TMP3752]], [[TMP3753]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5064]], label [[IF_THEN5066:%.*]], label [[IF_ELSE5067:%.*]]
+// SIMD-ONLY0:       if.then5066:
+// SIMD-ONLY0-NEXT:    [[TMP3754:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3754]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5068:%.*]]
+// SIMD-ONLY0:       if.else5067:
+// SIMD-ONLY0-NEXT:    [[TMP3755:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3755]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5068]]
+// SIMD-ONLY0:       if.end5068:
+// SIMD-ONLY0-NEXT:    [[TMP3756:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3757:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5069:%.*]] = icmp eq i64 [[TMP3756]], [[TMP3757]]
+// SIMD-ONLY0-NEXT:    [[CONV5070:%.*]] = zext i1 [[CMP5069]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5071:%.*]] = sext i32 [[CONV5070]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5071]], ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3758:%.*]] = load i64, ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5072:%.*]] = icmp ne i64 [[TMP3758]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5072]], label [[IF_THEN5073:%.*]], label [[IF_END5074:%.*]]
+// SIMD-ONLY0:       if.then5073:
+// SIMD-ONLY0-NEXT:    [[TMP3759:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3759]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5074]]
+// SIMD-ONLY0:       if.end5074:
+// SIMD-ONLY0-NEXT:    [[TMP3760:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3761:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5075:%.*]] = icmp eq i64 [[TMP3760]], [[TMP3761]]
+// SIMD-ONLY0-NEXT:    [[CONV5076:%.*]] = zext i1 [[CMP5075]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5077:%.*]] = sext i32 [[CONV5076]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5077]], ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3762:%.*]] = load i64, ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5078:%.*]] = icmp ne i64 [[TMP3762]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5078]], label [[IF_THEN5079:%.*]], label [[IF_END5080:%.*]]
+// SIMD-ONLY0:       if.then5079:
+// SIMD-ONLY0-NEXT:    [[TMP3763:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3763]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5080]]
+// SIMD-ONLY0:       if.end5080:
+// SIMD-ONLY0-NEXT:    [[TMP3764:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3765:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5081:%.*]] = icmp eq i64 [[TMP3764]], [[TMP3765]]
+// SIMD-ONLY0-NEXT:    [[CONV5082:%.*]] = zext i1 [[CMP5081]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5083:%.*]] = sext i32 [[CONV5082]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5083]], ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3766:%.*]] = load i64, ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5084:%.*]] = icmp ne i64 [[TMP3766]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5084]], label [[IF_THEN5085:%.*]], label [[IF_ELSE5086:%.*]]
+// SIMD-ONLY0:       if.then5085:
+// SIMD-ONLY0-NEXT:    [[TMP3767:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3767]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5087:%.*]]
+// SIMD-ONLY0:       if.else5086:
+// SIMD-ONLY0-NEXT:    [[TMP3768:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3768]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5087]]
+// SIMD-ONLY0:       if.end5087:
+// SIMD-ONLY0-NEXT:    [[TMP3769:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3770:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5088:%.*]] = icmp eq i64 [[TMP3769]], [[TMP3770]]
+// SIMD-ONLY0-NEXT:    [[CONV5089:%.*]] = zext i1 [[CMP5088]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5090:%.*]] = sext i32 [[CONV5089]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5090]], ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3771:%.*]] = load i64, ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5091:%.*]] = icmp ne i64 [[TMP3771]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5091]], label [[IF_THEN5092:%.*]], label [[IF_ELSE5093:%.*]]
+// SIMD-ONLY0:       if.then5092:
+// SIMD-ONLY0-NEXT:    [[TMP3772:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3772]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5094:%.*]]
+// SIMD-ONLY0:       if.else5093:
+// SIMD-ONLY0-NEXT:    [[TMP3773:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3773]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5094]]
+// SIMD-ONLY0:       if.end5094:
+// SIMD-ONLY0-NEXT:    [[TMP3774:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3774]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3775:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3776:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5095:%.*]] = icmp sgt i64 [[TMP3775]], [[TMP3776]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5095]], label [[IF_THEN5097:%.*]], label [[IF_END5098:%.*]]
+// SIMD-ONLY0:       if.then5097:
+// SIMD-ONLY0-NEXT:    [[TMP3777:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3777]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5098]]
+// SIMD-ONLY0:       if.end5098:
+// SIMD-ONLY0-NEXT:    [[TMP3778:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3778]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3779:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3780:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5099:%.*]] = icmp sgt i64 [[TMP3779]], [[TMP3780]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5099]], label [[IF_THEN5101:%.*]], label [[IF_END5102:%.*]]
+// SIMD-ONLY0:       if.then5101:
+// SIMD-ONLY0-NEXT:    [[TMP3781:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3781]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5102]]
+// SIMD-ONLY0:       if.end5102:
+// SIMD-ONLY0-NEXT:    [[TMP3782:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3782]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3783:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3784:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5103:%.*]] = icmp slt i64 [[TMP3783]], [[TMP3784]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5103]], label [[IF_THEN5105:%.*]], label [[IF_END5106:%.*]]
+// SIMD-ONLY0:       if.then5105:
+// SIMD-ONLY0-NEXT:    [[TMP3785:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3785]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5106]]
+// SIMD-ONLY0:       if.end5106:
+// SIMD-ONLY0-NEXT:    [[TMP3786:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3786]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3787:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3788:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5107:%.*]] = icmp slt i64 [[TMP3787]], [[TMP3788]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5107]], label [[IF_THEN5109:%.*]], label [[IF_END5110:%.*]]
+// SIMD-ONLY0:       if.then5109:
+// SIMD-ONLY0-NEXT:    [[TMP3789:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3789]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5110]]
+// SIMD-ONLY0:       if.end5110:
+// SIMD-ONLY0-NEXT:    [[TMP3790:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3790]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3791:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3792:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5111:%.*]] = icmp eq i64 [[TMP3791]], [[TMP3792]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5111]], label [[IF_THEN5113:%.*]], label [[IF_END5114:%.*]]
+// SIMD-ONLY0:       if.then5113:
+// SIMD-ONLY0-NEXT:    [[TMP3793:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3793]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5114]]
+// SIMD-ONLY0:       if.end5114:
+// SIMD-ONLY0-NEXT:    [[TMP3794:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3794]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3795:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3796:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5115:%.*]] = icmp eq i64 [[TMP3795]], [[TMP3796]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5115]], label [[IF_THEN5117:%.*]], label [[IF_END5118:%.*]]
+// SIMD-ONLY0:       if.then5117:
+// SIMD-ONLY0-NEXT:    [[TMP3797:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3797]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5118]]
+// SIMD-ONLY0:       if.end5118:
+// SIMD-ONLY0-NEXT:    [[TMP3798:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3799:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5119:%.*]] = icmp sgt i64 [[TMP3798]], [[TMP3799]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5119]], label [[IF_THEN5121:%.*]], label [[IF_END5122:%.*]]
+// SIMD-ONLY0:       if.then5121:
+// SIMD-ONLY0-NEXT:    [[TMP3800:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3800]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5122]]
+// SIMD-ONLY0:       if.end5122:
+// SIMD-ONLY0-NEXT:    [[TMP3801:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3801]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3802:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3803:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5123:%.*]] = icmp sgt i64 [[TMP3802]], [[TMP3803]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5123]], label [[IF_THEN5125:%.*]], label [[IF_END5126:%.*]]
+// SIMD-ONLY0:       if.then5125:
+// SIMD-ONLY0-NEXT:    [[TMP3804:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3804]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5126]]
+// SIMD-ONLY0:       if.end5126:
+// SIMD-ONLY0-NEXT:    [[TMP3805:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3805]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3806:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3807:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5127:%.*]] = icmp slt i64 [[TMP3806]], [[TMP3807]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5127]], label [[IF_THEN5129:%.*]], label [[IF_END5130:%.*]]
+// SIMD-ONLY0:       if.then5129:
+// SIMD-ONLY0-NEXT:    [[TMP3808:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3808]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5130]]
+// SIMD-ONLY0:       if.end5130:
+// SIMD-ONLY0-NEXT:    [[TMP3809:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3809]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3810:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3811:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5131:%.*]] = icmp slt i64 [[TMP3810]], [[TMP3811]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5131]], label [[IF_THEN5133:%.*]], label [[IF_END5134:%.*]]
+// SIMD-ONLY0:       if.then5133:
+// SIMD-ONLY0-NEXT:    [[TMP3812:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3812]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5134]]
+// SIMD-ONLY0:       if.end5134:
+// SIMD-ONLY0-NEXT:    [[TMP3813:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3813]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3814:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3815:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5135:%.*]] = icmp eq i64 [[TMP3814]], [[TMP3815]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5135]], label [[IF_THEN5137:%.*]], label [[IF_END5138:%.*]]
+// SIMD-ONLY0:       if.then5137:
+// SIMD-ONLY0-NEXT:    [[TMP3816:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3816]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5138]]
+// SIMD-ONLY0:       if.end5138:
+// SIMD-ONLY0-NEXT:    [[TMP3817:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3817]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3818:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3819:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5139:%.*]] = icmp eq i64 [[TMP3818]], [[TMP3819]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5139]], label [[IF_THEN5141:%.*]], label [[IF_END5142:%.*]]
+// SIMD-ONLY0:       if.then5141:
+// SIMD-ONLY0-NEXT:    [[TMP3820:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3820]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5142]]
+// SIMD-ONLY0:       if.end5142:
+// SIMD-ONLY0-NEXT:    [[TMP3821:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3821]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3822:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3823:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5143:%.*]] = icmp eq i64 [[TMP3822]], [[TMP3823]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5143]], label [[IF_THEN5145:%.*]], label [[IF_ELSE5146:%.*]]
+// SIMD-ONLY0:       if.then5145:
+// SIMD-ONLY0-NEXT:    [[TMP3824:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3824]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5147:%.*]]
+// SIMD-ONLY0:       if.else5146:
+// SIMD-ONLY0-NEXT:    [[TMP3825:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3825]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5147]]
+// SIMD-ONLY0:       if.end5147:
+// SIMD-ONLY0-NEXT:    [[TMP3826:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3827:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5148:%.*]] = icmp eq i64 [[TMP3826]], [[TMP3827]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5148]], label [[IF_THEN5150:%.*]], label [[IF_ELSE5151:%.*]]
+// SIMD-ONLY0:       if.then5150:
+// SIMD-ONLY0-NEXT:    [[TMP3828:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3828]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5152:%.*]]
+// SIMD-ONLY0:       if.else5151:
+// SIMD-ONLY0-NEXT:    [[TMP3829:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3829]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5152]]
+// SIMD-ONLY0:       if.end5152:
+// SIMD-ONLY0-NEXT:    [[TMP3830:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3831:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5153:%.*]] = icmp eq i64 [[TMP3830]], [[TMP3831]]
+// SIMD-ONLY0-NEXT:    [[CONV5154:%.*]] = zext i1 [[CMP5153]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5155:%.*]] = sext i32 [[CONV5154]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5155]], ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3832:%.*]] = load i64, ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5156:%.*]] = icmp ne i64 [[TMP3832]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5156]], label [[IF_THEN5157:%.*]], label [[IF_END5158:%.*]]
+// SIMD-ONLY0:       if.then5157:
+// SIMD-ONLY0-NEXT:    [[TMP3833:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3833]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5158]]
+// SIMD-ONLY0:       if.end5158:
+// SIMD-ONLY0-NEXT:    [[TMP3834:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3835:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5159:%.*]] = icmp eq i64 [[TMP3834]], [[TMP3835]]
+// SIMD-ONLY0-NEXT:    [[CONV5160:%.*]] = zext i1 [[CMP5159]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5161:%.*]] = sext i32 [[CONV5160]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5161]], ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3836:%.*]] = load i64, ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5162:%.*]] = icmp ne i64 [[TMP3836]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5162]], label [[IF_THEN5163:%.*]], label [[IF_END5164:%.*]]
+// SIMD-ONLY0:       if.then5163:
+// SIMD-ONLY0-NEXT:    [[TMP3837:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3837]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5164]]
+// SIMD-ONLY0:       if.end5164:
+// SIMD-ONLY0-NEXT:    [[TMP3838:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3839:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5165:%.*]] = icmp eq i64 [[TMP3838]], [[TMP3839]]
+// SIMD-ONLY0-NEXT:    [[CONV5166:%.*]] = zext i1 [[CMP5165]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5167:%.*]] = sext i32 [[CONV5166]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5167]], ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3840:%.*]] = load i64, ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5168:%.*]] = icmp ne i64 [[TMP3840]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5168]], label [[IF_THEN5169:%.*]], label [[IF_ELSE5170:%.*]]
+// SIMD-ONLY0:       if.then5169:
+// SIMD-ONLY0-NEXT:    [[TMP3841:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3841]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5171:%.*]]
+// SIMD-ONLY0:       if.else5170:
+// SIMD-ONLY0-NEXT:    [[TMP3842:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3842]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5171]]
+// SIMD-ONLY0:       if.end5171:
+// SIMD-ONLY0-NEXT:    [[TMP3843:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3844:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5172:%.*]] = icmp eq i64 [[TMP3843]], [[TMP3844]]
+// SIMD-ONLY0-NEXT:    [[CONV5173:%.*]] = zext i1 [[CMP5172]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5174:%.*]] = sext i32 [[CONV5173]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5174]], ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3845:%.*]] = load i64, ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5175:%.*]] = icmp ne i64 [[TMP3845]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5175]], label [[IF_THEN5176:%.*]], label [[IF_ELSE5177:%.*]]
+// SIMD-ONLY0:       if.then5176:
+// SIMD-ONLY0-NEXT:    [[TMP3846:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3846]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5178:%.*]]
+// SIMD-ONLY0:       if.else5177:
+// SIMD-ONLY0-NEXT:    [[TMP3847:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3847]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5178]]
+// SIMD-ONLY0:       if.end5178:
+// SIMD-ONLY0-NEXT:    [[TMP3848:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3848]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3849:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3850:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5179:%.*]] = icmp sgt i64 [[TMP3849]], [[TMP3850]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5179]], label [[IF_THEN5181:%.*]], label [[IF_END5182:%.*]]
+// SIMD-ONLY0:       if.then5181:
+// SIMD-ONLY0-NEXT:    [[TMP3851:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3851]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5182]]
+// SIMD-ONLY0:       if.end5182:
+// SIMD-ONLY0-NEXT:    [[TMP3852:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3852]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3853:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3854:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5183:%.*]] = icmp sgt i64 [[TMP3853]], [[TMP3854]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5183]], label [[IF_THEN5185:%.*]], label [[IF_END5186:%.*]]
+// SIMD-ONLY0:       if.then5185:
+// SIMD-ONLY0-NEXT:    [[TMP3855:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3855]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5186]]
+// SIMD-ONLY0:       if.end5186:
+// SIMD-ONLY0-NEXT:    [[TMP3856:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3856]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3857:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3858:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5187:%.*]] = icmp slt i64 [[TMP3857]], [[TMP3858]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5187]], label [[IF_THEN5189:%.*]], label [[IF_END5190:%.*]]
+// SIMD-ONLY0:       if.then5189:
+// SIMD-ONLY0-NEXT:    [[TMP3859:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3859]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5190]]
+// SIMD-ONLY0:       if.end5190:
+// SIMD-ONLY0-NEXT:    [[TMP3860:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3860]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3861:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3862:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5191:%.*]] = icmp slt i64 [[TMP3861]], [[TMP3862]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5191]], label [[IF_THEN5193:%.*]], label [[IF_END5194:%.*]]
+// SIMD-ONLY0:       if.then5193:
+// SIMD-ONLY0-NEXT:    [[TMP3863:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3863]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5194]]
+// SIMD-ONLY0:       if.end5194:
+// SIMD-ONLY0-NEXT:    [[TMP3864:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3864]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3865:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3866:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5195:%.*]] = icmp eq i64 [[TMP3865]], [[TMP3866]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5195]], label [[IF_THEN5197:%.*]], label [[IF_END5198:%.*]]
+// SIMD-ONLY0:       if.then5197:
+// SIMD-ONLY0-NEXT:    [[TMP3867:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3867]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5198]]
+// SIMD-ONLY0:       if.end5198:
+// SIMD-ONLY0-NEXT:    [[TMP3868:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3868]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3869:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3870:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5199:%.*]] = icmp eq i64 [[TMP3869]], [[TMP3870]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5199]], label [[IF_THEN5201:%.*]], label [[IF_END5202:%.*]]
+// SIMD-ONLY0:       if.then5201:
+// SIMD-ONLY0-NEXT:    [[TMP3871:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3871]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5202]]
+// SIMD-ONLY0:       if.end5202:
+// SIMD-ONLY0-NEXT:    [[TMP3872:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3873:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5203:%.*]] = icmp sgt i64 [[TMP3872]], [[TMP3873]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5203]], label [[IF_THEN5205:%.*]], label [[IF_END5206:%.*]]
+// SIMD-ONLY0:       if.then5205:
+// SIMD-ONLY0-NEXT:    [[TMP3874:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3874]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5206]]
+// SIMD-ONLY0:       if.end5206:
+// SIMD-ONLY0-NEXT:    [[TMP3875:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3875]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3876:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3877:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5207:%.*]] = icmp sgt i64 [[TMP3876]], [[TMP3877]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5207]], label [[IF_THEN5209:%.*]], label [[IF_END5210:%.*]]
+// SIMD-ONLY0:       if.then5209:
+// SIMD-ONLY0-NEXT:    [[TMP3878:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3878]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5210]]
+// SIMD-ONLY0:       if.end5210:
+// SIMD-ONLY0-NEXT:    [[TMP3879:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3879]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3880:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3881:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5211:%.*]] = icmp slt i64 [[TMP3880]], [[TMP3881]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5211]], label [[IF_THEN5213:%.*]], label [[IF_END5214:%.*]]
+// SIMD-ONLY0:       if.then5213:
+// SIMD-ONLY0-NEXT:    [[TMP3882:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3882]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5214]]
+// SIMD-ONLY0:       if.end5214:
+// SIMD-ONLY0-NEXT:    [[TMP3883:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3883]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3884:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3885:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5215:%.*]] = icmp slt i64 [[TMP3884]], [[TMP3885]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5215]], label [[IF_THEN5217:%.*]], label [[IF_END5218:%.*]]
+// SIMD-ONLY0:       if.then5217:
+// SIMD-ONLY0-NEXT:    [[TMP3886:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3886]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5218]]
+// SIMD-ONLY0:       if.end5218:
+// SIMD-ONLY0-NEXT:    [[TMP3887:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3887]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3888:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3889:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5219:%.*]] = icmp eq i64 [[TMP3888]], [[TMP3889]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5219]], label [[IF_THEN5221:%.*]], label [[IF_END5222:%.*]]
+// SIMD-ONLY0:       if.then5221:
+// SIMD-ONLY0-NEXT:    [[TMP3890:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3890]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5222]]
+// SIMD-ONLY0:       if.end5222:
+// SIMD-ONLY0-NEXT:    [[TMP3891:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3891]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3892:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3893:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5223:%.*]] = icmp eq i64 [[TMP3892]], [[TMP3893]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5223]], label [[IF_THEN5225:%.*]], label [[IF_END5226:%.*]]
+// SIMD-ONLY0:       if.then5225:
+// SIMD-ONLY0-NEXT:    [[TMP3894:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3894]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5226]]
+// SIMD-ONLY0:       if.end5226:
+// SIMD-ONLY0-NEXT:    [[TMP3895:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3895]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3896:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3897:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5227:%.*]] = icmp eq i64 [[TMP3896]], [[TMP3897]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5227]], label [[IF_THEN5229:%.*]], label [[IF_ELSE5230:%.*]]
+// SIMD-ONLY0:       if.then5229:
+// SIMD-ONLY0-NEXT:    [[TMP3898:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3898]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5231:%.*]]
+// SIMD-ONLY0:       if.else5230:
+// SIMD-ONLY0-NEXT:    [[TMP3899:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3899]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5231]]
+// SIMD-ONLY0:       if.end5231:
+// SIMD-ONLY0-NEXT:    [[TMP3900:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3901:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5232:%.*]] = icmp eq i64 [[TMP3900]], [[TMP3901]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5232]], label [[IF_THEN5234:%.*]], label [[IF_ELSE5235:%.*]]
+// SIMD-ONLY0:       if.then5234:
+// SIMD-ONLY0-NEXT:    [[TMP3902:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3902]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5236:%.*]]
+// SIMD-ONLY0:       if.else5235:
+// SIMD-ONLY0-NEXT:    [[TMP3903:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3903]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5236]]
+// SIMD-ONLY0:       if.end5236:
+// SIMD-ONLY0-NEXT:    [[TMP3904:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3905:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5237:%.*]] = icmp eq i64 [[TMP3904]], [[TMP3905]]
+// SIMD-ONLY0-NEXT:    [[CONV5238:%.*]] = zext i1 [[CMP5237]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5239:%.*]] = sext i32 [[CONV5238]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5239]], ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3906:%.*]] = load i64, ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5240:%.*]] = icmp ne i64 [[TMP3906]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5240]], label [[IF_THEN5241:%.*]], label [[IF_END5242:%.*]]
+// SIMD-ONLY0:       if.then5241:
+// SIMD-ONLY0-NEXT:    [[TMP3907:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3907]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5242]]
+// SIMD-ONLY0:       if.end5242:
+// SIMD-ONLY0-NEXT:    [[TMP3908:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3909:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5243:%.*]] = icmp eq i64 [[TMP3908]], [[TMP3909]]
+// SIMD-ONLY0-NEXT:    [[CONV5244:%.*]] = zext i1 [[CMP5243]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5245:%.*]] = sext i32 [[CONV5244]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5245]], ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3910:%.*]] = load i64, ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5246:%.*]] = icmp ne i64 [[TMP3910]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5246]], label [[IF_THEN5247:%.*]], label [[IF_END5248:%.*]]
+// SIMD-ONLY0:       if.then5247:
+// SIMD-ONLY0-NEXT:    [[TMP3911:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3911]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5248]]
+// SIMD-ONLY0:       if.end5248:
+// SIMD-ONLY0-NEXT:    [[TMP3912:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3913:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5249:%.*]] = icmp eq i64 [[TMP3912]], [[TMP3913]]
+// SIMD-ONLY0-NEXT:    [[CONV5250:%.*]] = zext i1 [[CMP5249]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5251:%.*]] = sext i32 [[CONV5250]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5251]], ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3914:%.*]] = load i64, ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5252:%.*]] = icmp ne i64 [[TMP3914]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5252]], label [[IF_THEN5253:%.*]], label [[IF_ELSE5254:%.*]]
+// SIMD-ONLY0:       if.then5253:
+// SIMD-ONLY0-NEXT:    [[TMP3915:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3915]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5255:%.*]]
+// SIMD-ONLY0:       if.else5254:
+// SIMD-ONLY0-NEXT:    [[TMP3916:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3916]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5255]]
+// SIMD-ONLY0:       if.end5255:
+// SIMD-ONLY0-NEXT:    [[TMP3917:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3918:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5256:%.*]] = icmp eq i64 [[TMP3917]], [[TMP3918]]
+// SIMD-ONLY0-NEXT:    [[CONV5257:%.*]] = zext i1 [[CMP5256]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5258:%.*]] = sext i32 [[CONV5257]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5258]], ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3919:%.*]] = load i64, ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5259:%.*]] = icmp ne i64 [[TMP3919]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5259]], label [[IF_THEN5260:%.*]], label [[IF_ELSE5261:%.*]]
+// SIMD-ONLY0:       if.then5260:
+// SIMD-ONLY0-NEXT:    [[TMP3920:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3920]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5262:%.*]]
+// SIMD-ONLY0:       if.else5261:
+// SIMD-ONLY0-NEXT:    [[TMP3921:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3921]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5262]]
+// SIMD-ONLY0:       if.end5262:
+// SIMD-ONLY0-NEXT:    [[TMP3922:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3922]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3923:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3924:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5263:%.*]] = icmp sgt i64 [[TMP3923]], [[TMP3924]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5263]], label [[IF_THEN5265:%.*]], label [[IF_END5266:%.*]]
+// SIMD-ONLY0:       if.then5265:
+// SIMD-ONLY0-NEXT:    [[TMP3925:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3925]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5266]]
+// SIMD-ONLY0:       if.end5266:
+// SIMD-ONLY0-NEXT:    [[TMP3926:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3926]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3927:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3928:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5267:%.*]] = icmp sgt i64 [[TMP3927]], [[TMP3928]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5267]], label [[IF_THEN5269:%.*]], label [[IF_END5270:%.*]]
+// SIMD-ONLY0:       if.then5269:
+// SIMD-ONLY0-NEXT:    [[TMP3929:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3929]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5270]]
+// SIMD-ONLY0:       if.end5270:
+// SIMD-ONLY0-NEXT:    [[TMP3930:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3930]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3931:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3932:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5271:%.*]] = icmp slt i64 [[TMP3931]], [[TMP3932]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5271]], label [[IF_THEN5273:%.*]], label [[IF_END5274:%.*]]
+// SIMD-ONLY0:       if.then5273:
+// SIMD-ONLY0-NEXT:    [[TMP3933:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3933]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5274]]
+// SIMD-ONLY0:       if.end5274:
+// SIMD-ONLY0-NEXT:    [[TMP3934:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3934]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3935:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3936:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5275:%.*]] = icmp slt i64 [[TMP3935]], [[TMP3936]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5275]], label [[IF_THEN5277:%.*]], label [[IF_END5278:%.*]]
+// SIMD-ONLY0:       if.then5277:
+// SIMD-ONLY0-NEXT:    [[TMP3937:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3937]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5278]]
+// SIMD-ONLY0:       if.end5278:
+// SIMD-ONLY0-NEXT:    [[TMP3938:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3938]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3939:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3940:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5279:%.*]] = icmp eq i64 [[TMP3939]], [[TMP3940]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5279]], label [[IF_THEN5281:%.*]], label [[IF_END5282:%.*]]
+// SIMD-ONLY0:       if.then5281:
+// SIMD-ONLY0-NEXT:    [[TMP3941:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3941]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5282]]
+// SIMD-ONLY0:       if.end5282:
+// SIMD-ONLY0-NEXT:    [[TMP3942:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3942]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3943:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3944:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5283:%.*]] = icmp eq i64 [[TMP3943]], [[TMP3944]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5283]], label [[IF_THEN5285:%.*]], label [[IF_END5286:%.*]]
+// SIMD-ONLY0:       if.then5285:
+// SIMD-ONLY0-NEXT:    [[TMP3945:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3945]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5286]]
+// SIMD-ONLY0:       if.end5286:
+// SIMD-ONLY0-NEXT:    [[TMP3946:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3947:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5287:%.*]] = icmp sgt i64 [[TMP3946]], [[TMP3947]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5287]], label [[IF_THEN5289:%.*]], label [[IF_END5290:%.*]]
+// SIMD-ONLY0:       if.then5289:
+// SIMD-ONLY0-NEXT:    [[TMP3948:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3948]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5290]]
+// SIMD-ONLY0:       if.end5290:
+// SIMD-ONLY0-NEXT:    [[TMP3949:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3949]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3950:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3951:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5291:%.*]] = icmp sgt i64 [[TMP3950]], [[TMP3951]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5291]], label [[IF_THEN5293:%.*]], label [[IF_END5294:%.*]]
+// SIMD-ONLY0:       if.then5293:
+// SIMD-ONLY0-NEXT:    [[TMP3952:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3952]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5294]]
+// SIMD-ONLY0:       if.end5294:
+// SIMD-ONLY0-NEXT:    [[TMP3953:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3953]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3954:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3955:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5295:%.*]] = icmp slt i64 [[TMP3954]], [[TMP3955]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5295]], label [[IF_THEN5297:%.*]], label [[IF_END5298:%.*]]
+// SIMD-ONLY0:       if.then5297:
+// SIMD-ONLY0-NEXT:    [[TMP3956:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3956]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5298]]
+// SIMD-ONLY0:       if.end5298:
+// SIMD-ONLY0-NEXT:    [[TMP3957:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3957]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3958:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3959:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5299:%.*]] = icmp slt i64 [[TMP3958]], [[TMP3959]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5299]], label [[IF_THEN5301:%.*]], label [[IF_END5302:%.*]]
+// SIMD-ONLY0:       if.then5301:
+// SIMD-ONLY0-NEXT:    [[TMP3960:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3960]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5302]]
+// SIMD-ONLY0:       if.end5302:
+// SIMD-ONLY0-NEXT:    [[TMP3961:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3961]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3962:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3963:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5303:%.*]] = icmp eq i64 [[TMP3962]], [[TMP3963]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5303]], label [[IF_THEN5305:%.*]], label [[IF_END5306:%.*]]
+// SIMD-ONLY0:       if.then5305:
+// SIMD-ONLY0-NEXT:    [[TMP3964:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3964]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5306]]
+// SIMD-ONLY0:       if.end5306:
+// SIMD-ONLY0-NEXT:    [[TMP3965:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3965]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3966:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3967:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5307:%.*]] = icmp eq i64 [[TMP3966]], [[TMP3967]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5307]], label [[IF_THEN5309:%.*]], label [[IF_END5310:%.*]]
+// SIMD-ONLY0:       if.then5309:
+// SIMD-ONLY0-NEXT:    [[TMP3968:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3968]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5310]]
+// SIMD-ONLY0:       if.end5310:
+// SIMD-ONLY0-NEXT:    [[TMP3969:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3969]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3970:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3971:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5311:%.*]] = icmp eq i64 [[TMP3970]], [[TMP3971]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5311]], label [[IF_THEN5313:%.*]], label [[IF_ELSE5314:%.*]]
+// SIMD-ONLY0:       if.then5313:
+// SIMD-ONLY0-NEXT:    [[TMP3972:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3972]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5315:%.*]]
+// SIMD-ONLY0:       if.else5314:
+// SIMD-ONLY0-NEXT:    [[TMP3973:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3973]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5315]]
+// SIMD-ONLY0:       if.end5315:
+// SIMD-ONLY0-NEXT:    [[TMP3974:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3975:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5316:%.*]] = icmp eq i64 [[TMP3974]], [[TMP3975]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5316]], label [[IF_THEN5318:%.*]], label [[IF_ELSE5319:%.*]]
+// SIMD-ONLY0:       if.then5318:
+// SIMD-ONLY0-NEXT:    [[TMP3976:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3976]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5320:%.*]]
+// SIMD-ONLY0:       if.else5319:
+// SIMD-ONLY0-NEXT:    [[TMP3977:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3977]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5320]]
+// SIMD-ONLY0:       if.end5320:
+// SIMD-ONLY0-NEXT:    [[TMP3978:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3979:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5321:%.*]] = icmp eq i64 [[TMP3978]], [[TMP3979]]
+// SIMD-ONLY0-NEXT:    [[CONV5322:%.*]] = zext i1 [[CMP5321]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5323:%.*]] = sext i32 [[CONV5322]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5323]], ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3980:%.*]] = load i64, ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5324:%.*]] = icmp ne i64 [[TMP3980]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5324]], label [[IF_THEN5325:%.*]], label [[IF_END5326:%.*]]
+// SIMD-ONLY0:       if.then5325:
+// SIMD-ONLY0-NEXT:    [[TMP3981:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3981]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5326]]
+// SIMD-ONLY0:       if.end5326:
+// SIMD-ONLY0-NEXT:    [[TMP3982:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3983:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5327:%.*]] = icmp eq i64 [[TMP3982]], [[TMP3983]]
+// SIMD-ONLY0-NEXT:    [[CONV5328:%.*]] = zext i1 [[CMP5327]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5329:%.*]] = sext i32 [[CONV5328]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5329]], ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3984:%.*]] = load i64, ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5330:%.*]] = icmp ne i64 [[TMP3984]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5330]], label [[IF_THEN5331:%.*]], label [[IF_END5332:%.*]]
+// SIMD-ONLY0:       if.then5331:
+// SIMD-ONLY0-NEXT:    [[TMP3985:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3985]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5332]]
+// SIMD-ONLY0:       if.end5332:
+// SIMD-ONLY0-NEXT:    [[TMP3986:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3987:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5333:%.*]] = icmp eq i64 [[TMP3986]], [[TMP3987]]
+// SIMD-ONLY0-NEXT:    [[CONV5334:%.*]] = zext i1 [[CMP5333]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5335:%.*]] = sext i32 [[CONV5334]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5335]], ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3988:%.*]] = load i64, ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5336:%.*]] = icmp ne i64 [[TMP3988]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5336]], label [[IF_THEN5337:%.*]], label [[IF_ELSE5338:%.*]]
+// SIMD-ONLY0:       if.then5337:
+// SIMD-ONLY0-NEXT:    [[TMP3989:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3989]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5339:%.*]]
+// SIMD-ONLY0:       if.else5338:
+// SIMD-ONLY0-NEXT:    [[TMP3990:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3990]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5339]]
+// SIMD-ONLY0:       if.end5339:
+// SIMD-ONLY0-NEXT:    [[TMP3991:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3992:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5340:%.*]] = icmp eq i64 [[TMP3991]], [[TMP3992]]
+// SIMD-ONLY0-NEXT:    [[CONV5341:%.*]] = zext i1 [[CMP5340]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5342:%.*]] = sext i32 [[CONV5341]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5342]], ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3993:%.*]] = load i64, ptr [[LLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5343:%.*]] = icmp ne i64 [[TMP3993]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5343]], label [[IF_THEN5344:%.*]], label [[IF_ELSE5345:%.*]]
+// SIMD-ONLY0:       if.then5344:
+// SIMD-ONLY0-NEXT:    [[TMP3994:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3994]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5346:%.*]]
+// SIMD-ONLY0:       if.else5345:
+// SIMD-ONLY0-NEXT:    [[TMP3995:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3995]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5346]]
+// SIMD-ONLY0:       if.end5346:
+// SIMD-ONLY0-NEXT:    [[TMP3996:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3996]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3997:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP3998:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5347:%.*]] = icmp ugt i64 [[TMP3997]], [[TMP3998]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5347]], label [[IF_THEN5349:%.*]], label [[IF_END5350:%.*]]
+// SIMD-ONLY0:       if.then5349:
+// SIMD-ONLY0-NEXT:    [[TMP3999:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP3999]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5350]]
+// SIMD-ONLY0:       if.end5350:
+// SIMD-ONLY0-NEXT:    [[TMP4000:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4000]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4001:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4002:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5351:%.*]] = icmp ugt i64 [[TMP4001]], [[TMP4002]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5351]], label [[IF_THEN5353:%.*]], label [[IF_END5354:%.*]]
+// SIMD-ONLY0:       if.then5353:
+// SIMD-ONLY0-NEXT:    [[TMP4003:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4003]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5354]]
+// SIMD-ONLY0:       if.end5354:
+// SIMD-ONLY0-NEXT:    [[TMP4004:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4004]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4005:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4006:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5355:%.*]] = icmp ult i64 [[TMP4005]], [[TMP4006]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5355]], label [[IF_THEN5357:%.*]], label [[IF_END5358:%.*]]
+// SIMD-ONLY0:       if.then5357:
+// SIMD-ONLY0-NEXT:    [[TMP4007:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4007]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5358]]
+// SIMD-ONLY0:       if.end5358:
+// SIMD-ONLY0-NEXT:    [[TMP4008:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4008]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4009:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4010:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5359:%.*]] = icmp ult i64 [[TMP4009]], [[TMP4010]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5359]], label [[IF_THEN5361:%.*]], label [[IF_END5362:%.*]]
+// SIMD-ONLY0:       if.then5361:
+// SIMD-ONLY0-NEXT:    [[TMP4011:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4011]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5362]]
+// SIMD-ONLY0:       if.end5362:
+// SIMD-ONLY0-NEXT:    [[TMP4012:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4012]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4013:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4014:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5363:%.*]] = icmp eq i64 [[TMP4013]], [[TMP4014]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5363]], label [[IF_THEN5365:%.*]], label [[IF_END5366:%.*]]
+// SIMD-ONLY0:       if.then5365:
+// SIMD-ONLY0-NEXT:    [[TMP4015:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4015]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5366]]
+// SIMD-ONLY0:       if.end5366:
+// SIMD-ONLY0-NEXT:    [[TMP4016:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4016]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4017:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4018:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5367:%.*]] = icmp eq i64 [[TMP4017]], [[TMP4018]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5367]], label [[IF_THEN5369:%.*]], label [[IF_END5370:%.*]]
+// SIMD-ONLY0:       if.then5369:
+// SIMD-ONLY0-NEXT:    [[TMP4019:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4019]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5370]]
+// SIMD-ONLY0:       if.end5370:
+// SIMD-ONLY0-NEXT:    [[TMP4020:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4021:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5371:%.*]] = icmp ugt i64 [[TMP4020]], [[TMP4021]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5371]], label [[IF_THEN5373:%.*]], label [[IF_END5374:%.*]]
+// SIMD-ONLY0:       if.then5373:
+// SIMD-ONLY0-NEXT:    [[TMP4022:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4022]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5374]]
+// SIMD-ONLY0:       if.end5374:
+// SIMD-ONLY0-NEXT:    [[TMP4023:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4023]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4024:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4025:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5375:%.*]] = icmp ugt i64 [[TMP4024]], [[TMP4025]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5375]], label [[IF_THEN5377:%.*]], label [[IF_END5378:%.*]]
+// SIMD-ONLY0:       if.then5377:
+// SIMD-ONLY0-NEXT:    [[TMP4026:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4026]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5378]]
+// SIMD-ONLY0:       if.end5378:
+// SIMD-ONLY0-NEXT:    [[TMP4027:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4027]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4028:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4029:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5379:%.*]] = icmp ult i64 [[TMP4028]], [[TMP4029]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5379]], label [[IF_THEN5381:%.*]], label [[IF_END5382:%.*]]
+// SIMD-ONLY0:       if.then5381:
+// SIMD-ONLY0-NEXT:    [[TMP4030:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4030]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5382]]
+// SIMD-ONLY0:       if.end5382:
+// SIMD-ONLY0-NEXT:    [[TMP4031:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4031]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4032:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4033:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5383:%.*]] = icmp ult i64 [[TMP4032]], [[TMP4033]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5383]], label [[IF_THEN5385:%.*]], label [[IF_END5386:%.*]]
+// SIMD-ONLY0:       if.then5385:
+// SIMD-ONLY0-NEXT:    [[TMP4034:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4034]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5386]]
+// SIMD-ONLY0:       if.end5386:
+// SIMD-ONLY0-NEXT:    [[TMP4035:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4035]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4036:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4037:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5387:%.*]] = icmp eq i64 [[TMP4036]], [[TMP4037]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5387]], label [[IF_THEN5389:%.*]], label [[IF_END5390:%.*]]
+// SIMD-ONLY0:       if.then5389:
+// SIMD-ONLY0-NEXT:    [[TMP4038:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4038]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5390]]
+// SIMD-ONLY0:       if.end5390:
+// SIMD-ONLY0-NEXT:    [[TMP4039:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4039]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4040:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4041:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5391:%.*]] = icmp eq i64 [[TMP4040]], [[TMP4041]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5391]], label [[IF_THEN5393:%.*]], label [[IF_END5394:%.*]]
+// SIMD-ONLY0:       if.then5393:
+// SIMD-ONLY0-NEXT:    [[TMP4042:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4042]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5394]]
+// SIMD-ONLY0:       if.end5394:
+// SIMD-ONLY0-NEXT:    [[TMP4043:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4043]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4044:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4045:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5395:%.*]] = icmp eq i64 [[TMP4044]], [[TMP4045]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5395]], label [[IF_THEN5397:%.*]], label [[IF_ELSE5398:%.*]]
+// SIMD-ONLY0:       if.then5397:
+// SIMD-ONLY0-NEXT:    [[TMP4046:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4046]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5399:%.*]]
+// SIMD-ONLY0:       if.else5398:
+// SIMD-ONLY0-NEXT:    [[TMP4047:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4047]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5399]]
+// SIMD-ONLY0:       if.end5399:
+// SIMD-ONLY0-NEXT:    [[TMP4048:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4049:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5400:%.*]] = icmp eq i64 [[TMP4048]], [[TMP4049]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5400]], label [[IF_THEN5402:%.*]], label [[IF_ELSE5403:%.*]]
+// SIMD-ONLY0:       if.then5402:
+// SIMD-ONLY0-NEXT:    [[TMP4050:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4050]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5404:%.*]]
+// SIMD-ONLY0:       if.else5403:
+// SIMD-ONLY0-NEXT:    [[TMP4051:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4051]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5404]]
+// SIMD-ONLY0:       if.end5404:
+// SIMD-ONLY0-NEXT:    [[TMP4052:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4053:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5405:%.*]] = icmp eq i64 [[TMP4052]], [[TMP4053]]
+// SIMD-ONLY0-NEXT:    [[CONV5406:%.*]] = zext i1 [[CMP5405]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5407:%.*]] = sext i32 [[CONV5406]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5407]], ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4054:%.*]] = load i64, ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5408:%.*]] = icmp ne i64 [[TMP4054]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5408]], label [[IF_THEN5409:%.*]], label [[IF_END5410:%.*]]
+// SIMD-ONLY0:       if.then5409:
+// SIMD-ONLY0-NEXT:    [[TMP4055:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4055]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5410]]
+// SIMD-ONLY0:       if.end5410:
+// SIMD-ONLY0-NEXT:    [[TMP4056:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4057:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5411:%.*]] = icmp eq i64 [[TMP4056]], [[TMP4057]]
+// SIMD-ONLY0-NEXT:    [[CONV5412:%.*]] = zext i1 [[CMP5411]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5413:%.*]] = sext i32 [[CONV5412]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5413]], ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4058:%.*]] = load i64, ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5414:%.*]] = icmp ne i64 [[TMP4058]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5414]], label [[IF_THEN5415:%.*]], label [[IF_END5416:%.*]]
+// SIMD-ONLY0:       if.then5415:
+// SIMD-ONLY0-NEXT:    [[TMP4059:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4059]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5416]]
+// SIMD-ONLY0:       if.end5416:
+// SIMD-ONLY0-NEXT:    [[TMP4060:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4061:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5417:%.*]] = icmp eq i64 [[TMP4060]], [[TMP4061]]
+// SIMD-ONLY0-NEXT:    [[CONV5418:%.*]] = zext i1 [[CMP5417]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5419:%.*]] = sext i32 [[CONV5418]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5419]], ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4062:%.*]] = load i64, ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5420:%.*]] = icmp ne i64 [[TMP4062]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5420]], label [[IF_THEN5421:%.*]], label [[IF_ELSE5422:%.*]]
+// SIMD-ONLY0:       if.then5421:
+// SIMD-ONLY0-NEXT:    [[TMP4063:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4063]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5423:%.*]]
+// SIMD-ONLY0:       if.else5422:
+// SIMD-ONLY0-NEXT:    [[TMP4064:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4064]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5423]]
+// SIMD-ONLY0:       if.end5423:
+// SIMD-ONLY0-NEXT:    [[TMP4065:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4066:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5424:%.*]] = icmp eq i64 [[TMP4065]], [[TMP4066]]
+// SIMD-ONLY0-NEXT:    [[CONV5425:%.*]] = zext i1 [[CMP5424]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5426:%.*]] = sext i32 [[CONV5425]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5426]], ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4067:%.*]] = load i64, ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5427:%.*]] = icmp ne i64 [[TMP4067]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5427]], label [[IF_THEN5428:%.*]], label [[IF_ELSE5429:%.*]]
+// SIMD-ONLY0:       if.then5428:
+// SIMD-ONLY0-NEXT:    [[TMP4068:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4068]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5430:%.*]]
+// SIMD-ONLY0:       if.else5429:
+// SIMD-ONLY0-NEXT:    [[TMP4069:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4069]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5430]]
+// SIMD-ONLY0:       if.end5430:
+// SIMD-ONLY0-NEXT:    [[TMP4070:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4070]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4071:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4072:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5431:%.*]] = icmp ugt i64 [[TMP4071]], [[TMP4072]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5431]], label [[IF_THEN5433:%.*]], label [[IF_END5434:%.*]]
+// SIMD-ONLY0:       if.then5433:
+// SIMD-ONLY0-NEXT:    [[TMP4073:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4073]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5434]]
+// SIMD-ONLY0:       if.end5434:
+// SIMD-ONLY0-NEXT:    [[TMP4074:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4074]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4075:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4076:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5435:%.*]] = icmp ugt i64 [[TMP4075]], [[TMP4076]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5435]], label [[IF_THEN5437:%.*]], label [[IF_END5438:%.*]]
+// SIMD-ONLY0:       if.then5437:
+// SIMD-ONLY0-NEXT:    [[TMP4077:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4077]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5438]]
+// SIMD-ONLY0:       if.end5438:
+// SIMD-ONLY0-NEXT:    [[TMP4078:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4078]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4079:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4080:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5439:%.*]] = icmp ult i64 [[TMP4079]], [[TMP4080]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5439]], label [[IF_THEN5441:%.*]], label [[IF_END5442:%.*]]
+// SIMD-ONLY0:       if.then5441:
+// SIMD-ONLY0-NEXT:    [[TMP4081:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4081]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5442]]
+// SIMD-ONLY0:       if.end5442:
+// SIMD-ONLY0-NEXT:    [[TMP4082:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4082]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4083:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4084:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5443:%.*]] = icmp ult i64 [[TMP4083]], [[TMP4084]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5443]], label [[IF_THEN5445:%.*]], label [[IF_END5446:%.*]]
+// SIMD-ONLY0:       if.then5445:
+// SIMD-ONLY0-NEXT:    [[TMP4085:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4085]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5446]]
+// SIMD-ONLY0:       if.end5446:
+// SIMD-ONLY0-NEXT:    [[TMP4086:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4086]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4087:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4088:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5447:%.*]] = icmp eq i64 [[TMP4087]], [[TMP4088]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5447]], label [[IF_THEN5449:%.*]], label [[IF_END5450:%.*]]
+// SIMD-ONLY0:       if.then5449:
+// SIMD-ONLY0-NEXT:    [[TMP4089:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4089]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5450]]
+// SIMD-ONLY0:       if.end5450:
+// SIMD-ONLY0-NEXT:    [[TMP4090:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4090]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4091:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4092:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5451:%.*]] = icmp eq i64 [[TMP4091]], [[TMP4092]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5451]], label [[IF_THEN5453:%.*]], label [[IF_END5454:%.*]]
+// SIMD-ONLY0:       if.then5453:
+// SIMD-ONLY0-NEXT:    [[TMP4093:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4093]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5454]]
+// SIMD-ONLY0:       if.end5454:
+// SIMD-ONLY0-NEXT:    [[TMP4094:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4095:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5455:%.*]] = icmp ugt i64 [[TMP4094]], [[TMP4095]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5455]], label [[IF_THEN5457:%.*]], label [[IF_END5458:%.*]]
+// SIMD-ONLY0:       if.then5457:
+// SIMD-ONLY0-NEXT:    [[TMP4096:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4096]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5458]]
+// SIMD-ONLY0:       if.end5458:
+// SIMD-ONLY0-NEXT:    [[TMP4097:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4097]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4098:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4099:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5459:%.*]] = icmp ugt i64 [[TMP4098]], [[TMP4099]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5459]], label [[IF_THEN5461:%.*]], label [[IF_END5462:%.*]]
+// SIMD-ONLY0:       if.then5461:
+// SIMD-ONLY0-NEXT:    [[TMP4100:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4100]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5462]]
+// SIMD-ONLY0:       if.end5462:
+// SIMD-ONLY0-NEXT:    [[TMP4101:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4101]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4102:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4103:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5463:%.*]] = icmp ult i64 [[TMP4102]], [[TMP4103]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5463]], label [[IF_THEN5465:%.*]], label [[IF_END5466:%.*]]
+// SIMD-ONLY0:       if.then5465:
+// SIMD-ONLY0-NEXT:    [[TMP4104:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4104]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5466]]
+// SIMD-ONLY0:       if.end5466:
+// SIMD-ONLY0-NEXT:    [[TMP4105:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4105]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4106:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4107:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5467:%.*]] = icmp ult i64 [[TMP4106]], [[TMP4107]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5467]], label [[IF_THEN5469:%.*]], label [[IF_END5470:%.*]]
+// SIMD-ONLY0:       if.then5469:
+// SIMD-ONLY0-NEXT:    [[TMP4108:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4108]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5470]]
+// SIMD-ONLY0:       if.end5470:
+// SIMD-ONLY0-NEXT:    [[TMP4109:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4109]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4110:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4111:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5471:%.*]] = icmp eq i64 [[TMP4110]], [[TMP4111]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5471]], label [[IF_THEN5473:%.*]], label [[IF_END5474:%.*]]
+// SIMD-ONLY0:       if.then5473:
+// SIMD-ONLY0-NEXT:    [[TMP4112:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4112]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5474]]
+// SIMD-ONLY0:       if.end5474:
+// SIMD-ONLY0-NEXT:    [[TMP4113:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4113]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4114:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4115:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5475:%.*]] = icmp eq i64 [[TMP4114]], [[TMP4115]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5475]], label [[IF_THEN5477:%.*]], label [[IF_END5478:%.*]]
+// SIMD-ONLY0:       if.then5477:
+// SIMD-ONLY0-NEXT:    [[TMP4116:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4116]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5478]]
+// SIMD-ONLY0:       if.end5478:
+// SIMD-ONLY0-NEXT:    [[TMP4117:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4117]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4118:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4119:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5479:%.*]] = icmp eq i64 [[TMP4118]], [[TMP4119]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5479]], label [[IF_THEN5481:%.*]], label [[IF_ELSE5482:%.*]]
+// SIMD-ONLY0:       if.then5481:
+// SIMD-ONLY0-NEXT:    [[TMP4120:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4120]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5483:%.*]]
+// SIMD-ONLY0:       if.else5482:
+// SIMD-ONLY0-NEXT:    [[TMP4121:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4121]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5483]]
+// SIMD-ONLY0:       if.end5483:
+// SIMD-ONLY0-NEXT:    [[TMP4122:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4123:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5484:%.*]] = icmp eq i64 [[TMP4122]], [[TMP4123]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5484]], label [[IF_THEN5486:%.*]], label [[IF_ELSE5487:%.*]]
+// SIMD-ONLY0:       if.then5486:
+// SIMD-ONLY0-NEXT:    [[TMP4124:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4124]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5488:%.*]]
+// SIMD-ONLY0:       if.else5487:
+// SIMD-ONLY0-NEXT:    [[TMP4125:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4125]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5488]]
+// SIMD-ONLY0:       if.end5488:
+// SIMD-ONLY0-NEXT:    [[TMP4126:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4127:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5489:%.*]] = icmp eq i64 [[TMP4126]], [[TMP4127]]
+// SIMD-ONLY0-NEXT:    [[CONV5490:%.*]] = zext i1 [[CMP5489]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5491:%.*]] = sext i32 [[CONV5490]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5491]], ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4128:%.*]] = load i64, ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5492:%.*]] = icmp ne i64 [[TMP4128]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5492]], label [[IF_THEN5493:%.*]], label [[IF_END5494:%.*]]
+// SIMD-ONLY0:       if.then5493:
+// SIMD-ONLY0-NEXT:    [[TMP4129:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4129]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5494]]
+// SIMD-ONLY0:       if.end5494:
+// SIMD-ONLY0-NEXT:    [[TMP4130:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4131:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5495:%.*]] = icmp eq i64 [[TMP4130]], [[TMP4131]]
+// SIMD-ONLY0-NEXT:    [[CONV5496:%.*]] = zext i1 [[CMP5495]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5497:%.*]] = sext i32 [[CONV5496]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5497]], ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4132:%.*]] = load i64, ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5498:%.*]] = icmp ne i64 [[TMP4132]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5498]], label [[IF_THEN5499:%.*]], label [[IF_END5500:%.*]]
+// SIMD-ONLY0:       if.then5499:
+// SIMD-ONLY0-NEXT:    [[TMP4133:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4133]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5500]]
+// SIMD-ONLY0:       if.end5500:
+// SIMD-ONLY0-NEXT:    [[TMP4134:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4135:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5501:%.*]] = icmp eq i64 [[TMP4134]], [[TMP4135]]
+// SIMD-ONLY0-NEXT:    [[CONV5502:%.*]] = zext i1 [[CMP5501]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5503:%.*]] = sext i32 [[CONV5502]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5503]], ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4136:%.*]] = load i64, ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5504:%.*]] = icmp ne i64 [[TMP4136]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5504]], label [[IF_THEN5505:%.*]], label [[IF_ELSE5506:%.*]]
+// SIMD-ONLY0:       if.then5505:
+// SIMD-ONLY0-NEXT:    [[TMP4137:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4137]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5507:%.*]]
+// SIMD-ONLY0:       if.else5506:
+// SIMD-ONLY0-NEXT:    [[TMP4138:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4138]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5507]]
+// SIMD-ONLY0:       if.end5507:
+// SIMD-ONLY0-NEXT:    [[TMP4139:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4140:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5508:%.*]] = icmp eq i64 [[TMP4139]], [[TMP4140]]
+// SIMD-ONLY0-NEXT:    [[CONV5509:%.*]] = zext i1 [[CMP5508]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5510:%.*]] = sext i32 [[CONV5509]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5510]], ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4141:%.*]] = load i64, ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5511:%.*]] = icmp ne i64 [[TMP4141]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5511]], label [[IF_THEN5512:%.*]], label [[IF_ELSE5513:%.*]]
+// SIMD-ONLY0:       if.then5512:
+// SIMD-ONLY0-NEXT:    [[TMP4142:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4142]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5514:%.*]]
+// SIMD-ONLY0:       if.else5513:
+// SIMD-ONLY0-NEXT:    [[TMP4143:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4143]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5514]]
+// SIMD-ONLY0:       if.end5514:
+// SIMD-ONLY0-NEXT:    [[TMP4144:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4144]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4145:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4146:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5515:%.*]] = icmp ugt i64 [[TMP4145]], [[TMP4146]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5515]], label [[IF_THEN5517:%.*]], label [[IF_END5518:%.*]]
+// SIMD-ONLY0:       if.then5517:
+// SIMD-ONLY0-NEXT:    [[TMP4147:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4147]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5518]]
+// SIMD-ONLY0:       if.end5518:
+// SIMD-ONLY0-NEXT:    [[TMP4148:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4148]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4149:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4150:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5519:%.*]] = icmp ugt i64 [[TMP4149]], [[TMP4150]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5519]], label [[IF_THEN5521:%.*]], label [[IF_END5522:%.*]]
+// SIMD-ONLY0:       if.then5521:
+// SIMD-ONLY0-NEXT:    [[TMP4151:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4151]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5522]]
+// SIMD-ONLY0:       if.end5522:
+// SIMD-ONLY0-NEXT:    [[TMP4152:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4152]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4153:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4154:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5523:%.*]] = icmp ult i64 [[TMP4153]], [[TMP4154]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5523]], label [[IF_THEN5525:%.*]], label [[IF_END5526:%.*]]
+// SIMD-ONLY0:       if.then5525:
+// SIMD-ONLY0-NEXT:    [[TMP4155:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4155]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5526]]
+// SIMD-ONLY0:       if.end5526:
+// SIMD-ONLY0-NEXT:    [[TMP4156:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4156]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4157:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4158:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5527:%.*]] = icmp ult i64 [[TMP4157]], [[TMP4158]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5527]], label [[IF_THEN5529:%.*]], label [[IF_END5530:%.*]]
+// SIMD-ONLY0:       if.then5529:
+// SIMD-ONLY0-NEXT:    [[TMP4159:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4159]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5530]]
+// SIMD-ONLY0:       if.end5530:
+// SIMD-ONLY0-NEXT:    [[TMP4160:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4160]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4161:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4162:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5531:%.*]] = icmp eq i64 [[TMP4161]], [[TMP4162]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5531]], label [[IF_THEN5533:%.*]], label [[IF_END5534:%.*]]
+// SIMD-ONLY0:       if.then5533:
+// SIMD-ONLY0-NEXT:    [[TMP4163:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4163]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5534]]
+// SIMD-ONLY0:       if.end5534:
+// SIMD-ONLY0-NEXT:    [[TMP4164:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4164]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4165:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4166:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5535:%.*]] = icmp eq i64 [[TMP4165]], [[TMP4166]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5535]], label [[IF_THEN5537:%.*]], label [[IF_END5538:%.*]]
+// SIMD-ONLY0:       if.then5537:
+// SIMD-ONLY0-NEXT:    [[TMP4167:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4167]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5538]]
+// SIMD-ONLY0:       if.end5538:
+// SIMD-ONLY0-NEXT:    [[TMP4168:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4169:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5539:%.*]] = icmp ugt i64 [[TMP4168]], [[TMP4169]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5539]], label [[IF_THEN5541:%.*]], label [[IF_END5542:%.*]]
+// SIMD-ONLY0:       if.then5541:
+// SIMD-ONLY0-NEXT:    [[TMP4170:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4170]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5542]]
+// SIMD-ONLY0:       if.end5542:
+// SIMD-ONLY0-NEXT:    [[TMP4171:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4171]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4172:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4173:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5543:%.*]] = icmp ugt i64 [[TMP4172]], [[TMP4173]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5543]], label [[IF_THEN5545:%.*]], label [[IF_END5546:%.*]]
+// SIMD-ONLY0:       if.then5545:
+// SIMD-ONLY0-NEXT:    [[TMP4174:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4174]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5546]]
+// SIMD-ONLY0:       if.end5546:
+// SIMD-ONLY0-NEXT:    [[TMP4175:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4175]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4176:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4177:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5547:%.*]] = icmp ult i64 [[TMP4176]], [[TMP4177]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5547]], label [[IF_THEN5549:%.*]], label [[IF_END5550:%.*]]
+// SIMD-ONLY0:       if.then5549:
+// SIMD-ONLY0-NEXT:    [[TMP4178:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4178]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5550]]
+// SIMD-ONLY0:       if.end5550:
+// SIMD-ONLY0-NEXT:    [[TMP4179:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4179]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4180:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4181:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5551:%.*]] = icmp ult i64 [[TMP4180]], [[TMP4181]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5551]], label [[IF_THEN5553:%.*]], label [[IF_END5554:%.*]]
+// SIMD-ONLY0:       if.then5553:
+// SIMD-ONLY0-NEXT:    [[TMP4182:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4182]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5554]]
+// SIMD-ONLY0:       if.end5554:
+// SIMD-ONLY0-NEXT:    [[TMP4183:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4183]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4184:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4185:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5555:%.*]] = icmp eq i64 [[TMP4184]], [[TMP4185]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5555]], label [[IF_THEN5557:%.*]], label [[IF_END5558:%.*]]
+// SIMD-ONLY0:       if.then5557:
+// SIMD-ONLY0-NEXT:    [[TMP4186:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4186]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5558]]
+// SIMD-ONLY0:       if.end5558:
+// SIMD-ONLY0-NEXT:    [[TMP4187:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4187]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4188:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4189:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5559:%.*]] = icmp eq i64 [[TMP4188]], [[TMP4189]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5559]], label [[IF_THEN5561:%.*]], label [[IF_END5562:%.*]]
+// SIMD-ONLY0:       if.then5561:
+// SIMD-ONLY0-NEXT:    [[TMP4190:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4190]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5562]]
+// SIMD-ONLY0:       if.end5562:
+// SIMD-ONLY0-NEXT:    [[TMP4191:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4191]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4192:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4193:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5563:%.*]] = icmp eq i64 [[TMP4192]], [[TMP4193]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5563]], label [[IF_THEN5565:%.*]], label [[IF_ELSE5566:%.*]]
+// SIMD-ONLY0:       if.then5565:
+// SIMD-ONLY0-NEXT:    [[TMP4194:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4194]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5567:%.*]]
+// SIMD-ONLY0:       if.else5566:
+// SIMD-ONLY0-NEXT:    [[TMP4195:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4195]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5567]]
+// SIMD-ONLY0:       if.end5567:
+// SIMD-ONLY0-NEXT:    [[TMP4196:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4197:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5568:%.*]] = icmp eq i64 [[TMP4196]], [[TMP4197]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5568]], label [[IF_THEN5570:%.*]], label [[IF_ELSE5571:%.*]]
+// SIMD-ONLY0:       if.then5570:
+// SIMD-ONLY0-NEXT:    [[TMP4198:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4198]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5572:%.*]]
+// SIMD-ONLY0:       if.else5571:
+// SIMD-ONLY0-NEXT:    [[TMP4199:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4199]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5572]]
+// SIMD-ONLY0:       if.end5572:
+// SIMD-ONLY0-NEXT:    [[TMP4200:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4201:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5573:%.*]] = icmp eq i64 [[TMP4200]], [[TMP4201]]
+// SIMD-ONLY0-NEXT:    [[CONV5574:%.*]] = zext i1 [[CMP5573]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5575:%.*]] = sext i32 [[CONV5574]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5575]], ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4202:%.*]] = load i64, ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5576:%.*]] = icmp ne i64 [[TMP4202]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5576]], label [[IF_THEN5577:%.*]], label [[IF_END5578:%.*]]
+// SIMD-ONLY0:       if.then5577:
+// SIMD-ONLY0-NEXT:    [[TMP4203:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4203]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5578]]
+// SIMD-ONLY0:       if.end5578:
+// SIMD-ONLY0-NEXT:    [[TMP4204:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4205:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5579:%.*]] = icmp eq i64 [[TMP4204]], [[TMP4205]]
+// SIMD-ONLY0-NEXT:    [[CONV5580:%.*]] = zext i1 [[CMP5579]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5581:%.*]] = sext i32 [[CONV5580]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5581]], ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4206:%.*]] = load i64, ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5582:%.*]] = icmp ne i64 [[TMP4206]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5582]], label [[IF_THEN5583:%.*]], label [[IF_END5584:%.*]]
+// SIMD-ONLY0:       if.then5583:
+// SIMD-ONLY0-NEXT:    [[TMP4207:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4207]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5584]]
+// SIMD-ONLY0:       if.end5584:
+// SIMD-ONLY0-NEXT:    [[TMP4208:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4209:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5585:%.*]] = icmp eq i64 [[TMP4208]], [[TMP4209]]
+// SIMD-ONLY0-NEXT:    [[CONV5586:%.*]] = zext i1 [[CMP5585]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5587:%.*]] = sext i32 [[CONV5586]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5587]], ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4210:%.*]] = load i64, ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5588:%.*]] = icmp ne i64 [[TMP4210]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5588]], label [[IF_THEN5589:%.*]], label [[IF_ELSE5590:%.*]]
+// SIMD-ONLY0:       if.then5589:
+// SIMD-ONLY0-NEXT:    [[TMP4211:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4211]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5591:%.*]]
+// SIMD-ONLY0:       if.else5590:
+// SIMD-ONLY0-NEXT:    [[TMP4212:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4212]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5591]]
+// SIMD-ONLY0:       if.end5591:
+// SIMD-ONLY0-NEXT:    [[TMP4213:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4214:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5592:%.*]] = icmp eq i64 [[TMP4213]], [[TMP4214]]
+// SIMD-ONLY0-NEXT:    [[CONV5593:%.*]] = zext i1 [[CMP5592]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5594:%.*]] = sext i32 [[CONV5593]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5594]], ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4215:%.*]] = load i64, ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5595:%.*]] = icmp ne i64 [[TMP4215]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5595]], label [[IF_THEN5596:%.*]], label [[IF_ELSE5597:%.*]]
+// SIMD-ONLY0:       if.then5596:
+// SIMD-ONLY0-NEXT:    [[TMP4216:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4216]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5598:%.*]]
+// SIMD-ONLY0:       if.else5597:
+// SIMD-ONLY0-NEXT:    [[TMP4217:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4217]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5598]]
+// SIMD-ONLY0:       if.end5598:
+// SIMD-ONLY0-NEXT:    [[TMP4218:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4218]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4219:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4220:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5599:%.*]] = icmp ugt i64 [[TMP4219]], [[TMP4220]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5599]], label [[IF_THEN5601:%.*]], label [[IF_END5602:%.*]]
+// SIMD-ONLY0:       if.then5601:
+// SIMD-ONLY0-NEXT:    [[TMP4221:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4221]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5602]]
+// SIMD-ONLY0:       if.end5602:
+// SIMD-ONLY0-NEXT:    [[TMP4222:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4222]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4223:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4224:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5603:%.*]] = icmp ugt i64 [[TMP4223]], [[TMP4224]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5603]], label [[IF_THEN5605:%.*]], label [[IF_END5606:%.*]]
+// SIMD-ONLY0:       if.then5605:
+// SIMD-ONLY0-NEXT:    [[TMP4225:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4225]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5606]]
+// SIMD-ONLY0:       if.end5606:
+// SIMD-ONLY0-NEXT:    [[TMP4226:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4226]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4227:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4228:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5607:%.*]] = icmp ult i64 [[TMP4227]], [[TMP4228]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5607]], label [[IF_THEN5609:%.*]], label [[IF_END5610:%.*]]
+// SIMD-ONLY0:       if.then5609:
+// SIMD-ONLY0-NEXT:    [[TMP4229:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4229]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5610]]
+// SIMD-ONLY0:       if.end5610:
+// SIMD-ONLY0-NEXT:    [[TMP4230:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4230]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4231:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4232:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5611:%.*]] = icmp ult i64 [[TMP4231]], [[TMP4232]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5611]], label [[IF_THEN5613:%.*]], label [[IF_END5614:%.*]]
+// SIMD-ONLY0:       if.then5613:
+// SIMD-ONLY0-NEXT:    [[TMP4233:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4233]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5614]]
+// SIMD-ONLY0:       if.end5614:
+// SIMD-ONLY0-NEXT:    [[TMP4234:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4234]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4235:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4236:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5615:%.*]] = icmp eq i64 [[TMP4235]], [[TMP4236]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5615]], label [[IF_THEN5617:%.*]], label [[IF_END5618:%.*]]
+// SIMD-ONLY0:       if.then5617:
+// SIMD-ONLY0-NEXT:    [[TMP4237:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4237]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5618]]
+// SIMD-ONLY0:       if.end5618:
+// SIMD-ONLY0-NEXT:    [[TMP4238:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4238]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4239:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4240:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5619:%.*]] = icmp eq i64 [[TMP4239]], [[TMP4240]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5619]], label [[IF_THEN5621:%.*]], label [[IF_END5622:%.*]]
+// SIMD-ONLY0:       if.then5621:
+// SIMD-ONLY0-NEXT:    [[TMP4241:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4241]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5622]]
+// SIMD-ONLY0:       if.end5622:
+// SIMD-ONLY0-NEXT:    [[TMP4242:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4243:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5623:%.*]] = icmp ugt i64 [[TMP4242]], [[TMP4243]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5623]], label [[IF_THEN5625:%.*]], label [[IF_END5626:%.*]]
+// SIMD-ONLY0:       if.then5625:
+// SIMD-ONLY0-NEXT:    [[TMP4244:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4244]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5626]]
+// SIMD-ONLY0:       if.end5626:
+// SIMD-ONLY0-NEXT:    [[TMP4245:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4245]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4246:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4247:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5627:%.*]] = icmp ugt i64 [[TMP4246]], [[TMP4247]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5627]], label [[IF_THEN5629:%.*]], label [[IF_END5630:%.*]]
+// SIMD-ONLY0:       if.then5629:
+// SIMD-ONLY0-NEXT:    [[TMP4248:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4248]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5630]]
+// SIMD-ONLY0:       if.end5630:
+// SIMD-ONLY0-NEXT:    [[TMP4249:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4249]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4250:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4251:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5631:%.*]] = icmp ult i64 [[TMP4250]], [[TMP4251]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5631]], label [[IF_THEN5633:%.*]], label [[IF_END5634:%.*]]
+// SIMD-ONLY0:       if.then5633:
+// SIMD-ONLY0-NEXT:    [[TMP4252:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4252]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5634]]
+// SIMD-ONLY0:       if.end5634:
+// SIMD-ONLY0-NEXT:    [[TMP4253:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4253]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4254:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4255:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5635:%.*]] = icmp ult i64 [[TMP4254]], [[TMP4255]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5635]], label [[IF_THEN5637:%.*]], label [[IF_END5638:%.*]]
+// SIMD-ONLY0:       if.then5637:
+// SIMD-ONLY0-NEXT:    [[TMP4256:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4256]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5638]]
+// SIMD-ONLY0:       if.end5638:
+// SIMD-ONLY0-NEXT:    [[TMP4257:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4257]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4258:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4259:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5639:%.*]] = icmp eq i64 [[TMP4258]], [[TMP4259]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5639]], label [[IF_THEN5641:%.*]], label [[IF_END5642:%.*]]
+// SIMD-ONLY0:       if.then5641:
+// SIMD-ONLY0-NEXT:    [[TMP4260:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4260]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5642]]
+// SIMD-ONLY0:       if.end5642:
+// SIMD-ONLY0-NEXT:    [[TMP4261:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4261]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4262:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4263:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5643:%.*]] = icmp eq i64 [[TMP4262]], [[TMP4263]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5643]], label [[IF_THEN5645:%.*]], label [[IF_END5646:%.*]]
+// SIMD-ONLY0:       if.then5645:
+// SIMD-ONLY0-NEXT:    [[TMP4264:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4264]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5646]]
+// SIMD-ONLY0:       if.end5646:
+// SIMD-ONLY0-NEXT:    [[TMP4265:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4265]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4266:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4267:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5647:%.*]] = icmp eq i64 [[TMP4266]], [[TMP4267]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5647]], label [[IF_THEN5649:%.*]], label [[IF_ELSE5650:%.*]]
+// SIMD-ONLY0:       if.then5649:
+// SIMD-ONLY0-NEXT:    [[TMP4268:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4268]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5651:%.*]]
+// SIMD-ONLY0:       if.else5650:
+// SIMD-ONLY0-NEXT:    [[TMP4269:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4269]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5651]]
+// SIMD-ONLY0:       if.end5651:
+// SIMD-ONLY0-NEXT:    [[TMP4270:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4271:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5652:%.*]] = icmp eq i64 [[TMP4270]], [[TMP4271]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5652]], label [[IF_THEN5654:%.*]], label [[IF_ELSE5655:%.*]]
+// SIMD-ONLY0:       if.then5654:
+// SIMD-ONLY0-NEXT:    [[TMP4272:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4272]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5656:%.*]]
+// SIMD-ONLY0:       if.else5655:
+// SIMD-ONLY0-NEXT:    [[TMP4273:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4273]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5656]]
+// SIMD-ONLY0:       if.end5656:
+// SIMD-ONLY0-NEXT:    [[TMP4274:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4275:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5657:%.*]] = icmp eq i64 [[TMP4274]], [[TMP4275]]
+// SIMD-ONLY0-NEXT:    [[CONV5658:%.*]] = zext i1 [[CMP5657]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5659:%.*]] = sext i32 [[CONV5658]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5659]], ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4276:%.*]] = load i64, ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5660:%.*]] = icmp ne i64 [[TMP4276]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5660]], label [[IF_THEN5661:%.*]], label [[IF_END5662:%.*]]
+// SIMD-ONLY0:       if.then5661:
+// SIMD-ONLY0-NEXT:    [[TMP4277:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4277]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5662]]
+// SIMD-ONLY0:       if.end5662:
+// SIMD-ONLY0-NEXT:    [[TMP4278:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4279:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5663:%.*]] = icmp eq i64 [[TMP4278]], [[TMP4279]]
+// SIMD-ONLY0-NEXT:    [[CONV5664:%.*]] = zext i1 [[CMP5663]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5665:%.*]] = sext i32 [[CONV5664]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5665]], ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4280:%.*]] = load i64, ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5666:%.*]] = icmp ne i64 [[TMP4280]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5666]], label [[IF_THEN5667:%.*]], label [[IF_END5668:%.*]]
+// SIMD-ONLY0:       if.then5667:
+// SIMD-ONLY0-NEXT:    [[TMP4281:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4281]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5668]]
+// SIMD-ONLY0:       if.end5668:
+// SIMD-ONLY0-NEXT:    [[TMP4282:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4283:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5669:%.*]] = icmp eq i64 [[TMP4282]], [[TMP4283]]
+// SIMD-ONLY0-NEXT:    [[CONV5670:%.*]] = zext i1 [[CMP5669]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5671:%.*]] = sext i32 [[CONV5670]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5671]], ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4284:%.*]] = load i64, ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5672:%.*]] = icmp ne i64 [[TMP4284]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5672]], label [[IF_THEN5673:%.*]], label [[IF_ELSE5674:%.*]]
+// SIMD-ONLY0:       if.then5673:
+// SIMD-ONLY0-NEXT:    [[TMP4285:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4285]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5675:%.*]]
+// SIMD-ONLY0:       if.else5674:
+// SIMD-ONLY0-NEXT:    [[TMP4286:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4286]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5675]]
+// SIMD-ONLY0:       if.end5675:
+// SIMD-ONLY0-NEXT:    [[TMP4287:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4288:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5676:%.*]] = icmp eq i64 [[TMP4287]], [[TMP4288]]
+// SIMD-ONLY0-NEXT:    [[CONV5677:%.*]] = zext i1 [[CMP5676]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5678:%.*]] = sext i32 [[CONV5677]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5678]], ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4289:%.*]] = load i64, ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5679:%.*]] = icmp ne i64 [[TMP4289]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5679]], label [[IF_THEN5680:%.*]], label [[IF_ELSE5681:%.*]]
+// SIMD-ONLY0:       if.then5680:
+// SIMD-ONLY0-NEXT:    [[TMP4290:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4290]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5682:%.*]]
+// SIMD-ONLY0:       if.else5681:
+// SIMD-ONLY0-NEXT:    [[TMP4291:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4291]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5682]]
+// SIMD-ONLY0:       if.end5682:
+// SIMD-ONLY0-NEXT:    [[TMP4292:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4292]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4293:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4294:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5683:%.*]] = icmp ugt i64 [[TMP4293]], [[TMP4294]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5683]], label [[IF_THEN5685:%.*]], label [[IF_END5686:%.*]]
+// SIMD-ONLY0:       if.then5685:
+// SIMD-ONLY0-NEXT:    [[TMP4295:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4295]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5686]]
+// SIMD-ONLY0:       if.end5686:
+// SIMD-ONLY0-NEXT:    [[TMP4296:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4296]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4297:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4298:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5687:%.*]] = icmp ugt i64 [[TMP4297]], [[TMP4298]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5687]], label [[IF_THEN5689:%.*]], label [[IF_END5690:%.*]]
+// SIMD-ONLY0:       if.then5689:
+// SIMD-ONLY0-NEXT:    [[TMP4299:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4299]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5690]]
+// SIMD-ONLY0:       if.end5690:
+// SIMD-ONLY0-NEXT:    [[TMP4300:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4300]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4301:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4302:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5691:%.*]] = icmp ult i64 [[TMP4301]], [[TMP4302]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5691]], label [[IF_THEN5693:%.*]], label [[IF_END5694:%.*]]
+// SIMD-ONLY0:       if.then5693:
+// SIMD-ONLY0-NEXT:    [[TMP4303:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4303]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5694]]
+// SIMD-ONLY0:       if.end5694:
+// SIMD-ONLY0-NEXT:    [[TMP4304:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4304]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4305:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4306:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5695:%.*]] = icmp ult i64 [[TMP4305]], [[TMP4306]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5695]], label [[IF_THEN5697:%.*]], label [[IF_END5698:%.*]]
+// SIMD-ONLY0:       if.then5697:
+// SIMD-ONLY0-NEXT:    [[TMP4307:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4307]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5698]]
+// SIMD-ONLY0:       if.end5698:
+// SIMD-ONLY0-NEXT:    [[TMP4308:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4308]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4309:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4310:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5699:%.*]] = icmp eq i64 [[TMP4309]], [[TMP4310]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5699]], label [[IF_THEN5701:%.*]], label [[IF_END5702:%.*]]
+// SIMD-ONLY0:       if.then5701:
+// SIMD-ONLY0-NEXT:    [[TMP4311:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4311]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5702]]
+// SIMD-ONLY0:       if.end5702:
+// SIMD-ONLY0-NEXT:    [[TMP4312:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4312]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4313:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4314:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5703:%.*]] = icmp eq i64 [[TMP4313]], [[TMP4314]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5703]], label [[IF_THEN5705:%.*]], label [[IF_END5706:%.*]]
+// SIMD-ONLY0:       if.then5705:
+// SIMD-ONLY0-NEXT:    [[TMP4315:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4315]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5706]]
+// SIMD-ONLY0:       if.end5706:
+// SIMD-ONLY0-NEXT:    [[TMP4316:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4317:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5707:%.*]] = icmp ugt i64 [[TMP4316]], [[TMP4317]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5707]], label [[IF_THEN5709:%.*]], label [[IF_END5710:%.*]]
+// SIMD-ONLY0:       if.then5709:
+// SIMD-ONLY0-NEXT:    [[TMP4318:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4318]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5710]]
+// SIMD-ONLY0:       if.end5710:
+// SIMD-ONLY0-NEXT:    [[TMP4319:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4319]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4320:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4321:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5711:%.*]] = icmp ugt i64 [[TMP4320]], [[TMP4321]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5711]], label [[IF_THEN5713:%.*]], label [[IF_END5714:%.*]]
+// SIMD-ONLY0:       if.then5713:
+// SIMD-ONLY0-NEXT:    [[TMP4322:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4322]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5714]]
+// SIMD-ONLY0:       if.end5714:
+// SIMD-ONLY0-NEXT:    [[TMP4323:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4323]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4324:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4325:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5715:%.*]] = icmp ult i64 [[TMP4324]], [[TMP4325]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5715]], label [[IF_THEN5717:%.*]], label [[IF_END5718:%.*]]
+// SIMD-ONLY0:       if.then5717:
+// SIMD-ONLY0-NEXT:    [[TMP4326:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4326]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5718]]
+// SIMD-ONLY0:       if.end5718:
+// SIMD-ONLY0-NEXT:    [[TMP4327:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4327]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4328:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4329:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5719:%.*]] = icmp ult i64 [[TMP4328]], [[TMP4329]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5719]], label [[IF_THEN5721:%.*]], label [[IF_END5722:%.*]]
+// SIMD-ONLY0:       if.then5721:
+// SIMD-ONLY0-NEXT:    [[TMP4330:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4330]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5722]]
+// SIMD-ONLY0:       if.end5722:
+// SIMD-ONLY0-NEXT:    [[TMP4331:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4331]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4332:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4333:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5723:%.*]] = icmp eq i64 [[TMP4332]], [[TMP4333]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5723]], label [[IF_THEN5725:%.*]], label [[IF_END5726:%.*]]
+// SIMD-ONLY0:       if.then5725:
+// SIMD-ONLY0-NEXT:    [[TMP4334:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4334]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5726]]
+// SIMD-ONLY0:       if.end5726:
+// SIMD-ONLY0-NEXT:    [[TMP4335:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4335]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4336:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4337:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5727:%.*]] = icmp eq i64 [[TMP4336]], [[TMP4337]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5727]], label [[IF_THEN5729:%.*]], label [[IF_END5730:%.*]]
+// SIMD-ONLY0:       if.then5729:
+// SIMD-ONLY0-NEXT:    [[TMP4338:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4338]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5730]]
+// SIMD-ONLY0:       if.end5730:
+// SIMD-ONLY0-NEXT:    [[TMP4339:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4339]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4340:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4341:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5731:%.*]] = icmp eq i64 [[TMP4340]], [[TMP4341]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5731]], label [[IF_THEN5733:%.*]], label [[IF_ELSE5734:%.*]]
+// SIMD-ONLY0:       if.then5733:
+// SIMD-ONLY0-NEXT:    [[TMP4342:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4342]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5735:%.*]]
+// SIMD-ONLY0:       if.else5734:
+// SIMD-ONLY0-NEXT:    [[TMP4343:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4343]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5735]]
+// SIMD-ONLY0:       if.end5735:
+// SIMD-ONLY0-NEXT:    [[TMP4344:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4345:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5736:%.*]] = icmp eq i64 [[TMP4344]], [[TMP4345]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5736]], label [[IF_THEN5738:%.*]], label [[IF_ELSE5739:%.*]]
+// SIMD-ONLY0:       if.then5738:
+// SIMD-ONLY0-NEXT:    [[TMP4346:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4346]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5740:%.*]]
+// SIMD-ONLY0:       if.else5739:
+// SIMD-ONLY0-NEXT:    [[TMP4347:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4347]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5740]]
+// SIMD-ONLY0:       if.end5740:
+// SIMD-ONLY0-NEXT:    [[TMP4348:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4349:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5741:%.*]] = icmp eq i64 [[TMP4348]], [[TMP4349]]
+// SIMD-ONLY0-NEXT:    [[CONV5742:%.*]] = zext i1 [[CMP5741]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5743:%.*]] = sext i32 [[CONV5742]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5743]], ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4350:%.*]] = load i64, ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5744:%.*]] = icmp ne i64 [[TMP4350]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5744]], label [[IF_THEN5745:%.*]], label [[IF_END5746:%.*]]
+// SIMD-ONLY0:       if.then5745:
+// SIMD-ONLY0-NEXT:    [[TMP4351:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4351]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5746]]
+// SIMD-ONLY0:       if.end5746:
+// SIMD-ONLY0-NEXT:    [[TMP4352:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4353:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5747:%.*]] = icmp eq i64 [[TMP4352]], [[TMP4353]]
+// SIMD-ONLY0-NEXT:    [[CONV5748:%.*]] = zext i1 [[CMP5747]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5749:%.*]] = sext i32 [[CONV5748]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5749]], ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4354:%.*]] = load i64, ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5750:%.*]] = icmp ne i64 [[TMP4354]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5750]], label [[IF_THEN5751:%.*]], label [[IF_END5752:%.*]]
+// SIMD-ONLY0:       if.then5751:
+// SIMD-ONLY0-NEXT:    [[TMP4355:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4355]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5752]]
+// SIMD-ONLY0:       if.end5752:
+// SIMD-ONLY0-NEXT:    [[TMP4356:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4357:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5753:%.*]] = icmp eq i64 [[TMP4356]], [[TMP4357]]
+// SIMD-ONLY0-NEXT:    [[CONV5754:%.*]] = zext i1 [[CMP5753]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5755:%.*]] = sext i32 [[CONV5754]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5755]], ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4358:%.*]] = load i64, ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5756:%.*]] = icmp ne i64 [[TMP4358]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5756]], label [[IF_THEN5757:%.*]], label [[IF_ELSE5758:%.*]]
+// SIMD-ONLY0:       if.then5757:
+// SIMD-ONLY0-NEXT:    [[TMP4359:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4359]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5759:%.*]]
+// SIMD-ONLY0:       if.else5758:
+// SIMD-ONLY0-NEXT:    [[TMP4360:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4360]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5759]]
+// SIMD-ONLY0:       if.end5759:
+// SIMD-ONLY0-NEXT:    [[TMP4361:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4362:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5760:%.*]] = icmp eq i64 [[TMP4361]], [[TMP4362]]
+// SIMD-ONLY0-NEXT:    [[CONV5761:%.*]] = zext i1 [[CMP5760]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5762:%.*]] = sext i32 [[CONV5761]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5762]], ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4363:%.*]] = load i64, ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5763:%.*]] = icmp ne i64 [[TMP4363]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5763]], label [[IF_THEN5764:%.*]], label [[IF_ELSE5765:%.*]]
+// SIMD-ONLY0:       if.then5764:
+// SIMD-ONLY0-NEXT:    [[TMP4364:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4364]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5766:%.*]]
+// SIMD-ONLY0:       if.else5765:
+// SIMD-ONLY0-NEXT:    [[TMP4365:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4365]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5766]]
+// SIMD-ONLY0:       if.end5766:
+// SIMD-ONLY0-NEXT:    [[TMP4366:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4366]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4367:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4368:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5767:%.*]] = icmp ugt i64 [[TMP4367]], [[TMP4368]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5767]], label [[IF_THEN5769:%.*]], label [[IF_END5770:%.*]]
+// SIMD-ONLY0:       if.then5769:
+// SIMD-ONLY0-NEXT:    [[TMP4369:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4369]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5770]]
+// SIMD-ONLY0:       if.end5770:
+// SIMD-ONLY0-NEXT:    [[TMP4370:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4370]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4371:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4372:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5771:%.*]] = icmp ugt i64 [[TMP4371]], [[TMP4372]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5771]], label [[IF_THEN5773:%.*]], label [[IF_END5774:%.*]]
+// SIMD-ONLY0:       if.then5773:
+// SIMD-ONLY0-NEXT:    [[TMP4373:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4373]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5774]]
+// SIMD-ONLY0:       if.end5774:
+// SIMD-ONLY0-NEXT:    [[TMP4374:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4374]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4375:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4376:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5775:%.*]] = icmp ult i64 [[TMP4375]], [[TMP4376]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5775]], label [[IF_THEN5777:%.*]], label [[IF_END5778:%.*]]
+// SIMD-ONLY0:       if.then5777:
+// SIMD-ONLY0-NEXT:    [[TMP4377:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4377]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5778]]
+// SIMD-ONLY0:       if.end5778:
+// SIMD-ONLY0-NEXT:    [[TMP4378:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4378]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4379:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4380:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5779:%.*]] = icmp ult i64 [[TMP4379]], [[TMP4380]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5779]], label [[IF_THEN5781:%.*]], label [[IF_END5782:%.*]]
+// SIMD-ONLY0:       if.then5781:
+// SIMD-ONLY0-NEXT:    [[TMP4381:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4381]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5782]]
+// SIMD-ONLY0:       if.end5782:
+// SIMD-ONLY0-NEXT:    [[TMP4382:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4382]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4383:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4384:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5783:%.*]] = icmp eq i64 [[TMP4383]], [[TMP4384]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5783]], label [[IF_THEN5785:%.*]], label [[IF_END5786:%.*]]
+// SIMD-ONLY0:       if.then5785:
+// SIMD-ONLY0-NEXT:    [[TMP4385:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4385]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5786]]
+// SIMD-ONLY0:       if.end5786:
+// SIMD-ONLY0-NEXT:    [[TMP4386:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4386]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4387:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4388:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5787:%.*]] = icmp eq i64 [[TMP4387]], [[TMP4388]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5787]], label [[IF_THEN5789:%.*]], label [[IF_END5790:%.*]]
+// SIMD-ONLY0:       if.then5789:
+// SIMD-ONLY0-NEXT:    [[TMP4389:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4389]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5790]]
+// SIMD-ONLY0:       if.end5790:
+// SIMD-ONLY0-NEXT:    [[TMP4390:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4391:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5791:%.*]] = icmp ugt i64 [[TMP4390]], [[TMP4391]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5791]], label [[IF_THEN5793:%.*]], label [[IF_END5794:%.*]]
+// SIMD-ONLY0:       if.then5793:
+// SIMD-ONLY0-NEXT:    [[TMP4392:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4392]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5794]]
+// SIMD-ONLY0:       if.end5794:
+// SIMD-ONLY0-NEXT:    [[TMP4393:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4393]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4394:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4395:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5795:%.*]] = icmp ugt i64 [[TMP4394]], [[TMP4395]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5795]], label [[IF_THEN5797:%.*]], label [[IF_END5798:%.*]]
+// SIMD-ONLY0:       if.then5797:
+// SIMD-ONLY0-NEXT:    [[TMP4396:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4396]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5798]]
+// SIMD-ONLY0:       if.end5798:
+// SIMD-ONLY0-NEXT:    [[TMP4397:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4397]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4398:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4399:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5799:%.*]] = icmp ult i64 [[TMP4398]], [[TMP4399]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5799]], label [[IF_THEN5801:%.*]], label [[IF_END5802:%.*]]
+// SIMD-ONLY0:       if.then5801:
+// SIMD-ONLY0-NEXT:    [[TMP4400:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4400]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5802]]
+// SIMD-ONLY0:       if.end5802:
+// SIMD-ONLY0-NEXT:    [[TMP4401:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4401]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4402:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4403:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5803:%.*]] = icmp ult i64 [[TMP4402]], [[TMP4403]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5803]], label [[IF_THEN5805:%.*]], label [[IF_END5806:%.*]]
+// SIMD-ONLY0:       if.then5805:
+// SIMD-ONLY0-NEXT:    [[TMP4404:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4404]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5806]]
+// SIMD-ONLY0:       if.end5806:
+// SIMD-ONLY0-NEXT:    [[TMP4405:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4405]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4406:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4407:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5807:%.*]] = icmp eq i64 [[TMP4406]], [[TMP4407]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5807]], label [[IF_THEN5809:%.*]], label [[IF_END5810:%.*]]
+// SIMD-ONLY0:       if.then5809:
+// SIMD-ONLY0-NEXT:    [[TMP4408:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4408]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5810]]
+// SIMD-ONLY0:       if.end5810:
+// SIMD-ONLY0-NEXT:    [[TMP4409:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4409]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4410:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4411:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5811:%.*]] = icmp eq i64 [[TMP4410]], [[TMP4411]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5811]], label [[IF_THEN5813:%.*]], label [[IF_END5814:%.*]]
+// SIMD-ONLY0:       if.then5813:
+// SIMD-ONLY0-NEXT:    [[TMP4412:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4412]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5814]]
+// SIMD-ONLY0:       if.end5814:
+// SIMD-ONLY0-NEXT:    [[TMP4413:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4413]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4414:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4415:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5815:%.*]] = icmp eq i64 [[TMP4414]], [[TMP4415]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5815]], label [[IF_THEN5817:%.*]], label [[IF_ELSE5818:%.*]]
+// SIMD-ONLY0:       if.then5817:
+// SIMD-ONLY0-NEXT:    [[TMP4416:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4416]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5819:%.*]]
+// SIMD-ONLY0:       if.else5818:
+// SIMD-ONLY0-NEXT:    [[TMP4417:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4417]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5819]]
+// SIMD-ONLY0:       if.end5819:
+// SIMD-ONLY0-NEXT:    [[TMP4418:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4419:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5820:%.*]] = icmp eq i64 [[TMP4418]], [[TMP4419]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5820]], label [[IF_THEN5822:%.*]], label [[IF_ELSE5823:%.*]]
+// SIMD-ONLY0:       if.then5822:
+// SIMD-ONLY0-NEXT:    [[TMP4420:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4420]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5824:%.*]]
+// SIMD-ONLY0:       if.else5823:
+// SIMD-ONLY0-NEXT:    [[TMP4421:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4421]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5824]]
+// SIMD-ONLY0:       if.end5824:
+// SIMD-ONLY0-NEXT:    [[TMP4422:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4423:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5825:%.*]] = icmp eq i64 [[TMP4422]], [[TMP4423]]
+// SIMD-ONLY0-NEXT:    [[CONV5826:%.*]] = zext i1 [[CMP5825]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5827:%.*]] = sext i32 [[CONV5826]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5827]], ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4424:%.*]] = load i64, ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5828:%.*]] = icmp ne i64 [[TMP4424]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5828]], label [[IF_THEN5829:%.*]], label [[IF_END5830:%.*]]
+// SIMD-ONLY0:       if.then5829:
+// SIMD-ONLY0-NEXT:    [[TMP4425:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4425]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5830]]
+// SIMD-ONLY0:       if.end5830:
+// SIMD-ONLY0-NEXT:    [[TMP4426:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4427:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5831:%.*]] = icmp eq i64 [[TMP4426]], [[TMP4427]]
+// SIMD-ONLY0-NEXT:    [[CONV5832:%.*]] = zext i1 [[CMP5831]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5833:%.*]] = sext i32 [[CONV5832]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5833]], ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4428:%.*]] = load i64, ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5834:%.*]] = icmp ne i64 [[TMP4428]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5834]], label [[IF_THEN5835:%.*]], label [[IF_END5836:%.*]]
+// SIMD-ONLY0:       if.then5835:
+// SIMD-ONLY0-NEXT:    [[TMP4429:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4429]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5836]]
+// SIMD-ONLY0:       if.end5836:
+// SIMD-ONLY0-NEXT:    [[TMP4430:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4431:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5837:%.*]] = icmp eq i64 [[TMP4430]], [[TMP4431]]
+// SIMD-ONLY0-NEXT:    [[CONV5838:%.*]] = zext i1 [[CMP5837]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5839:%.*]] = sext i32 [[CONV5838]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5839]], ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4432:%.*]] = load i64, ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5840:%.*]] = icmp ne i64 [[TMP4432]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5840]], label [[IF_THEN5841:%.*]], label [[IF_ELSE5842:%.*]]
+// SIMD-ONLY0:       if.then5841:
+// SIMD-ONLY0-NEXT:    [[TMP4433:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4433]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5843:%.*]]
+// SIMD-ONLY0:       if.else5842:
+// SIMD-ONLY0-NEXT:    [[TMP4434:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4434]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5843]]
+// SIMD-ONLY0:       if.end5843:
+// SIMD-ONLY0-NEXT:    [[TMP4435:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4436:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP5844:%.*]] = icmp eq i64 [[TMP4435]], [[TMP4436]]
+// SIMD-ONLY0-NEXT:    [[CONV5845:%.*]] = zext i1 [[CMP5844]] to i32
+// SIMD-ONLY0-NEXT:    [[CONV5846:%.*]] = sext i32 [[CONV5845]] to i64
+// SIMD-ONLY0-NEXT:    store i64 [[CONV5846]], ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4437:%.*]] = load i64, ptr [[ULLR]], align 8
+// SIMD-ONLY0-NEXT:    [[TOBOOL5847:%.*]] = icmp ne i64 [[TMP4437]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5847]], label [[IF_THEN5848:%.*]], label [[IF_ELSE5849:%.*]]
+// SIMD-ONLY0:       if.then5848:
+// SIMD-ONLY0-NEXT:    [[TMP4438:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4438]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5850:%.*]]
+// SIMD-ONLY0:       if.else5849:
+// SIMD-ONLY0-NEXT:    [[TMP4439:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP4439]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END5850]]
+// SIMD-ONLY0:       if.end5850:
+// SIMD-ONLY0-NEXT:    [[TMP4440:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4440]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4441:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4442:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5851:%.*]] = fcmp ogt float [[TMP4441]], [[TMP4442]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5851]], label [[IF_THEN5853:%.*]], label [[IF_END5854:%.*]]
+// SIMD-ONLY0:       if.then5853:
+// SIMD-ONLY0-NEXT:    [[TMP4443:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4443]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5854]]
+// SIMD-ONLY0:       if.end5854:
+// SIMD-ONLY0-NEXT:    [[TMP4444:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4444]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4445:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4446:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5855:%.*]] = fcmp ogt float [[TMP4445]], [[TMP4446]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5855]], label [[IF_THEN5857:%.*]], label [[IF_END5858:%.*]]
+// SIMD-ONLY0:       if.then5857:
+// SIMD-ONLY0-NEXT:    [[TMP4447:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4447]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5858]]
+// SIMD-ONLY0:       if.end5858:
+// SIMD-ONLY0-NEXT:    [[TMP4448:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4448]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4449:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4450:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5859:%.*]] = fcmp olt float [[TMP4449]], [[TMP4450]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5859]], label [[IF_THEN5861:%.*]], label [[IF_END5862:%.*]]
+// SIMD-ONLY0:       if.then5861:
+// SIMD-ONLY0-NEXT:    [[TMP4451:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4451]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5862]]
+// SIMD-ONLY0:       if.end5862:
+// SIMD-ONLY0-NEXT:    [[TMP4452:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4452]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4453:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4454:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5863:%.*]] = fcmp olt float [[TMP4453]], [[TMP4454]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5863]], label [[IF_THEN5865:%.*]], label [[IF_END5866:%.*]]
+// SIMD-ONLY0:       if.then5865:
+// SIMD-ONLY0-NEXT:    [[TMP4455:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4455]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5866]]
+// SIMD-ONLY0:       if.end5866:
+// SIMD-ONLY0-NEXT:    [[TMP4456:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4456]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4457:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4458:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5867:%.*]] = fcmp oeq float [[TMP4457]], [[TMP4458]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5867]], label [[IF_THEN5869:%.*]], label [[IF_END5870:%.*]]
+// SIMD-ONLY0:       if.then5869:
+// SIMD-ONLY0-NEXT:    [[TMP4459:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4459]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5870]]
+// SIMD-ONLY0:       if.end5870:
+// SIMD-ONLY0-NEXT:    [[TMP4460:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4460]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4461:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4462:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5871:%.*]] = fcmp oeq float [[TMP4461]], [[TMP4462]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5871]], label [[IF_THEN5873:%.*]], label [[IF_END5874:%.*]]
+// SIMD-ONLY0:       if.then5873:
+// SIMD-ONLY0-NEXT:    [[TMP4463:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4463]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5874]]
+// SIMD-ONLY0:       if.end5874:
+// SIMD-ONLY0-NEXT:    [[TMP4464:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4465:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5875:%.*]] = fcmp ogt float [[TMP4464]], [[TMP4465]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5875]], label [[IF_THEN5877:%.*]], label [[IF_END5878:%.*]]
+// SIMD-ONLY0:       if.then5877:
+// SIMD-ONLY0-NEXT:    [[TMP4466:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4466]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5878]]
+// SIMD-ONLY0:       if.end5878:
+// SIMD-ONLY0-NEXT:    [[TMP4467:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4467]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4468:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4469:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5879:%.*]] = fcmp ogt float [[TMP4468]], [[TMP4469]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5879]], label [[IF_THEN5881:%.*]], label [[IF_END5882:%.*]]
+// SIMD-ONLY0:       if.then5881:
+// SIMD-ONLY0-NEXT:    [[TMP4470:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4470]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5882]]
+// SIMD-ONLY0:       if.end5882:
+// SIMD-ONLY0-NEXT:    [[TMP4471:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4471]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4472:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4473:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5883:%.*]] = fcmp olt float [[TMP4472]], [[TMP4473]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5883]], label [[IF_THEN5885:%.*]], label [[IF_END5886:%.*]]
+// SIMD-ONLY0:       if.then5885:
+// SIMD-ONLY0-NEXT:    [[TMP4474:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4474]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5886]]
+// SIMD-ONLY0:       if.end5886:
+// SIMD-ONLY0-NEXT:    [[TMP4475:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4475]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4476:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4477:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5887:%.*]] = fcmp olt float [[TMP4476]], [[TMP4477]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5887]], label [[IF_THEN5889:%.*]], label [[IF_END5890:%.*]]
+// SIMD-ONLY0:       if.then5889:
+// SIMD-ONLY0-NEXT:    [[TMP4478:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4478]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5890]]
+// SIMD-ONLY0:       if.end5890:
+// SIMD-ONLY0-NEXT:    [[TMP4479:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4479]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4480:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4481:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5891:%.*]] = fcmp oeq float [[TMP4480]], [[TMP4481]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5891]], label [[IF_THEN5893:%.*]], label [[IF_END5894:%.*]]
+// SIMD-ONLY0:       if.then5893:
+// SIMD-ONLY0-NEXT:    [[TMP4482:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4482]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5894]]
+// SIMD-ONLY0:       if.end5894:
+// SIMD-ONLY0-NEXT:    [[TMP4483:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4483]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4484:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4485:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5895:%.*]] = fcmp oeq float [[TMP4484]], [[TMP4485]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5895]], label [[IF_THEN5897:%.*]], label [[IF_END5898:%.*]]
+// SIMD-ONLY0:       if.then5897:
+// SIMD-ONLY0-NEXT:    [[TMP4486:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4486]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5898]]
+// SIMD-ONLY0:       if.end5898:
+// SIMD-ONLY0-NEXT:    [[TMP4487:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4487]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4488:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4489:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5899:%.*]] = fcmp oeq float [[TMP4488]], [[TMP4489]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5899]], label [[IF_THEN5901:%.*]], label [[IF_ELSE5902:%.*]]
+// SIMD-ONLY0:       if.then5901:
+// SIMD-ONLY0-NEXT:    [[TMP4490:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4490]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5903:%.*]]
+// SIMD-ONLY0:       if.else5902:
+// SIMD-ONLY0-NEXT:    [[TMP4491:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4491]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5903]]
+// SIMD-ONLY0:       if.end5903:
+// SIMD-ONLY0-NEXT:    [[TMP4492:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4493:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5904:%.*]] = fcmp oeq float [[TMP4492]], [[TMP4493]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5904]], label [[IF_THEN5906:%.*]], label [[IF_ELSE5907:%.*]]
+// SIMD-ONLY0:       if.then5906:
+// SIMD-ONLY0-NEXT:    [[TMP4494:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4494]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5908:%.*]]
+// SIMD-ONLY0:       if.else5907:
+// SIMD-ONLY0-NEXT:    [[TMP4495:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4495]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5908]]
+// SIMD-ONLY0:       if.end5908:
+// SIMD-ONLY0-NEXT:    [[TMP4496:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4497:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5909:%.*]] = fcmp oeq float [[TMP4496]], [[TMP4497]]
+// SIMD-ONLY0-NEXT:    [[CONV5910:%.*]] = zext i1 [[CMP5909]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV5910]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4498:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL5911:%.*]] = icmp ne i32 [[TMP4498]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5911]], label [[IF_THEN5912:%.*]], label [[IF_END5913:%.*]]
+// SIMD-ONLY0:       if.then5912:
+// SIMD-ONLY0-NEXT:    [[TMP4499:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4499]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5913]]
+// SIMD-ONLY0:       if.end5913:
+// SIMD-ONLY0-NEXT:    [[TMP4500:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4501:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5914:%.*]] = fcmp oeq float [[TMP4500]], [[TMP4501]]
+// SIMD-ONLY0-NEXT:    [[CONV5915:%.*]] = zext i1 [[CMP5914]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV5915]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4502:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL5916:%.*]] = icmp ne i32 [[TMP4502]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5916]], label [[IF_THEN5917:%.*]], label [[IF_END5918:%.*]]
+// SIMD-ONLY0:       if.then5917:
+// SIMD-ONLY0-NEXT:    [[TMP4503:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4503]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5918]]
+// SIMD-ONLY0:       if.end5918:
+// SIMD-ONLY0-NEXT:    [[TMP4504:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4505:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5919:%.*]] = fcmp oeq float [[TMP4504]], [[TMP4505]]
+// SIMD-ONLY0-NEXT:    [[CONV5920:%.*]] = zext i1 [[CMP5919]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV5920]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4506:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL5921:%.*]] = icmp ne i32 [[TMP4506]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5921]], label [[IF_THEN5922:%.*]], label [[IF_ELSE5923:%.*]]
+// SIMD-ONLY0:       if.then5922:
+// SIMD-ONLY0-NEXT:    [[TMP4507:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4507]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5924:%.*]]
+// SIMD-ONLY0:       if.else5923:
+// SIMD-ONLY0-NEXT:    [[TMP4508:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4508]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5924]]
+// SIMD-ONLY0:       if.end5924:
+// SIMD-ONLY0-NEXT:    [[TMP4509:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4510:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5925:%.*]] = fcmp oeq float [[TMP4509]], [[TMP4510]]
+// SIMD-ONLY0-NEXT:    [[CONV5926:%.*]] = zext i1 [[CMP5925]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV5926]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4511:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL5927:%.*]] = icmp ne i32 [[TMP4511]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5927]], label [[IF_THEN5928:%.*]], label [[IF_ELSE5929:%.*]]
+// SIMD-ONLY0:       if.then5928:
+// SIMD-ONLY0-NEXT:    [[TMP4512:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4512]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5930:%.*]]
+// SIMD-ONLY0:       if.else5929:
+// SIMD-ONLY0-NEXT:    [[TMP4513:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4513]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5930]]
+// SIMD-ONLY0:       if.end5930:
+// SIMD-ONLY0-NEXT:    [[TMP4514:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4514]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4515:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4516:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5931:%.*]] = fcmp ogt float [[TMP4515]], [[TMP4516]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5931]], label [[IF_THEN5933:%.*]], label [[IF_END5934:%.*]]
+// SIMD-ONLY0:       if.then5933:
+// SIMD-ONLY0-NEXT:    [[TMP4517:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4517]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5934]]
+// SIMD-ONLY0:       if.end5934:
+// SIMD-ONLY0-NEXT:    [[TMP4518:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4518]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4519:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4520:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5935:%.*]] = fcmp ogt float [[TMP4519]], [[TMP4520]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5935]], label [[IF_THEN5937:%.*]], label [[IF_END5938:%.*]]
+// SIMD-ONLY0:       if.then5937:
+// SIMD-ONLY0-NEXT:    [[TMP4521:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4521]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5938]]
+// SIMD-ONLY0:       if.end5938:
+// SIMD-ONLY0-NEXT:    [[TMP4522:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4522]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4523:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4524:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5939:%.*]] = fcmp olt float [[TMP4523]], [[TMP4524]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5939]], label [[IF_THEN5941:%.*]], label [[IF_END5942:%.*]]
+// SIMD-ONLY0:       if.then5941:
+// SIMD-ONLY0-NEXT:    [[TMP4525:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4525]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5942]]
+// SIMD-ONLY0:       if.end5942:
+// SIMD-ONLY0-NEXT:    [[TMP4526:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4526]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4527:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4528:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5943:%.*]] = fcmp olt float [[TMP4527]], [[TMP4528]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5943]], label [[IF_THEN5945:%.*]], label [[IF_END5946:%.*]]
+// SIMD-ONLY0:       if.then5945:
+// SIMD-ONLY0-NEXT:    [[TMP4529:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4529]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5946]]
+// SIMD-ONLY0:       if.end5946:
+// SIMD-ONLY0-NEXT:    [[TMP4530:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4530]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4531:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4532:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5947:%.*]] = fcmp oeq float [[TMP4531]], [[TMP4532]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5947]], label [[IF_THEN5949:%.*]], label [[IF_END5950:%.*]]
+// SIMD-ONLY0:       if.then5949:
+// SIMD-ONLY0-NEXT:    [[TMP4533:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4533]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5950]]
+// SIMD-ONLY0:       if.end5950:
+// SIMD-ONLY0-NEXT:    [[TMP4534:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4534]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4535:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4536:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5951:%.*]] = fcmp oeq float [[TMP4535]], [[TMP4536]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5951]], label [[IF_THEN5953:%.*]], label [[IF_END5954:%.*]]
+// SIMD-ONLY0:       if.then5953:
+// SIMD-ONLY0-NEXT:    [[TMP4537:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4537]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5954]]
+// SIMD-ONLY0:       if.end5954:
+// SIMD-ONLY0-NEXT:    [[TMP4538:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4539:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5955:%.*]] = fcmp ogt float [[TMP4538]], [[TMP4539]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5955]], label [[IF_THEN5957:%.*]], label [[IF_END5958:%.*]]
+// SIMD-ONLY0:       if.then5957:
+// SIMD-ONLY0-NEXT:    [[TMP4540:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4540]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5958]]
+// SIMD-ONLY0:       if.end5958:
+// SIMD-ONLY0-NEXT:    [[TMP4541:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4541]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4542:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4543:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5959:%.*]] = fcmp ogt float [[TMP4542]], [[TMP4543]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5959]], label [[IF_THEN5961:%.*]], label [[IF_END5962:%.*]]
+// SIMD-ONLY0:       if.then5961:
+// SIMD-ONLY0-NEXT:    [[TMP4544:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4544]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5962]]
+// SIMD-ONLY0:       if.end5962:
+// SIMD-ONLY0-NEXT:    [[TMP4545:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4545]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4546:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4547:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5963:%.*]] = fcmp olt float [[TMP4546]], [[TMP4547]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5963]], label [[IF_THEN5965:%.*]], label [[IF_END5966:%.*]]
+// SIMD-ONLY0:       if.then5965:
+// SIMD-ONLY0-NEXT:    [[TMP4548:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4548]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5966]]
+// SIMD-ONLY0:       if.end5966:
+// SIMD-ONLY0-NEXT:    [[TMP4549:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4549]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4550:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4551:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5967:%.*]] = fcmp olt float [[TMP4550]], [[TMP4551]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5967]], label [[IF_THEN5969:%.*]], label [[IF_END5970:%.*]]
+// SIMD-ONLY0:       if.then5969:
+// SIMD-ONLY0-NEXT:    [[TMP4552:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4552]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5970]]
+// SIMD-ONLY0:       if.end5970:
+// SIMD-ONLY0-NEXT:    [[TMP4553:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4553]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4554:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4555:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5971:%.*]] = fcmp oeq float [[TMP4554]], [[TMP4555]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5971]], label [[IF_THEN5973:%.*]], label [[IF_END5974:%.*]]
+// SIMD-ONLY0:       if.then5973:
+// SIMD-ONLY0-NEXT:    [[TMP4556:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4556]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5974]]
+// SIMD-ONLY0:       if.end5974:
+// SIMD-ONLY0-NEXT:    [[TMP4557:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4557]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4558:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4559:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5975:%.*]] = fcmp oeq float [[TMP4558]], [[TMP4559]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5975]], label [[IF_THEN5977:%.*]], label [[IF_END5978:%.*]]
+// SIMD-ONLY0:       if.then5977:
+// SIMD-ONLY0-NEXT:    [[TMP4560:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4560]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5978]]
+// SIMD-ONLY0:       if.end5978:
+// SIMD-ONLY0-NEXT:    [[TMP4561:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4561]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4562:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4563:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5979:%.*]] = fcmp oeq float [[TMP4562]], [[TMP4563]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5979]], label [[IF_THEN5981:%.*]], label [[IF_ELSE5982:%.*]]
+// SIMD-ONLY0:       if.then5981:
+// SIMD-ONLY0-NEXT:    [[TMP4564:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4564]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5983:%.*]]
+// SIMD-ONLY0:       if.else5982:
+// SIMD-ONLY0-NEXT:    [[TMP4565:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4565]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5983]]
+// SIMD-ONLY0:       if.end5983:
+// SIMD-ONLY0-NEXT:    [[TMP4566:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4567:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5984:%.*]] = fcmp oeq float [[TMP4566]], [[TMP4567]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5984]], label [[IF_THEN5986:%.*]], label [[IF_ELSE5987:%.*]]
+// SIMD-ONLY0:       if.then5986:
+// SIMD-ONLY0-NEXT:    [[TMP4568:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4568]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5988:%.*]]
+// SIMD-ONLY0:       if.else5987:
+// SIMD-ONLY0-NEXT:    [[TMP4569:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4569]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5988]]
+// SIMD-ONLY0:       if.end5988:
+// SIMD-ONLY0-NEXT:    [[TMP4570:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4571:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5989:%.*]] = fcmp oeq float [[TMP4570]], [[TMP4571]]
+// SIMD-ONLY0-NEXT:    [[CONV5990:%.*]] = zext i1 [[CMP5989]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV5990]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4572:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL5991:%.*]] = icmp ne i32 [[TMP4572]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5991]], label [[IF_THEN5992:%.*]], label [[IF_END5993:%.*]]
+// SIMD-ONLY0:       if.then5992:
+// SIMD-ONLY0-NEXT:    [[TMP4573:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4573]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5993]]
+// SIMD-ONLY0:       if.end5993:
+// SIMD-ONLY0-NEXT:    [[TMP4574:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4575:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5994:%.*]] = fcmp oeq float [[TMP4574]], [[TMP4575]]
+// SIMD-ONLY0-NEXT:    [[CONV5995:%.*]] = zext i1 [[CMP5994]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV5995]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4576:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL5996:%.*]] = icmp ne i32 [[TMP4576]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL5996]], label [[IF_THEN5997:%.*]], label [[IF_END5998:%.*]]
+// SIMD-ONLY0:       if.then5997:
+// SIMD-ONLY0-NEXT:    [[TMP4577:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4577]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END5998]]
+// SIMD-ONLY0:       if.end5998:
+// SIMD-ONLY0-NEXT:    [[TMP4578:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4579:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5999:%.*]] = fcmp oeq float [[TMP4578]], [[TMP4579]]
+// SIMD-ONLY0-NEXT:    [[CONV6000:%.*]] = zext i1 [[CMP5999]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6000]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4580:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6001:%.*]] = icmp ne i32 [[TMP4580]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6001]], label [[IF_THEN6002:%.*]], label [[IF_ELSE6003:%.*]]
+// SIMD-ONLY0:       if.then6002:
+// SIMD-ONLY0-NEXT:    [[TMP4581:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4581]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6004:%.*]]
+// SIMD-ONLY0:       if.else6003:
+// SIMD-ONLY0-NEXT:    [[TMP4582:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4582]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6004]]
+// SIMD-ONLY0:       if.end6004:
+// SIMD-ONLY0-NEXT:    [[TMP4583:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4584:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6005:%.*]] = fcmp oeq float [[TMP4583]], [[TMP4584]]
+// SIMD-ONLY0-NEXT:    [[CONV6006:%.*]] = zext i1 [[CMP6005]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6006]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4585:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6007:%.*]] = icmp ne i32 [[TMP4585]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6007]], label [[IF_THEN6008:%.*]], label [[IF_ELSE6009:%.*]]
+// SIMD-ONLY0:       if.then6008:
+// SIMD-ONLY0-NEXT:    [[TMP4586:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4586]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6010:%.*]]
+// SIMD-ONLY0:       if.else6009:
+// SIMD-ONLY0-NEXT:    [[TMP4587:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4587]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6010]]
+// SIMD-ONLY0:       if.end6010:
+// SIMD-ONLY0-NEXT:    [[TMP4588:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4588]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4589:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4590:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6011:%.*]] = fcmp ogt float [[TMP4589]], [[TMP4590]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6011]], label [[IF_THEN6013:%.*]], label [[IF_END6014:%.*]]
+// SIMD-ONLY0:       if.then6013:
+// SIMD-ONLY0-NEXT:    [[TMP4591:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4591]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6014]]
+// SIMD-ONLY0:       if.end6014:
+// SIMD-ONLY0-NEXT:    [[TMP4592:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4592]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4593:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4594:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6015:%.*]] = fcmp ogt float [[TMP4593]], [[TMP4594]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6015]], label [[IF_THEN6017:%.*]], label [[IF_END6018:%.*]]
+// SIMD-ONLY0:       if.then6017:
+// SIMD-ONLY0-NEXT:    [[TMP4595:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4595]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6018]]
+// SIMD-ONLY0:       if.end6018:
+// SIMD-ONLY0-NEXT:    [[TMP4596:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4596]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4597:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4598:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6019:%.*]] = fcmp olt float [[TMP4597]], [[TMP4598]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6019]], label [[IF_THEN6021:%.*]], label [[IF_END6022:%.*]]
+// SIMD-ONLY0:       if.then6021:
+// SIMD-ONLY0-NEXT:    [[TMP4599:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4599]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6022]]
+// SIMD-ONLY0:       if.end6022:
+// SIMD-ONLY0-NEXT:    [[TMP4600:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4600]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4601:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4602:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6023:%.*]] = fcmp olt float [[TMP4601]], [[TMP4602]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6023]], label [[IF_THEN6025:%.*]], label [[IF_END6026:%.*]]
+// SIMD-ONLY0:       if.then6025:
+// SIMD-ONLY0-NEXT:    [[TMP4603:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4603]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6026]]
+// SIMD-ONLY0:       if.end6026:
+// SIMD-ONLY0-NEXT:    [[TMP4604:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4604]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4605:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4606:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6027:%.*]] = fcmp oeq float [[TMP4605]], [[TMP4606]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6027]], label [[IF_THEN6029:%.*]], label [[IF_END6030:%.*]]
+// SIMD-ONLY0:       if.then6029:
+// SIMD-ONLY0-NEXT:    [[TMP4607:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4607]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6030]]
+// SIMD-ONLY0:       if.end6030:
+// SIMD-ONLY0-NEXT:    [[TMP4608:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4608]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4609:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4610:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6031:%.*]] = fcmp oeq float [[TMP4609]], [[TMP4610]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6031]], label [[IF_THEN6033:%.*]], label [[IF_END6034:%.*]]
+// SIMD-ONLY0:       if.then6033:
+// SIMD-ONLY0-NEXT:    [[TMP4611:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4611]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6034]]
+// SIMD-ONLY0:       if.end6034:
+// SIMD-ONLY0-NEXT:    [[TMP4612:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4613:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6035:%.*]] = fcmp ogt float [[TMP4612]], [[TMP4613]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6035]], label [[IF_THEN6037:%.*]], label [[IF_END6038:%.*]]
+// SIMD-ONLY0:       if.then6037:
+// SIMD-ONLY0-NEXT:    [[TMP4614:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4614]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6038]]
+// SIMD-ONLY0:       if.end6038:
+// SIMD-ONLY0-NEXT:    [[TMP4615:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4615]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4616:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4617:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6039:%.*]] = fcmp ogt float [[TMP4616]], [[TMP4617]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6039]], label [[IF_THEN6041:%.*]], label [[IF_END6042:%.*]]
+// SIMD-ONLY0:       if.then6041:
+// SIMD-ONLY0-NEXT:    [[TMP4618:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4618]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6042]]
+// SIMD-ONLY0:       if.end6042:
+// SIMD-ONLY0-NEXT:    [[TMP4619:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4619]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4620:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4621:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6043:%.*]] = fcmp olt float [[TMP4620]], [[TMP4621]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6043]], label [[IF_THEN6045:%.*]], label [[IF_END6046:%.*]]
+// SIMD-ONLY0:       if.then6045:
+// SIMD-ONLY0-NEXT:    [[TMP4622:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4622]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6046]]
+// SIMD-ONLY0:       if.end6046:
+// SIMD-ONLY0-NEXT:    [[TMP4623:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4623]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4624:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4625:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6047:%.*]] = fcmp olt float [[TMP4624]], [[TMP4625]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6047]], label [[IF_THEN6049:%.*]], label [[IF_END6050:%.*]]
+// SIMD-ONLY0:       if.then6049:
+// SIMD-ONLY0-NEXT:    [[TMP4626:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4626]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6050]]
+// SIMD-ONLY0:       if.end6050:
+// SIMD-ONLY0-NEXT:    [[TMP4627:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4627]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4628:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4629:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6051:%.*]] = fcmp oeq float [[TMP4628]], [[TMP4629]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6051]], label [[IF_THEN6053:%.*]], label [[IF_END6054:%.*]]
+// SIMD-ONLY0:       if.then6053:
+// SIMD-ONLY0-NEXT:    [[TMP4630:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4630]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6054]]
+// SIMD-ONLY0:       if.end6054:
+// SIMD-ONLY0-NEXT:    [[TMP4631:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4631]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4632:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4633:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6055:%.*]] = fcmp oeq float [[TMP4632]], [[TMP4633]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6055]], label [[IF_THEN6057:%.*]], label [[IF_END6058:%.*]]
+// SIMD-ONLY0:       if.then6057:
+// SIMD-ONLY0-NEXT:    [[TMP4634:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4634]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6058]]
+// SIMD-ONLY0:       if.end6058:
+// SIMD-ONLY0-NEXT:    [[TMP4635:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4635]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4636:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4637:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6059:%.*]] = fcmp oeq float [[TMP4636]], [[TMP4637]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6059]], label [[IF_THEN6061:%.*]], label [[IF_ELSE6062:%.*]]
+// SIMD-ONLY0:       if.then6061:
+// SIMD-ONLY0-NEXT:    [[TMP4638:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4638]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6063:%.*]]
+// SIMD-ONLY0:       if.else6062:
+// SIMD-ONLY0-NEXT:    [[TMP4639:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4639]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6063]]
+// SIMD-ONLY0:       if.end6063:
+// SIMD-ONLY0-NEXT:    [[TMP4640:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4641:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6064:%.*]] = fcmp oeq float [[TMP4640]], [[TMP4641]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6064]], label [[IF_THEN6066:%.*]], label [[IF_ELSE6067:%.*]]
+// SIMD-ONLY0:       if.then6066:
+// SIMD-ONLY0-NEXT:    [[TMP4642:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4642]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6068:%.*]]
+// SIMD-ONLY0:       if.else6067:
+// SIMD-ONLY0-NEXT:    [[TMP4643:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4643]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6068]]
+// SIMD-ONLY0:       if.end6068:
+// SIMD-ONLY0-NEXT:    [[TMP4644:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4645:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6069:%.*]] = fcmp oeq float [[TMP4644]], [[TMP4645]]
+// SIMD-ONLY0-NEXT:    [[CONV6070:%.*]] = zext i1 [[CMP6069]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6070]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4646:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6071:%.*]] = icmp ne i32 [[TMP4646]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6071]], label [[IF_THEN6072:%.*]], label [[IF_END6073:%.*]]
+// SIMD-ONLY0:       if.then6072:
+// SIMD-ONLY0-NEXT:    [[TMP4647:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4647]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6073]]
+// SIMD-ONLY0:       if.end6073:
+// SIMD-ONLY0-NEXT:    [[TMP4648:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4649:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6074:%.*]] = fcmp oeq float [[TMP4648]], [[TMP4649]]
+// SIMD-ONLY0-NEXT:    [[CONV6075:%.*]] = zext i1 [[CMP6074]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6075]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4650:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6076:%.*]] = icmp ne i32 [[TMP4650]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6076]], label [[IF_THEN6077:%.*]], label [[IF_END6078:%.*]]
+// SIMD-ONLY0:       if.then6077:
+// SIMD-ONLY0-NEXT:    [[TMP4651:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4651]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6078]]
+// SIMD-ONLY0:       if.end6078:
+// SIMD-ONLY0-NEXT:    [[TMP4652:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4653:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6079:%.*]] = fcmp oeq float [[TMP4652]], [[TMP4653]]
+// SIMD-ONLY0-NEXT:    [[CONV6080:%.*]] = zext i1 [[CMP6079]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6080]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4654:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6081:%.*]] = icmp ne i32 [[TMP4654]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6081]], label [[IF_THEN6082:%.*]], label [[IF_ELSE6083:%.*]]
+// SIMD-ONLY0:       if.then6082:
+// SIMD-ONLY0-NEXT:    [[TMP4655:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4655]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6084:%.*]]
+// SIMD-ONLY0:       if.else6083:
+// SIMD-ONLY0-NEXT:    [[TMP4656:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4656]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6084]]
+// SIMD-ONLY0:       if.end6084:
+// SIMD-ONLY0-NEXT:    [[TMP4657:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4658:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6085:%.*]] = fcmp oeq float [[TMP4657]], [[TMP4658]]
+// SIMD-ONLY0-NEXT:    [[CONV6086:%.*]] = zext i1 [[CMP6085]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6086]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4659:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6087:%.*]] = icmp ne i32 [[TMP4659]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6087]], label [[IF_THEN6088:%.*]], label [[IF_ELSE6089:%.*]]
+// SIMD-ONLY0:       if.then6088:
+// SIMD-ONLY0-NEXT:    [[TMP4660:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4660]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6090:%.*]]
+// SIMD-ONLY0:       if.else6089:
+// SIMD-ONLY0-NEXT:    [[TMP4661:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4661]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6090]]
+// SIMD-ONLY0:       if.end6090:
+// SIMD-ONLY0-NEXT:    [[TMP4662:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4662]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4663:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4664:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6091:%.*]] = fcmp ogt float [[TMP4663]], [[TMP4664]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6091]], label [[IF_THEN6093:%.*]], label [[IF_END6094:%.*]]
+// SIMD-ONLY0:       if.then6093:
+// SIMD-ONLY0-NEXT:    [[TMP4665:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4665]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6094]]
+// SIMD-ONLY0:       if.end6094:
+// SIMD-ONLY0-NEXT:    [[TMP4666:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4666]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4667:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4668:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6095:%.*]] = fcmp ogt float [[TMP4667]], [[TMP4668]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6095]], label [[IF_THEN6097:%.*]], label [[IF_END6098:%.*]]
+// SIMD-ONLY0:       if.then6097:
+// SIMD-ONLY0-NEXT:    [[TMP4669:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4669]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6098]]
+// SIMD-ONLY0:       if.end6098:
+// SIMD-ONLY0-NEXT:    [[TMP4670:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4670]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4671:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4672:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6099:%.*]] = fcmp olt float [[TMP4671]], [[TMP4672]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6099]], label [[IF_THEN6101:%.*]], label [[IF_END6102:%.*]]
+// SIMD-ONLY0:       if.then6101:
+// SIMD-ONLY0-NEXT:    [[TMP4673:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4673]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6102]]
+// SIMD-ONLY0:       if.end6102:
+// SIMD-ONLY0-NEXT:    [[TMP4674:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4674]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4675:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4676:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6103:%.*]] = fcmp olt float [[TMP4675]], [[TMP4676]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6103]], label [[IF_THEN6105:%.*]], label [[IF_END6106:%.*]]
+// SIMD-ONLY0:       if.then6105:
+// SIMD-ONLY0-NEXT:    [[TMP4677:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4677]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6106]]
+// SIMD-ONLY0:       if.end6106:
+// SIMD-ONLY0-NEXT:    [[TMP4678:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4678]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4679:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4680:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6107:%.*]] = fcmp oeq float [[TMP4679]], [[TMP4680]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6107]], label [[IF_THEN6109:%.*]], label [[IF_END6110:%.*]]
+// SIMD-ONLY0:       if.then6109:
+// SIMD-ONLY0-NEXT:    [[TMP4681:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4681]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6110]]
+// SIMD-ONLY0:       if.end6110:
+// SIMD-ONLY0-NEXT:    [[TMP4682:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4682]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4683:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4684:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6111:%.*]] = fcmp oeq float [[TMP4683]], [[TMP4684]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6111]], label [[IF_THEN6113:%.*]], label [[IF_END6114:%.*]]
+// SIMD-ONLY0:       if.then6113:
+// SIMD-ONLY0-NEXT:    [[TMP4685:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4685]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6114]]
+// SIMD-ONLY0:       if.end6114:
+// SIMD-ONLY0-NEXT:    [[TMP4686:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4687:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6115:%.*]] = fcmp ogt float [[TMP4686]], [[TMP4687]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6115]], label [[IF_THEN6117:%.*]], label [[IF_END6118:%.*]]
+// SIMD-ONLY0:       if.then6117:
+// SIMD-ONLY0-NEXT:    [[TMP4688:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4688]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6118]]
+// SIMD-ONLY0:       if.end6118:
+// SIMD-ONLY0-NEXT:    [[TMP4689:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4689]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4690:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4691:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6119:%.*]] = fcmp ogt float [[TMP4690]], [[TMP4691]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6119]], label [[IF_THEN6121:%.*]], label [[IF_END6122:%.*]]
+// SIMD-ONLY0:       if.then6121:
+// SIMD-ONLY0-NEXT:    [[TMP4692:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4692]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6122]]
+// SIMD-ONLY0:       if.end6122:
+// SIMD-ONLY0-NEXT:    [[TMP4693:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4693]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4694:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4695:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6123:%.*]] = fcmp olt float [[TMP4694]], [[TMP4695]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6123]], label [[IF_THEN6125:%.*]], label [[IF_END6126:%.*]]
+// SIMD-ONLY0:       if.then6125:
+// SIMD-ONLY0-NEXT:    [[TMP4696:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4696]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6126]]
+// SIMD-ONLY0:       if.end6126:
+// SIMD-ONLY0-NEXT:    [[TMP4697:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4697]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4698:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4699:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6127:%.*]] = fcmp olt float [[TMP4698]], [[TMP4699]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6127]], label [[IF_THEN6129:%.*]], label [[IF_END6130:%.*]]
+// SIMD-ONLY0:       if.then6129:
+// SIMD-ONLY0-NEXT:    [[TMP4700:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4700]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6130]]
+// SIMD-ONLY0:       if.end6130:
+// SIMD-ONLY0-NEXT:    [[TMP4701:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4701]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4702:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4703:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6131:%.*]] = fcmp oeq float [[TMP4702]], [[TMP4703]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6131]], label [[IF_THEN6133:%.*]], label [[IF_END6134:%.*]]
+// SIMD-ONLY0:       if.then6133:
+// SIMD-ONLY0-NEXT:    [[TMP4704:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4704]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6134]]
+// SIMD-ONLY0:       if.end6134:
+// SIMD-ONLY0-NEXT:    [[TMP4705:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4705]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4706:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4707:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6135:%.*]] = fcmp oeq float [[TMP4706]], [[TMP4707]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6135]], label [[IF_THEN6137:%.*]], label [[IF_END6138:%.*]]
+// SIMD-ONLY0:       if.then6137:
+// SIMD-ONLY0-NEXT:    [[TMP4708:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4708]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6138]]
+// SIMD-ONLY0:       if.end6138:
+// SIMD-ONLY0-NEXT:    [[TMP4709:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4709]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4710:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4711:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6139:%.*]] = fcmp oeq float [[TMP4710]], [[TMP4711]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6139]], label [[IF_THEN6141:%.*]], label [[IF_ELSE6142:%.*]]
+// SIMD-ONLY0:       if.then6141:
+// SIMD-ONLY0-NEXT:    [[TMP4712:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4712]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6143:%.*]]
+// SIMD-ONLY0:       if.else6142:
+// SIMD-ONLY0-NEXT:    [[TMP4713:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4713]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6143]]
+// SIMD-ONLY0:       if.end6143:
+// SIMD-ONLY0-NEXT:    [[TMP4714:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4715:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6144:%.*]] = fcmp oeq float [[TMP4714]], [[TMP4715]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6144]], label [[IF_THEN6146:%.*]], label [[IF_ELSE6147:%.*]]
+// SIMD-ONLY0:       if.then6146:
+// SIMD-ONLY0-NEXT:    [[TMP4716:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4716]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6148:%.*]]
+// SIMD-ONLY0:       if.else6147:
+// SIMD-ONLY0-NEXT:    [[TMP4717:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4717]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6148]]
+// SIMD-ONLY0:       if.end6148:
+// SIMD-ONLY0-NEXT:    [[TMP4718:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4719:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6149:%.*]] = fcmp oeq float [[TMP4718]], [[TMP4719]]
+// SIMD-ONLY0-NEXT:    [[CONV6150:%.*]] = zext i1 [[CMP6149]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6150]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4720:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6151:%.*]] = icmp ne i32 [[TMP4720]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6151]], label [[IF_THEN6152:%.*]], label [[IF_END6153:%.*]]
+// SIMD-ONLY0:       if.then6152:
+// SIMD-ONLY0-NEXT:    [[TMP4721:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4721]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6153]]
+// SIMD-ONLY0:       if.end6153:
+// SIMD-ONLY0-NEXT:    [[TMP4722:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4723:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6154:%.*]] = fcmp oeq float [[TMP4722]], [[TMP4723]]
+// SIMD-ONLY0-NEXT:    [[CONV6155:%.*]] = zext i1 [[CMP6154]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6155]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4724:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6156:%.*]] = icmp ne i32 [[TMP4724]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6156]], label [[IF_THEN6157:%.*]], label [[IF_END6158:%.*]]
+// SIMD-ONLY0:       if.then6157:
+// SIMD-ONLY0-NEXT:    [[TMP4725:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4725]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6158]]
+// SIMD-ONLY0:       if.end6158:
+// SIMD-ONLY0-NEXT:    [[TMP4726:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4727:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6159:%.*]] = fcmp oeq float [[TMP4726]], [[TMP4727]]
+// SIMD-ONLY0-NEXT:    [[CONV6160:%.*]] = zext i1 [[CMP6159]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6160]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4728:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6161:%.*]] = icmp ne i32 [[TMP4728]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6161]], label [[IF_THEN6162:%.*]], label [[IF_ELSE6163:%.*]]
+// SIMD-ONLY0:       if.then6162:
+// SIMD-ONLY0-NEXT:    [[TMP4729:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4729]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6164:%.*]]
+// SIMD-ONLY0:       if.else6163:
+// SIMD-ONLY0-NEXT:    [[TMP4730:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4730]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6164]]
+// SIMD-ONLY0:       if.end6164:
+// SIMD-ONLY0-NEXT:    [[TMP4731:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4732:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6165:%.*]] = fcmp oeq float [[TMP4731]], [[TMP4732]]
+// SIMD-ONLY0-NEXT:    [[CONV6166:%.*]] = zext i1 [[CMP6165]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6166]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4733:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6167:%.*]] = icmp ne i32 [[TMP4733]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6167]], label [[IF_THEN6168:%.*]], label [[IF_ELSE6169:%.*]]
+// SIMD-ONLY0:       if.then6168:
+// SIMD-ONLY0-NEXT:    [[TMP4734:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4734]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6170:%.*]]
+// SIMD-ONLY0:       if.else6169:
+// SIMD-ONLY0-NEXT:    [[TMP4735:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4735]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6170]]
+// SIMD-ONLY0:       if.end6170:
+// SIMD-ONLY0-NEXT:    [[TMP4736:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4736]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4737:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4738:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6171:%.*]] = fcmp ogt float [[TMP4737]], [[TMP4738]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6171]], label [[IF_THEN6173:%.*]], label [[IF_END6174:%.*]]
+// SIMD-ONLY0:       if.then6173:
+// SIMD-ONLY0-NEXT:    [[TMP4739:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4739]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6174]]
+// SIMD-ONLY0:       if.end6174:
+// SIMD-ONLY0-NEXT:    [[TMP4740:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4740]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4741:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4742:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6175:%.*]] = fcmp ogt float [[TMP4741]], [[TMP4742]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6175]], label [[IF_THEN6177:%.*]], label [[IF_END6178:%.*]]
+// SIMD-ONLY0:       if.then6177:
+// SIMD-ONLY0-NEXT:    [[TMP4743:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4743]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6178]]
+// SIMD-ONLY0:       if.end6178:
+// SIMD-ONLY0-NEXT:    [[TMP4744:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4744]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4745:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4746:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6179:%.*]] = fcmp olt float [[TMP4745]], [[TMP4746]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6179]], label [[IF_THEN6181:%.*]], label [[IF_END6182:%.*]]
+// SIMD-ONLY0:       if.then6181:
+// SIMD-ONLY0-NEXT:    [[TMP4747:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4747]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6182]]
+// SIMD-ONLY0:       if.end6182:
+// SIMD-ONLY0-NEXT:    [[TMP4748:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4748]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4749:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4750:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6183:%.*]] = fcmp olt float [[TMP4749]], [[TMP4750]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6183]], label [[IF_THEN6185:%.*]], label [[IF_END6186:%.*]]
+// SIMD-ONLY0:       if.then6185:
+// SIMD-ONLY0-NEXT:    [[TMP4751:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4751]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6186]]
+// SIMD-ONLY0:       if.end6186:
+// SIMD-ONLY0-NEXT:    [[TMP4752:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4752]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4753:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4754:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6187:%.*]] = fcmp oeq float [[TMP4753]], [[TMP4754]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6187]], label [[IF_THEN6189:%.*]], label [[IF_END6190:%.*]]
+// SIMD-ONLY0:       if.then6189:
+// SIMD-ONLY0-NEXT:    [[TMP4755:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4755]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6190]]
+// SIMD-ONLY0:       if.end6190:
+// SIMD-ONLY0-NEXT:    [[TMP4756:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4756]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4757:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4758:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6191:%.*]] = fcmp oeq float [[TMP4757]], [[TMP4758]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6191]], label [[IF_THEN6193:%.*]], label [[IF_END6194:%.*]]
+// SIMD-ONLY0:       if.then6193:
+// SIMD-ONLY0-NEXT:    [[TMP4759:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4759]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6194]]
+// SIMD-ONLY0:       if.end6194:
+// SIMD-ONLY0-NEXT:    [[TMP4760:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4761:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6195:%.*]] = fcmp ogt float [[TMP4760]], [[TMP4761]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6195]], label [[IF_THEN6197:%.*]], label [[IF_END6198:%.*]]
+// SIMD-ONLY0:       if.then6197:
+// SIMD-ONLY0-NEXT:    [[TMP4762:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4762]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6198]]
+// SIMD-ONLY0:       if.end6198:
+// SIMD-ONLY0-NEXT:    [[TMP4763:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4763]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4764:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4765:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6199:%.*]] = fcmp ogt float [[TMP4764]], [[TMP4765]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6199]], label [[IF_THEN6201:%.*]], label [[IF_END6202:%.*]]
+// SIMD-ONLY0:       if.then6201:
+// SIMD-ONLY0-NEXT:    [[TMP4766:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4766]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6202]]
+// SIMD-ONLY0:       if.end6202:
+// SIMD-ONLY0-NEXT:    [[TMP4767:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4767]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4768:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4769:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6203:%.*]] = fcmp olt float [[TMP4768]], [[TMP4769]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6203]], label [[IF_THEN6205:%.*]], label [[IF_END6206:%.*]]
+// SIMD-ONLY0:       if.then6205:
+// SIMD-ONLY0-NEXT:    [[TMP4770:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4770]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6206]]
+// SIMD-ONLY0:       if.end6206:
+// SIMD-ONLY0-NEXT:    [[TMP4771:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4771]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4772:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4773:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6207:%.*]] = fcmp olt float [[TMP4772]], [[TMP4773]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6207]], label [[IF_THEN6209:%.*]], label [[IF_END6210:%.*]]
+// SIMD-ONLY0:       if.then6209:
+// SIMD-ONLY0-NEXT:    [[TMP4774:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4774]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6210]]
+// SIMD-ONLY0:       if.end6210:
+// SIMD-ONLY0-NEXT:    [[TMP4775:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4775]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4776:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4777:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6211:%.*]] = fcmp oeq float [[TMP4776]], [[TMP4777]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6211]], label [[IF_THEN6213:%.*]], label [[IF_END6214:%.*]]
+// SIMD-ONLY0:       if.then6213:
+// SIMD-ONLY0-NEXT:    [[TMP4778:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4778]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6214]]
+// SIMD-ONLY0:       if.end6214:
+// SIMD-ONLY0-NEXT:    [[TMP4779:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4779]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4780:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4781:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6215:%.*]] = fcmp oeq float [[TMP4780]], [[TMP4781]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6215]], label [[IF_THEN6217:%.*]], label [[IF_END6218:%.*]]
+// SIMD-ONLY0:       if.then6217:
+// SIMD-ONLY0-NEXT:    [[TMP4782:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4782]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6218]]
+// SIMD-ONLY0:       if.end6218:
+// SIMD-ONLY0-NEXT:    [[TMP4783:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4783]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4784:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4785:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6219:%.*]] = fcmp oeq float [[TMP4784]], [[TMP4785]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6219]], label [[IF_THEN6221:%.*]], label [[IF_ELSE6222:%.*]]
+// SIMD-ONLY0:       if.then6221:
+// SIMD-ONLY0-NEXT:    [[TMP4786:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4786]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6223:%.*]]
+// SIMD-ONLY0:       if.else6222:
+// SIMD-ONLY0-NEXT:    [[TMP4787:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4787]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6223]]
+// SIMD-ONLY0:       if.end6223:
+// SIMD-ONLY0-NEXT:    [[TMP4788:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4789:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6224:%.*]] = fcmp oeq float [[TMP4788]], [[TMP4789]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6224]], label [[IF_THEN6226:%.*]], label [[IF_ELSE6227:%.*]]
+// SIMD-ONLY0:       if.then6226:
+// SIMD-ONLY0-NEXT:    [[TMP4790:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4790]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6228:%.*]]
+// SIMD-ONLY0:       if.else6227:
+// SIMD-ONLY0-NEXT:    [[TMP4791:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4791]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6228]]
+// SIMD-ONLY0:       if.end6228:
+// SIMD-ONLY0-NEXT:    [[TMP4792:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4793:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6229:%.*]] = fcmp oeq float [[TMP4792]], [[TMP4793]]
+// SIMD-ONLY0-NEXT:    [[CONV6230:%.*]] = zext i1 [[CMP6229]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6230]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4794:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6231:%.*]] = icmp ne i32 [[TMP4794]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6231]], label [[IF_THEN6232:%.*]], label [[IF_END6233:%.*]]
+// SIMD-ONLY0:       if.then6232:
+// SIMD-ONLY0-NEXT:    [[TMP4795:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4795]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6233]]
+// SIMD-ONLY0:       if.end6233:
+// SIMD-ONLY0-NEXT:    [[TMP4796:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4797:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6234:%.*]] = fcmp oeq float [[TMP4796]], [[TMP4797]]
+// SIMD-ONLY0-NEXT:    [[CONV6235:%.*]] = zext i1 [[CMP6234]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6235]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4798:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6236:%.*]] = icmp ne i32 [[TMP4798]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6236]], label [[IF_THEN6237:%.*]], label [[IF_END6238:%.*]]
+// SIMD-ONLY0:       if.then6237:
+// SIMD-ONLY0-NEXT:    [[TMP4799:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4799]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6238]]
+// SIMD-ONLY0:       if.end6238:
+// SIMD-ONLY0-NEXT:    [[TMP4800:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4801:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6239:%.*]] = fcmp oeq float [[TMP4800]], [[TMP4801]]
+// SIMD-ONLY0-NEXT:    [[CONV6240:%.*]] = zext i1 [[CMP6239]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6240]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4802:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6241:%.*]] = icmp ne i32 [[TMP4802]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6241]], label [[IF_THEN6242:%.*]], label [[IF_ELSE6243:%.*]]
+// SIMD-ONLY0:       if.then6242:
+// SIMD-ONLY0-NEXT:    [[TMP4803:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4803]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6244:%.*]]
+// SIMD-ONLY0:       if.else6243:
+// SIMD-ONLY0-NEXT:    [[TMP4804:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4804]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6244]]
+// SIMD-ONLY0:       if.end6244:
+// SIMD-ONLY0-NEXT:    [[TMP4805:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4806:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6245:%.*]] = fcmp oeq float [[TMP4805]], [[TMP4806]]
+// SIMD-ONLY0-NEXT:    [[CONV6246:%.*]] = zext i1 [[CMP6245]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6246]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4807:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6247:%.*]] = icmp ne i32 [[TMP4807]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6247]], label [[IF_THEN6248:%.*]], label [[IF_ELSE6249:%.*]]
+// SIMD-ONLY0:       if.then6248:
+// SIMD-ONLY0-NEXT:    [[TMP4808:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4808]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6250:%.*]]
+// SIMD-ONLY0:       if.else6249:
+// SIMD-ONLY0-NEXT:    [[TMP4809:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4809]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6250]]
+// SIMD-ONLY0:       if.end6250:
+// SIMD-ONLY0-NEXT:    [[TMP4810:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4810]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4811:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4812:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6251:%.*]] = fcmp ogt float [[TMP4811]], [[TMP4812]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6251]], label [[IF_THEN6253:%.*]], label [[IF_END6254:%.*]]
+// SIMD-ONLY0:       if.then6253:
+// SIMD-ONLY0-NEXT:    [[TMP4813:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4813]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6254]]
+// SIMD-ONLY0:       if.end6254:
+// SIMD-ONLY0-NEXT:    [[TMP4814:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4814]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4815:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4816:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6255:%.*]] = fcmp ogt float [[TMP4815]], [[TMP4816]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6255]], label [[IF_THEN6257:%.*]], label [[IF_END6258:%.*]]
+// SIMD-ONLY0:       if.then6257:
+// SIMD-ONLY0-NEXT:    [[TMP4817:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4817]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6258]]
+// SIMD-ONLY0:       if.end6258:
+// SIMD-ONLY0-NEXT:    [[TMP4818:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4818]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4819:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4820:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6259:%.*]] = fcmp olt float [[TMP4819]], [[TMP4820]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6259]], label [[IF_THEN6261:%.*]], label [[IF_END6262:%.*]]
+// SIMD-ONLY0:       if.then6261:
+// SIMD-ONLY0-NEXT:    [[TMP4821:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4821]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6262]]
+// SIMD-ONLY0:       if.end6262:
+// SIMD-ONLY0-NEXT:    [[TMP4822:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4822]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4823:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4824:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6263:%.*]] = fcmp olt float [[TMP4823]], [[TMP4824]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6263]], label [[IF_THEN6265:%.*]], label [[IF_END6266:%.*]]
+// SIMD-ONLY0:       if.then6265:
+// SIMD-ONLY0-NEXT:    [[TMP4825:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4825]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6266]]
+// SIMD-ONLY0:       if.end6266:
+// SIMD-ONLY0-NEXT:    [[TMP4826:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4826]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4827:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4828:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6267:%.*]] = fcmp oeq float [[TMP4827]], [[TMP4828]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6267]], label [[IF_THEN6269:%.*]], label [[IF_END6270:%.*]]
+// SIMD-ONLY0:       if.then6269:
+// SIMD-ONLY0-NEXT:    [[TMP4829:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4829]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6270]]
+// SIMD-ONLY0:       if.end6270:
+// SIMD-ONLY0-NEXT:    [[TMP4830:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4830]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4831:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4832:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6271:%.*]] = fcmp oeq float [[TMP4831]], [[TMP4832]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6271]], label [[IF_THEN6273:%.*]], label [[IF_END6274:%.*]]
+// SIMD-ONLY0:       if.then6273:
+// SIMD-ONLY0-NEXT:    [[TMP4833:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4833]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6274]]
+// SIMD-ONLY0:       if.end6274:
+// SIMD-ONLY0-NEXT:    [[TMP4834:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4835:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6275:%.*]] = fcmp ogt float [[TMP4834]], [[TMP4835]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6275]], label [[IF_THEN6277:%.*]], label [[IF_END6278:%.*]]
+// SIMD-ONLY0:       if.then6277:
+// SIMD-ONLY0-NEXT:    [[TMP4836:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4836]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6278]]
+// SIMD-ONLY0:       if.end6278:
+// SIMD-ONLY0-NEXT:    [[TMP4837:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4837]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4838:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4839:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6279:%.*]] = fcmp ogt float [[TMP4838]], [[TMP4839]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6279]], label [[IF_THEN6281:%.*]], label [[IF_END6282:%.*]]
+// SIMD-ONLY0:       if.then6281:
+// SIMD-ONLY0-NEXT:    [[TMP4840:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4840]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6282]]
+// SIMD-ONLY0:       if.end6282:
+// SIMD-ONLY0-NEXT:    [[TMP4841:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4841]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4842:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4843:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6283:%.*]] = fcmp olt float [[TMP4842]], [[TMP4843]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6283]], label [[IF_THEN6285:%.*]], label [[IF_END6286:%.*]]
+// SIMD-ONLY0:       if.then6285:
+// SIMD-ONLY0-NEXT:    [[TMP4844:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4844]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6286]]
+// SIMD-ONLY0:       if.end6286:
+// SIMD-ONLY0-NEXT:    [[TMP4845:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4845]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4846:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4847:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6287:%.*]] = fcmp olt float [[TMP4846]], [[TMP4847]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6287]], label [[IF_THEN6289:%.*]], label [[IF_END6290:%.*]]
+// SIMD-ONLY0:       if.then6289:
+// SIMD-ONLY0-NEXT:    [[TMP4848:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4848]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6290]]
+// SIMD-ONLY0:       if.end6290:
+// SIMD-ONLY0-NEXT:    [[TMP4849:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4849]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4850:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4851:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6291:%.*]] = fcmp oeq float [[TMP4850]], [[TMP4851]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6291]], label [[IF_THEN6293:%.*]], label [[IF_END6294:%.*]]
+// SIMD-ONLY0:       if.then6293:
+// SIMD-ONLY0-NEXT:    [[TMP4852:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4852]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6294]]
+// SIMD-ONLY0:       if.end6294:
+// SIMD-ONLY0-NEXT:    [[TMP4853:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4853]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4854:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4855:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6295:%.*]] = fcmp oeq float [[TMP4854]], [[TMP4855]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6295]], label [[IF_THEN6297:%.*]], label [[IF_END6298:%.*]]
+// SIMD-ONLY0:       if.then6297:
+// SIMD-ONLY0-NEXT:    [[TMP4856:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4856]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6298]]
+// SIMD-ONLY0:       if.end6298:
+// SIMD-ONLY0-NEXT:    [[TMP4857:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4857]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4858:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4859:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6299:%.*]] = fcmp oeq float [[TMP4858]], [[TMP4859]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6299]], label [[IF_THEN6301:%.*]], label [[IF_ELSE6302:%.*]]
+// SIMD-ONLY0:       if.then6301:
+// SIMD-ONLY0-NEXT:    [[TMP4860:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4860]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6303:%.*]]
+// SIMD-ONLY0:       if.else6302:
+// SIMD-ONLY0-NEXT:    [[TMP4861:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4861]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6303]]
+// SIMD-ONLY0:       if.end6303:
+// SIMD-ONLY0-NEXT:    [[TMP4862:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4863:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6304:%.*]] = fcmp oeq float [[TMP4862]], [[TMP4863]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6304]], label [[IF_THEN6306:%.*]], label [[IF_ELSE6307:%.*]]
+// SIMD-ONLY0:       if.then6306:
+// SIMD-ONLY0-NEXT:    [[TMP4864:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4864]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6308:%.*]]
+// SIMD-ONLY0:       if.else6307:
+// SIMD-ONLY0-NEXT:    [[TMP4865:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4865]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6308]]
+// SIMD-ONLY0:       if.end6308:
+// SIMD-ONLY0-NEXT:    [[TMP4866:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4867:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6309:%.*]] = fcmp oeq float [[TMP4866]], [[TMP4867]]
+// SIMD-ONLY0-NEXT:    [[CONV6310:%.*]] = zext i1 [[CMP6309]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6310]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4868:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6311:%.*]] = icmp ne i32 [[TMP4868]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6311]], label [[IF_THEN6312:%.*]], label [[IF_END6313:%.*]]
+// SIMD-ONLY0:       if.then6312:
+// SIMD-ONLY0-NEXT:    [[TMP4869:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4869]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6313]]
+// SIMD-ONLY0:       if.end6313:
+// SIMD-ONLY0-NEXT:    [[TMP4870:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4871:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6314:%.*]] = fcmp oeq float [[TMP4870]], [[TMP4871]]
+// SIMD-ONLY0-NEXT:    [[CONV6315:%.*]] = zext i1 [[CMP6314]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6315]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4872:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6316:%.*]] = icmp ne i32 [[TMP4872]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6316]], label [[IF_THEN6317:%.*]], label [[IF_END6318:%.*]]
+// SIMD-ONLY0:       if.then6317:
+// SIMD-ONLY0-NEXT:    [[TMP4873:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4873]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6318]]
+// SIMD-ONLY0:       if.end6318:
+// SIMD-ONLY0-NEXT:    [[TMP4874:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4875:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6319:%.*]] = fcmp oeq float [[TMP4874]], [[TMP4875]]
+// SIMD-ONLY0-NEXT:    [[CONV6320:%.*]] = zext i1 [[CMP6319]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6320]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4876:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6321:%.*]] = icmp ne i32 [[TMP4876]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6321]], label [[IF_THEN6322:%.*]], label [[IF_ELSE6323:%.*]]
+// SIMD-ONLY0:       if.then6322:
+// SIMD-ONLY0-NEXT:    [[TMP4877:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4877]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6324:%.*]]
+// SIMD-ONLY0:       if.else6323:
+// SIMD-ONLY0-NEXT:    [[TMP4878:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4878]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6324]]
+// SIMD-ONLY0:       if.end6324:
+// SIMD-ONLY0-NEXT:    [[TMP4879:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4880:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6325:%.*]] = fcmp oeq float [[TMP4879]], [[TMP4880]]
+// SIMD-ONLY0-NEXT:    [[CONV6326:%.*]] = zext i1 [[CMP6325]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6326]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4881:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6327:%.*]] = icmp ne i32 [[TMP4881]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6327]], label [[IF_THEN6328:%.*]], label [[IF_ELSE6329:%.*]]
+// SIMD-ONLY0:       if.then6328:
+// SIMD-ONLY0-NEXT:    [[TMP4882:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4882]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6330:%.*]]
+// SIMD-ONLY0:       if.else6329:
+// SIMD-ONLY0-NEXT:    [[TMP4883:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP4883]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    br label [[IF_END6330]]
+// SIMD-ONLY0:       if.end6330:
+// SIMD-ONLY0-NEXT:    [[TMP4884:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4884]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4885:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4886:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6331:%.*]] = fcmp ogt double [[TMP4885]], [[TMP4886]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6331]], label [[IF_THEN6333:%.*]], label [[IF_END6334:%.*]]
+// SIMD-ONLY0:       if.then6333:
+// SIMD-ONLY0-NEXT:    [[TMP4887:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4887]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6334]]
+// SIMD-ONLY0:       if.end6334:
+// SIMD-ONLY0-NEXT:    [[TMP4888:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4888]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4889:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4890:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6335:%.*]] = fcmp ogt double [[TMP4889]], [[TMP4890]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6335]], label [[IF_THEN6337:%.*]], label [[IF_END6338:%.*]]
+// SIMD-ONLY0:       if.then6337:
+// SIMD-ONLY0-NEXT:    [[TMP4891:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4891]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6338]]
+// SIMD-ONLY0:       if.end6338:
+// SIMD-ONLY0-NEXT:    [[TMP4892:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4892]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4893:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4894:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6339:%.*]] = fcmp olt double [[TMP4893]], [[TMP4894]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6339]], label [[IF_THEN6341:%.*]], label [[IF_END6342:%.*]]
+// SIMD-ONLY0:       if.then6341:
+// SIMD-ONLY0-NEXT:    [[TMP4895:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4895]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6342]]
+// SIMD-ONLY0:       if.end6342:
+// SIMD-ONLY0-NEXT:    [[TMP4896:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4896]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4897:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4898:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6343:%.*]] = fcmp olt double [[TMP4897]], [[TMP4898]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6343]], label [[IF_THEN6345:%.*]], label [[IF_END6346:%.*]]
+// SIMD-ONLY0:       if.then6345:
+// SIMD-ONLY0-NEXT:    [[TMP4899:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4899]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6346]]
+// SIMD-ONLY0:       if.end6346:
+// SIMD-ONLY0-NEXT:    [[TMP4900:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4900]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4901:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4902:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6347:%.*]] = fcmp oeq double [[TMP4901]], [[TMP4902]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6347]], label [[IF_THEN6349:%.*]], label [[IF_END6350:%.*]]
+// SIMD-ONLY0:       if.then6349:
+// SIMD-ONLY0-NEXT:    [[TMP4903:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4903]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6350]]
+// SIMD-ONLY0:       if.end6350:
+// SIMD-ONLY0-NEXT:    [[TMP4904:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4904]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4905:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4906:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6351:%.*]] = fcmp oeq double [[TMP4905]], [[TMP4906]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6351]], label [[IF_THEN6353:%.*]], label [[IF_END6354:%.*]]
+// SIMD-ONLY0:       if.then6353:
+// SIMD-ONLY0-NEXT:    [[TMP4907:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4907]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6354]]
+// SIMD-ONLY0:       if.end6354:
+// SIMD-ONLY0-NEXT:    [[TMP4908:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4909:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6355:%.*]] = fcmp ogt double [[TMP4908]], [[TMP4909]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6355]], label [[IF_THEN6357:%.*]], label [[IF_END6358:%.*]]
+// SIMD-ONLY0:       if.then6357:
+// SIMD-ONLY0-NEXT:    [[TMP4910:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4910]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6358]]
+// SIMD-ONLY0:       if.end6358:
+// SIMD-ONLY0-NEXT:    [[TMP4911:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4911]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4912:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4913:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6359:%.*]] = fcmp ogt double [[TMP4912]], [[TMP4913]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6359]], label [[IF_THEN6361:%.*]], label [[IF_END6362:%.*]]
+// SIMD-ONLY0:       if.then6361:
+// SIMD-ONLY0-NEXT:    [[TMP4914:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4914]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6362]]
+// SIMD-ONLY0:       if.end6362:
+// SIMD-ONLY0-NEXT:    [[TMP4915:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4915]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4916:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4917:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6363:%.*]] = fcmp olt double [[TMP4916]], [[TMP4917]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6363]], label [[IF_THEN6365:%.*]], label [[IF_END6366:%.*]]
+// SIMD-ONLY0:       if.then6365:
+// SIMD-ONLY0-NEXT:    [[TMP4918:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4918]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6366]]
+// SIMD-ONLY0:       if.end6366:
+// SIMD-ONLY0-NEXT:    [[TMP4919:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4919]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4920:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4921:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6367:%.*]] = fcmp olt double [[TMP4920]], [[TMP4921]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6367]], label [[IF_THEN6369:%.*]], label [[IF_END6370:%.*]]
+// SIMD-ONLY0:       if.then6369:
+// SIMD-ONLY0-NEXT:    [[TMP4922:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4922]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6370]]
+// SIMD-ONLY0:       if.end6370:
+// SIMD-ONLY0-NEXT:    [[TMP4923:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4923]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4924:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4925:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6371:%.*]] = fcmp oeq double [[TMP4924]], [[TMP4925]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6371]], label [[IF_THEN6373:%.*]], label [[IF_END6374:%.*]]
+// SIMD-ONLY0:       if.then6373:
+// SIMD-ONLY0-NEXT:    [[TMP4926:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4926]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6374]]
+// SIMD-ONLY0:       if.end6374:
+// SIMD-ONLY0-NEXT:    [[TMP4927:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4927]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4928:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4929:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6375:%.*]] = fcmp oeq double [[TMP4928]], [[TMP4929]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6375]], label [[IF_THEN6377:%.*]], label [[IF_END6378:%.*]]
+// SIMD-ONLY0:       if.then6377:
+// SIMD-ONLY0-NEXT:    [[TMP4930:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4930]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6378]]
+// SIMD-ONLY0:       if.end6378:
+// SIMD-ONLY0-NEXT:    [[TMP4931:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4931]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4932:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4933:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6379:%.*]] = fcmp oeq double [[TMP4932]], [[TMP4933]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6379]], label [[IF_THEN6381:%.*]], label [[IF_ELSE6382:%.*]]
+// SIMD-ONLY0:       if.then6381:
+// SIMD-ONLY0-NEXT:    [[TMP4934:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4934]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6383:%.*]]
+// SIMD-ONLY0:       if.else6382:
+// SIMD-ONLY0-NEXT:    [[TMP4935:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4935]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6383]]
+// SIMD-ONLY0:       if.end6383:
+// SIMD-ONLY0-NEXT:    [[TMP4936:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4937:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6384:%.*]] = fcmp oeq double [[TMP4936]], [[TMP4937]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6384]], label [[IF_THEN6386:%.*]], label [[IF_ELSE6387:%.*]]
+// SIMD-ONLY0:       if.then6386:
+// SIMD-ONLY0-NEXT:    [[TMP4938:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4938]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6388:%.*]]
+// SIMD-ONLY0:       if.else6387:
+// SIMD-ONLY0-NEXT:    [[TMP4939:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4939]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6388]]
+// SIMD-ONLY0:       if.end6388:
+// SIMD-ONLY0-NEXT:    [[TMP4940:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4941:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6389:%.*]] = fcmp oeq double [[TMP4940]], [[TMP4941]]
+// SIMD-ONLY0-NEXT:    [[CONV6390:%.*]] = zext i1 [[CMP6389]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6390]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4942:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6391:%.*]] = icmp ne i32 [[TMP4942]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6391]], label [[IF_THEN6392:%.*]], label [[IF_END6393:%.*]]
+// SIMD-ONLY0:       if.then6392:
+// SIMD-ONLY0-NEXT:    [[TMP4943:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4943]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6393]]
+// SIMD-ONLY0:       if.end6393:
+// SIMD-ONLY0-NEXT:    [[TMP4944:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4945:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6394:%.*]] = fcmp oeq double [[TMP4944]], [[TMP4945]]
+// SIMD-ONLY0-NEXT:    [[CONV6395:%.*]] = zext i1 [[CMP6394]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6395]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4946:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6396:%.*]] = icmp ne i32 [[TMP4946]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6396]], label [[IF_THEN6397:%.*]], label [[IF_END6398:%.*]]
+// SIMD-ONLY0:       if.then6397:
+// SIMD-ONLY0-NEXT:    [[TMP4947:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4947]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6398]]
+// SIMD-ONLY0:       if.end6398:
+// SIMD-ONLY0-NEXT:    [[TMP4948:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4949:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6399:%.*]] = fcmp oeq double [[TMP4948]], [[TMP4949]]
+// SIMD-ONLY0-NEXT:    [[CONV6400:%.*]] = zext i1 [[CMP6399]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6400]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4950:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6401:%.*]] = icmp ne i32 [[TMP4950]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6401]], label [[IF_THEN6402:%.*]], label [[IF_ELSE6403:%.*]]
+// SIMD-ONLY0:       if.then6402:
+// SIMD-ONLY0-NEXT:    [[TMP4951:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4951]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6404:%.*]]
+// SIMD-ONLY0:       if.else6403:
+// SIMD-ONLY0-NEXT:    [[TMP4952:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4952]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6404]]
+// SIMD-ONLY0:       if.end6404:
+// SIMD-ONLY0-NEXT:    [[TMP4953:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4954:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6405:%.*]] = fcmp oeq double [[TMP4953]], [[TMP4954]]
+// SIMD-ONLY0-NEXT:    [[CONV6406:%.*]] = zext i1 [[CMP6405]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6406]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4955:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6407:%.*]] = icmp ne i32 [[TMP4955]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6407]], label [[IF_THEN6408:%.*]], label [[IF_ELSE6409:%.*]]
+// SIMD-ONLY0:       if.then6408:
+// SIMD-ONLY0-NEXT:    [[TMP4956:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4956]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6410:%.*]]
+// SIMD-ONLY0:       if.else6409:
+// SIMD-ONLY0-NEXT:    [[TMP4957:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4957]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6410]]
+// SIMD-ONLY0:       if.end6410:
+// SIMD-ONLY0-NEXT:    [[TMP4958:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4958]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4959:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4960:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6411:%.*]] = fcmp ogt double [[TMP4959]], [[TMP4960]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6411]], label [[IF_THEN6413:%.*]], label [[IF_END6414:%.*]]
+// SIMD-ONLY0:       if.then6413:
+// SIMD-ONLY0-NEXT:    [[TMP4961:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4961]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6414]]
+// SIMD-ONLY0:       if.end6414:
+// SIMD-ONLY0-NEXT:    [[TMP4962:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4962]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4963:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4964:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6415:%.*]] = fcmp ogt double [[TMP4963]], [[TMP4964]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6415]], label [[IF_THEN6417:%.*]], label [[IF_END6418:%.*]]
+// SIMD-ONLY0:       if.then6417:
+// SIMD-ONLY0-NEXT:    [[TMP4965:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4965]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6418]]
+// SIMD-ONLY0:       if.end6418:
+// SIMD-ONLY0-NEXT:    [[TMP4966:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4966]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4967:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4968:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6419:%.*]] = fcmp olt double [[TMP4967]], [[TMP4968]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6419]], label [[IF_THEN6421:%.*]], label [[IF_END6422:%.*]]
+// SIMD-ONLY0:       if.then6421:
+// SIMD-ONLY0-NEXT:    [[TMP4969:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4969]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6422]]
+// SIMD-ONLY0:       if.end6422:
+// SIMD-ONLY0-NEXT:    [[TMP4970:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4970]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4971:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4972:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6423:%.*]] = fcmp olt double [[TMP4971]], [[TMP4972]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6423]], label [[IF_THEN6425:%.*]], label [[IF_END6426:%.*]]
+// SIMD-ONLY0:       if.then6425:
+// SIMD-ONLY0-NEXT:    [[TMP4973:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4973]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6426]]
+// SIMD-ONLY0:       if.end6426:
+// SIMD-ONLY0-NEXT:    [[TMP4974:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4974]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4975:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4976:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6427:%.*]] = fcmp oeq double [[TMP4975]], [[TMP4976]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6427]], label [[IF_THEN6429:%.*]], label [[IF_END6430:%.*]]
+// SIMD-ONLY0:       if.then6429:
+// SIMD-ONLY0-NEXT:    [[TMP4977:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4977]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6430]]
+// SIMD-ONLY0:       if.end6430:
+// SIMD-ONLY0-NEXT:    [[TMP4978:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4978]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4979:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4980:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6431:%.*]] = fcmp oeq double [[TMP4979]], [[TMP4980]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6431]], label [[IF_THEN6433:%.*]], label [[IF_END6434:%.*]]
+// SIMD-ONLY0:       if.then6433:
+// SIMD-ONLY0-NEXT:    [[TMP4981:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4981]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6434]]
+// SIMD-ONLY0:       if.end6434:
+// SIMD-ONLY0-NEXT:    [[TMP4982:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4983:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6435:%.*]] = fcmp ogt double [[TMP4982]], [[TMP4983]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6435]], label [[IF_THEN6437:%.*]], label [[IF_END6438:%.*]]
+// SIMD-ONLY0:       if.then6437:
+// SIMD-ONLY0-NEXT:    [[TMP4984:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4984]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6438]]
+// SIMD-ONLY0:       if.end6438:
+// SIMD-ONLY0-NEXT:    [[TMP4985:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4985]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4986:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4987:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6439:%.*]] = fcmp ogt double [[TMP4986]], [[TMP4987]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6439]], label [[IF_THEN6441:%.*]], label [[IF_END6442:%.*]]
+// SIMD-ONLY0:       if.then6441:
+// SIMD-ONLY0-NEXT:    [[TMP4988:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4988]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6442]]
+// SIMD-ONLY0:       if.end6442:
+// SIMD-ONLY0-NEXT:    [[TMP4989:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4989]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4990:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4991:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6443:%.*]] = fcmp olt double [[TMP4990]], [[TMP4991]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6443]], label [[IF_THEN6445:%.*]], label [[IF_END6446:%.*]]
+// SIMD-ONLY0:       if.then6445:
+// SIMD-ONLY0-NEXT:    [[TMP4992:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4992]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6446]]
+// SIMD-ONLY0:       if.end6446:
+// SIMD-ONLY0-NEXT:    [[TMP4993:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4993]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4994:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4995:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6447:%.*]] = fcmp olt double [[TMP4994]], [[TMP4995]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6447]], label [[IF_THEN6449:%.*]], label [[IF_END6450:%.*]]
+// SIMD-ONLY0:       if.then6449:
+// SIMD-ONLY0-NEXT:    [[TMP4996:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4996]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6450]]
+// SIMD-ONLY0:       if.end6450:
+// SIMD-ONLY0-NEXT:    [[TMP4997:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP4997]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4998:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP4999:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6451:%.*]] = fcmp oeq double [[TMP4998]], [[TMP4999]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6451]], label [[IF_THEN6453:%.*]], label [[IF_END6454:%.*]]
+// SIMD-ONLY0:       if.then6453:
+// SIMD-ONLY0-NEXT:    [[TMP5000:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5000]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6454]]
+// SIMD-ONLY0:       if.end6454:
+// SIMD-ONLY0-NEXT:    [[TMP5001:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5001]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5002:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5003:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6455:%.*]] = fcmp oeq double [[TMP5002]], [[TMP5003]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6455]], label [[IF_THEN6457:%.*]], label [[IF_END6458:%.*]]
+// SIMD-ONLY0:       if.then6457:
+// SIMD-ONLY0-NEXT:    [[TMP5004:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5004]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6458]]
+// SIMD-ONLY0:       if.end6458:
+// SIMD-ONLY0-NEXT:    [[TMP5005:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5005]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5006:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5007:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6459:%.*]] = fcmp oeq double [[TMP5006]], [[TMP5007]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6459]], label [[IF_THEN6461:%.*]], label [[IF_ELSE6462:%.*]]
+// SIMD-ONLY0:       if.then6461:
+// SIMD-ONLY0-NEXT:    [[TMP5008:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5008]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6463:%.*]]
+// SIMD-ONLY0:       if.else6462:
+// SIMD-ONLY0-NEXT:    [[TMP5009:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5009]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6463]]
+// SIMD-ONLY0:       if.end6463:
+// SIMD-ONLY0-NEXT:    [[TMP5010:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5011:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6464:%.*]] = fcmp oeq double [[TMP5010]], [[TMP5011]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6464]], label [[IF_THEN6466:%.*]], label [[IF_ELSE6467:%.*]]
+// SIMD-ONLY0:       if.then6466:
+// SIMD-ONLY0-NEXT:    [[TMP5012:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5012]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6468:%.*]]
+// SIMD-ONLY0:       if.else6467:
+// SIMD-ONLY0-NEXT:    [[TMP5013:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5013]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6468]]
+// SIMD-ONLY0:       if.end6468:
+// SIMD-ONLY0-NEXT:    [[TMP5014:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5015:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6469:%.*]] = fcmp oeq double [[TMP5014]], [[TMP5015]]
+// SIMD-ONLY0-NEXT:    [[CONV6470:%.*]] = zext i1 [[CMP6469]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6470]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP5016:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6471:%.*]] = icmp ne i32 [[TMP5016]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6471]], label [[IF_THEN6472:%.*]], label [[IF_END6473:%.*]]
+// SIMD-ONLY0:       if.then6472:
+// SIMD-ONLY0-NEXT:    [[TMP5017:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5017]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6473]]
+// SIMD-ONLY0:       if.end6473:
+// SIMD-ONLY0-NEXT:    [[TMP5018:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5019:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6474:%.*]] = fcmp oeq double [[TMP5018]], [[TMP5019]]
+// SIMD-ONLY0-NEXT:    [[CONV6475:%.*]] = zext i1 [[CMP6474]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6475]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP5020:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6476:%.*]] = icmp ne i32 [[TMP5020]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6476]], label [[IF_THEN6477:%.*]], label [[IF_END6478:%.*]]
+// SIMD-ONLY0:       if.then6477:
+// SIMD-ONLY0-NEXT:    [[TMP5021:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5021]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6478]]
+// SIMD-ONLY0:       if.end6478:
+// SIMD-ONLY0-NEXT:    [[TMP5022:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5023:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6479:%.*]] = fcmp oeq double [[TMP5022]], [[TMP5023]]
+// SIMD-ONLY0-NEXT:    [[CONV6480:%.*]] = zext i1 [[CMP6479]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6480]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP5024:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6481:%.*]] = icmp ne i32 [[TMP5024]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6481]], label [[IF_THEN6482:%.*]], label [[IF_ELSE6483:%.*]]
+// SIMD-ONLY0:       if.then6482:
+// SIMD-ONLY0-NEXT:    [[TMP5025:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5025]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6484:%.*]]
+// SIMD-ONLY0:       if.else6483:
+// SIMD-ONLY0-NEXT:    [[TMP5026:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5026]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6484]]
+// SIMD-ONLY0:       if.end6484:
+// SIMD-ONLY0-NEXT:    [[TMP5027:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5028:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6485:%.*]] = fcmp oeq double [[TMP5027]], [[TMP5028]]
+// SIMD-ONLY0-NEXT:    [[CONV6486:%.*]] = zext i1 [[CMP6485]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6486]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP5029:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6487:%.*]] = icmp ne i32 [[TMP5029]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6487]], label [[IF_THEN6488:%.*]], label [[IF_ELSE6489:%.*]]
+// SIMD-ONLY0:       if.then6488:
+// SIMD-ONLY0-NEXT:    [[TMP5030:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5030]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6490:%.*]]
+// SIMD-ONLY0:       if.else6489:
+// SIMD-ONLY0-NEXT:    [[TMP5031:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5031]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6490]]
+// SIMD-ONLY0:       if.end6490:
+// SIMD-ONLY0-NEXT:    [[TMP5032:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5032]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5033:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5034:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6491:%.*]] = fcmp ogt double [[TMP5033]], [[TMP5034]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6491]], label [[IF_THEN6493:%.*]], label [[IF_END6494:%.*]]
+// SIMD-ONLY0:       if.then6493:
+// SIMD-ONLY0-NEXT:    [[TMP5035:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5035]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6494]]
+// SIMD-ONLY0:       if.end6494:
+// SIMD-ONLY0-NEXT:    [[TMP5036:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5036]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5037:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5038:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6495:%.*]] = fcmp ogt double [[TMP5037]], [[TMP5038]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6495]], label [[IF_THEN6497:%.*]], label [[IF_END6498:%.*]]
+// SIMD-ONLY0:       if.then6497:
+// SIMD-ONLY0-NEXT:    [[TMP5039:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5039]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6498]]
+// SIMD-ONLY0:       if.end6498:
+// SIMD-ONLY0-NEXT:    [[TMP5040:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5040]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5041:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5042:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6499:%.*]] = fcmp olt double [[TMP5041]], [[TMP5042]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6499]], label [[IF_THEN6501:%.*]], label [[IF_END6502:%.*]]
+// SIMD-ONLY0:       if.then6501:
+// SIMD-ONLY0-NEXT:    [[TMP5043:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5043]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6502]]
+// SIMD-ONLY0:       if.end6502:
+// SIMD-ONLY0-NEXT:    [[TMP5044:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5044]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5045:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5046:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6503:%.*]] = fcmp olt double [[TMP5045]], [[TMP5046]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6503]], label [[IF_THEN6505:%.*]], label [[IF_END6506:%.*]]
+// SIMD-ONLY0:       if.then6505:
+// SIMD-ONLY0-NEXT:    [[TMP5047:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5047]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6506]]
+// SIMD-ONLY0:       if.end6506:
+// SIMD-ONLY0-NEXT:    [[TMP5048:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5048]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5049:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5050:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6507:%.*]] = fcmp oeq double [[TMP5049]], [[TMP5050]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6507]], label [[IF_THEN6509:%.*]], label [[IF_END6510:%.*]]
+// SIMD-ONLY0:       if.then6509:
+// SIMD-ONLY0-NEXT:    [[TMP5051:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5051]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6510]]
+// SIMD-ONLY0:       if.end6510:
+// SIMD-ONLY0-NEXT:    [[TMP5052:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5052]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5053:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5054:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6511:%.*]] = fcmp oeq double [[TMP5053]], [[TMP5054]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6511]], label [[IF_THEN6513:%.*]], label [[IF_END6514:%.*]]
+// SIMD-ONLY0:       if.then6513:
+// SIMD-ONLY0-NEXT:    [[TMP5055:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5055]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6514]]
+// SIMD-ONLY0:       if.end6514:
+// SIMD-ONLY0-NEXT:    [[TMP5056:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5057:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6515:%.*]] = fcmp ogt double [[TMP5056]], [[TMP5057]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6515]], label [[IF_THEN6517:%.*]], label [[IF_END6518:%.*]]
+// SIMD-ONLY0:       if.then6517:
+// SIMD-ONLY0-NEXT:    [[TMP5058:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5058]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6518]]
+// SIMD-ONLY0:       if.end6518:
+// SIMD-ONLY0-NEXT:    [[TMP5059:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5059]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5060:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5061:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6519:%.*]] = fcmp ogt double [[TMP5060]], [[TMP5061]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6519]], label [[IF_THEN6521:%.*]], label [[IF_END6522:%.*]]
+// SIMD-ONLY0:       if.then6521:
+// SIMD-ONLY0-NEXT:    [[TMP5062:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5062]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6522]]
+// SIMD-ONLY0:       if.end6522:
+// SIMD-ONLY0-NEXT:    [[TMP5063:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5063]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5064:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5065:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6523:%.*]] = fcmp olt double [[TMP5064]], [[TMP5065]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6523]], label [[IF_THEN6525:%.*]], label [[IF_END6526:%.*]]
+// SIMD-ONLY0:       if.then6525:
+// SIMD-ONLY0-NEXT:    [[TMP5066:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5066]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6526]]
+// SIMD-ONLY0:       if.end6526:
+// SIMD-ONLY0-NEXT:    [[TMP5067:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5067]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5068:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5069:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6527:%.*]] = fcmp olt double [[TMP5068]], [[TMP5069]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6527]], label [[IF_THEN6529:%.*]], label [[IF_END6530:%.*]]
+// SIMD-ONLY0:       if.then6529:
+// SIMD-ONLY0-NEXT:    [[TMP5070:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5070]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6530]]
+// SIMD-ONLY0:       if.end6530:
+// SIMD-ONLY0-NEXT:    [[TMP5071:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5071]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5072:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5073:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6531:%.*]] = fcmp oeq double [[TMP5072]], [[TMP5073]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6531]], label [[IF_THEN6533:%.*]], label [[IF_END6534:%.*]]
+// SIMD-ONLY0:       if.then6533:
+// SIMD-ONLY0-NEXT:    [[TMP5074:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5074]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6534]]
+// SIMD-ONLY0:       if.end6534:
+// SIMD-ONLY0-NEXT:    [[TMP5075:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5075]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5076:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5077:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6535:%.*]] = fcmp oeq double [[TMP5076]], [[TMP5077]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6535]], label [[IF_THEN6537:%.*]], label [[IF_END6538:%.*]]
+// SIMD-ONLY0:       if.then6537:
+// SIMD-ONLY0-NEXT:    [[TMP5078:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5078]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6538]]
+// SIMD-ONLY0:       if.end6538:
+// SIMD-ONLY0-NEXT:    [[TMP5079:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5079]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5080:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5081:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6539:%.*]] = fcmp oeq double [[TMP5080]], [[TMP5081]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6539]], label [[IF_THEN6541:%.*]], label [[IF_ELSE6542:%.*]]
+// SIMD-ONLY0:       if.then6541:
+// SIMD-ONLY0-NEXT:    [[TMP5082:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5082]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6543:%.*]]
+// SIMD-ONLY0:       if.else6542:
+// SIMD-ONLY0-NEXT:    [[TMP5083:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5083]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6543]]
+// SIMD-ONLY0:       if.end6543:
+// SIMD-ONLY0-NEXT:    [[TMP5084:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5085:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6544:%.*]] = fcmp oeq double [[TMP5084]], [[TMP5085]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6544]], label [[IF_THEN6546:%.*]], label [[IF_ELSE6547:%.*]]
+// SIMD-ONLY0:       if.then6546:
+// SIMD-ONLY0-NEXT:    [[TMP5086:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5086]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6548:%.*]]
+// SIMD-ONLY0:       if.else6547:
+// SIMD-ONLY0-NEXT:    [[TMP5087:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5087]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6548]]
+// SIMD-ONLY0:       if.end6548:
+// SIMD-ONLY0-NEXT:    [[TMP5088:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5089:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6549:%.*]] = fcmp oeq double [[TMP5088]], [[TMP5089]]
+// SIMD-ONLY0-NEXT:    [[CONV6550:%.*]] = zext i1 [[CMP6549]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6550]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP5090:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6551:%.*]] = icmp ne i32 [[TMP5090]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6551]], label [[IF_THEN6552:%.*]], label [[IF_END6553:%.*]]
+// SIMD-ONLY0:       if.then6552:
+// SIMD-ONLY0-NEXT:    [[TMP5091:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5091]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6553]]
+// SIMD-ONLY0:       if.end6553:
+// SIMD-ONLY0-NEXT:    [[TMP5092:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5093:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6554:%.*]] = fcmp oeq double [[TMP5092]], [[TMP5093]]
+// SIMD-ONLY0-NEXT:    [[CONV6555:%.*]] = zext i1 [[CMP6554]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6555]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP5094:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6556:%.*]] = icmp ne i32 [[TMP5094]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6556]], label [[IF_THEN6557:%.*]], label [[IF_END6558:%.*]]
+// SIMD-ONLY0:       if.then6557:
+// SIMD-ONLY0-NEXT:    [[TMP5095:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5095]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6558]]
+// SIMD-ONLY0:       if.end6558:
+// SIMD-ONLY0-NEXT:    [[TMP5096:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5097:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6559:%.*]] = fcmp oeq double [[TMP5096]], [[TMP5097]]
+// SIMD-ONLY0-NEXT:    [[CONV6560:%.*]] = zext i1 [[CMP6559]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6560]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP5098:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6561:%.*]] = icmp ne i32 [[TMP5098]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6561]], label [[IF_THEN6562:%.*]], label [[IF_ELSE6563:%.*]]
+// SIMD-ONLY0:       if.then6562:
+// SIMD-ONLY0-NEXT:    [[TMP5099:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5099]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6564:%.*]]
+// SIMD-ONLY0:       if.else6563:
+// SIMD-ONLY0-NEXT:    [[TMP5100:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5100]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6564]]
+// SIMD-ONLY0:       if.end6564:
+// SIMD-ONLY0-NEXT:    [[TMP5101:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5102:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6565:%.*]] = fcmp oeq double [[TMP5101]], [[TMP5102]]
+// SIMD-ONLY0-NEXT:    [[CONV6566:%.*]] = zext i1 [[CMP6565]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6566]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP5103:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6567:%.*]] = icmp ne i32 [[TMP5103]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6567]], label [[IF_THEN6568:%.*]], label [[IF_ELSE6569:%.*]]
+// SIMD-ONLY0:       if.then6568:
+// SIMD-ONLY0-NEXT:    [[TMP5104:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5104]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6570:%.*]]
+// SIMD-ONLY0:       if.else6569:
+// SIMD-ONLY0-NEXT:    [[TMP5105:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5105]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6570]]
+// SIMD-ONLY0:       if.end6570:
+// SIMD-ONLY0-NEXT:    [[TMP5106:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5106]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5107:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5108:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6571:%.*]] = fcmp ogt double [[TMP5107]], [[TMP5108]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6571]], label [[IF_THEN6573:%.*]], label [[IF_END6574:%.*]]
+// SIMD-ONLY0:       if.then6573:
+// SIMD-ONLY0-NEXT:    [[TMP5109:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5109]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6574]]
+// SIMD-ONLY0:       if.end6574:
+// SIMD-ONLY0-NEXT:    [[TMP5110:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5110]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5111:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5112:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6575:%.*]] = fcmp ogt double [[TMP5111]], [[TMP5112]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6575]], label [[IF_THEN6577:%.*]], label [[IF_END6578:%.*]]
+// SIMD-ONLY0:       if.then6577:
+// SIMD-ONLY0-NEXT:    [[TMP5113:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5113]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6578]]
+// SIMD-ONLY0:       if.end6578:
+// SIMD-ONLY0-NEXT:    [[TMP5114:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5114]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5115:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5116:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6579:%.*]] = fcmp olt double [[TMP5115]], [[TMP5116]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6579]], label [[IF_THEN6581:%.*]], label [[IF_END6582:%.*]]
+// SIMD-ONLY0:       if.then6581:
+// SIMD-ONLY0-NEXT:    [[TMP5117:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5117]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6582]]
+// SIMD-ONLY0:       if.end6582:
+// SIMD-ONLY0-NEXT:    [[TMP5118:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5118]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5119:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5120:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6583:%.*]] = fcmp olt double [[TMP5119]], [[TMP5120]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6583]], label [[IF_THEN6585:%.*]], label [[IF_END6586:%.*]]
+// SIMD-ONLY0:       if.then6585:
+// SIMD-ONLY0-NEXT:    [[TMP5121:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5121]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6586]]
+// SIMD-ONLY0:       if.end6586:
+// SIMD-ONLY0-NEXT:    [[TMP5122:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5122]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5123:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5124:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6587:%.*]] = fcmp oeq double [[TMP5123]], [[TMP5124]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6587]], label [[IF_THEN6589:%.*]], label [[IF_END6590:%.*]]
+// SIMD-ONLY0:       if.then6589:
+// SIMD-ONLY0-NEXT:    [[TMP5125:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5125]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6590]]
+// SIMD-ONLY0:       if.end6590:
+// SIMD-ONLY0-NEXT:    [[TMP5126:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5126]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5127:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5128:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6591:%.*]] = fcmp oeq double [[TMP5127]], [[TMP5128]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6591]], label [[IF_THEN6593:%.*]], label [[IF_END6594:%.*]]
+// SIMD-ONLY0:       if.then6593:
+// SIMD-ONLY0-NEXT:    [[TMP5129:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5129]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6594]]
+// SIMD-ONLY0:       if.end6594:
+// SIMD-ONLY0-NEXT:    [[TMP5130:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5131:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6595:%.*]] = fcmp ogt double [[TMP5130]], [[TMP5131]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6595]], label [[IF_THEN6597:%.*]], label [[IF_END6598:%.*]]
+// SIMD-ONLY0:       if.then6597:
+// SIMD-ONLY0-NEXT:    [[TMP5132:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5132]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6598]]
+// SIMD-ONLY0:       if.end6598:
+// SIMD-ONLY0-NEXT:    [[TMP5133:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5133]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5134:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5135:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6599:%.*]] = fcmp ogt double [[TMP5134]], [[TMP5135]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6599]], label [[IF_THEN6601:%.*]], label [[IF_END6602:%.*]]
+// SIMD-ONLY0:       if.then6601:
+// SIMD-ONLY0-NEXT:    [[TMP5136:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5136]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6602]]
+// SIMD-ONLY0:       if.end6602:
+// SIMD-ONLY0-NEXT:    [[TMP5137:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5137]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5138:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5139:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6603:%.*]] = fcmp olt double [[TMP5138]], [[TMP5139]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6603]], label [[IF_THEN6605:%.*]], label [[IF_END6606:%.*]]
+// SIMD-ONLY0:       if.then6605:
+// SIMD-ONLY0-NEXT:    [[TMP5140:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5140]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6606]]
+// SIMD-ONLY0:       if.end6606:
+// SIMD-ONLY0-NEXT:    [[TMP5141:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5141]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5142:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5143:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6607:%.*]] = fcmp olt double [[TMP5142]], [[TMP5143]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6607]], label [[IF_THEN6609:%.*]], label [[IF_END6610:%.*]]
+// SIMD-ONLY0:       if.then6609:
+// SIMD-ONLY0-NEXT:    [[TMP5144:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5144]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6610]]
+// SIMD-ONLY0:       if.end6610:
+// SIMD-ONLY0-NEXT:    [[TMP5145:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5145]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5146:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5147:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6611:%.*]] = fcmp oeq double [[TMP5146]], [[TMP5147]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6611]], label [[IF_THEN6613:%.*]], label [[IF_END6614:%.*]]
+// SIMD-ONLY0:       if.then6613:
+// SIMD-ONLY0-NEXT:    [[TMP5148:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5148]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6614]]
+// SIMD-ONLY0:       if.end6614:
+// SIMD-ONLY0-NEXT:    [[TMP5149:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5149]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5150:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5151:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6615:%.*]] = fcmp oeq double [[TMP5150]], [[TMP5151]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6615]], label [[IF_THEN6617:%.*]], label [[IF_END6618:%.*]]
+// SIMD-ONLY0:       if.then6617:
+// SIMD-ONLY0-NEXT:    [[TMP5152:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5152]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6618]]
+// SIMD-ONLY0:       if.end6618:
+// SIMD-ONLY0-NEXT:    [[TMP5153:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5153]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5154:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5155:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6619:%.*]] = fcmp oeq double [[TMP5154]], [[TMP5155]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6619]], label [[IF_THEN6621:%.*]], label [[IF_ELSE6622:%.*]]
+// SIMD-ONLY0:       if.then6621:
+// SIMD-ONLY0-NEXT:    [[TMP5156:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5156]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6623:%.*]]
+// SIMD-ONLY0:       if.else6622:
+// SIMD-ONLY0-NEXT:    [[TMP5157:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5157]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6623]]
+// SIMD-ONLY0:       if.end6623:
+// SIMD-ONLY0-NEXT:    [[TMP5158:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5159:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6624:%.*]] = fcmp oeq double [[TMP5158]], [[TMP5159]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6624]], label [[IF_THEN6626:%.*]], label [[IF_ELSE6627:%.*]]
+// SIMD-ONLY0:       if.then6626:
+// SIMD-ONLY0-NEXT:    [[TMP5160:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5160]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6628:%.*]]
+// SIMD-ONLY0:       if.else6627:
+// SIMD-ONLY0-NEXT:    [[TMP5161:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5161]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6628]]
+// SIMD-ONLY0:       if.end6628:
+// SIMD-ONLY0-NEXT:    [[TMP5162:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5163:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6629:%.*]] = fcmp oeq double [[TMP5162]], [[TMP5163]]
+// SIMD-ONLY0-NEXT:    [[CONV6630:%.*]] = zext i1 [[CMP6629]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6630]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP5164:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6631:%.*]] = icmp ne i32 [[TMP5164]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6631]], label [[IF_THEN6632:%.*]], label [[IF_END6633:%.*]]
+// SIMD-ONLY0:       if.then6632:
+// SIMD-ONLY0-NEXT:    [[TMP5165:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5165]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6633]]
+// SIMD-ONLY0:       if.end6633:
+// SIMD-ONLY0-NEXT:    [[TMP5166:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5167:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6634:%.*]] = fcmp oeq double [[TMP5166]], [[TMP5167]]
+// SIMD-ONLY0-NEXT:    [[CONV6635:%.*]] = zext i1 [[CMP6634]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6635]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP5168:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6636:%.*]] = icmp ne i32 [[TMP5168]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6636]], label [[IF_THEN6637:%.*]], label [[IF_END6638:%.*]]
+// SIMD-ONLY0:       if.then6637:
+// SIMD-ONLY0-NEXT:    [[TMP5169:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5169]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6638]]
+// SIMD-ONLY0:       if.end6638:
+// SIMD-ONLY0-NEXT:    [[TMP5170:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5171:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6639:%.*]] = fcmp oeq double [[TMP5170]], [[TMP5171]]
+// SIMD-ONLY0-NEXT:    [[CONV6640:%.*]] = zext i1 [[CMP6639]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6640]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP5172:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6641:%.*]] = icmp ne i32 [[TMP5172]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6641]], label [[IF_THEN6642:%.*]], label [[IF_ELSE6643:%.*]]
+// SIMD-ONLY0:       if.then6642:
+// SIMD-ONLY0-NEXT:    [[TMP5173:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5173]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6644:%.*]]
+// SIMD-ONLY0:       if.else6643:
+// SIMD-ONLY0-NEXT:    [[TMP5174:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5174]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6644]]
+// SIMD-ONLY0:       if.end6644:
+// SIMD-ONLY0-NEXT:    [[TMP5175:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5176:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6645:%.*]] = fcmp oeq double [[TMP5175]], [[TMP5176]]
+// SIMD-ONLY0-NEXT:    [[CONV6646:%.*]] = zext i1 [[CMP6645]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6646]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP5177:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6647:%.*]] = icmp ne i32 [[TMP5177]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6647]], label [[IF_THEN6648:%.*]], label [[IF_ELSE6649:%.*]]
+// SIMD-ONLY0:       if.then6648:
+// SIMD-ONLY0-NEXT:    [[TMP5178:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5178]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6650:%.*]]
+// SIMD-ONLY0:       if.else6649:
+// SIMD-ONLY0-NEXT:    [[TMP5179:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5179]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6650]]
+// SIMD-ONLY0:       if.end6650:
+// SIMD-ONLY0-NEXT:    [[TMP5180:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5180]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5181:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5182:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6651:%.*]] = fcmp ogt double [[TMP5181]], [[TMP5182]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6651]], label [[IF_THEN6653:%.*]], label [[IF_END6654:%.*]]
+// SIMD-ONLY0:       if.then6653:
+// SIMD-ONLY0-NEXT:    [[TMP5183:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5183]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6654]]
+// SIMD-ONLY0:       if.end6654:
+// SIMD-ONLY0-NEXT:    [[TMP5184:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5184]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5185:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5186:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6655:%.*]] = fcmp ogt double [[TMP5185]], [[TMP5186]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6655]], label [[IF_THEN6657:%.*]], label [[IF_END6658:%.*]]
+// SIMD-ONLY0:       if.then6657:
+// SIMD-ONLY0-NEXT:    [[TMP5187:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5187]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6658]]
+// SIMD-ONLY0:       if.end6658:
+// SIMD-ONLY0-NEXT:    [[TMP5188:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5188]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5189:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5190:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6659:%.*]] = fcmp olt double [[TMP5189]], [[TMP5190]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6659]], label [[IF_THEN6661:%.*]], label [[IF_END6662:%.*]]
+// SIMD-ONLY0:       if.then6661:
+// SIMD-ONLY0-NEXT:    [[TMP5191:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5191]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6662]]
+// SIMD-ONLY0:       if.end6662:
+// SIMD-ONLY0-NEXT:    [[TMP5192:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5192]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5193:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5194:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6663:%.*]] = fcmp olt double [[TMP5193]], [[TMP5194]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6663]], label [[IF_THEN6665:%.*]], label [[IF_END6666:%.*]]
+// SIMD-ONLY0:       if.then6665:
+// SIMD-ONLY0-NEXT:    [[TMP5195:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5195]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6666]]
+// SIMD-ONLY0:       if.end6666:
+// SIMD-ONLY0-NEXT:    [[TMP5196:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5196]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5197:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5198:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6667:%.*]] = fcmp oeq double [[TMP5197]], [[TMP5198]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6667]], label [[IF_THEN6669:%.*]], label [[IF_END6670:%.*]]
+// SIMD-ONLY0:       if.then6669:
+// SIMD-ONLY0-NEXT:    [[TMP5199:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5199]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6670]]
+// SIMD-ONLY0:       if.end6670:
+// SIMD-ONLY0-NEXT:    [[TMP5200:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5200]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5201:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5202:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6671:%.*]] = fcmp oeq double [[TMP5201]], [[TMP5202]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6671]], label [[IF_THEN6673:%.*]], label [[IF_END6674:%.*]]
+// SIMD-ONLY0:       if.then6673:
+// SIMD-ONLY0-NEXT:    [[TMP5203:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5203]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6674]]
+// SIMD-ONLY0:       if.end6674:
+// SIMD-ONLY0-NEXT:    [[TMP5204:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5205:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6675:%.*]] = fcmp ogt double [[TMP5204]], [[TMP5205]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6675]], label [[IF_THEN6677:%.*]], label [[IF_END6678:%.*]]
+// SIMD-ONLY0:       if.then6677:
+// SIMD-ONLY0-NEXT:    [[TMP5206:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5206]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6678]]
+// SIMD-ONLY0:       if.end6678:
+// SIMD-ONLY0-NEXT:    [[TMP5207:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5207]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5208:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5209:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6679:%.*]] = fcmp ogt double [[TMP5208]], [[TMP5209]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6679]], label [[IF_THEN6681:%.*]], label [[IF_END6682:%.*]]
+// SIMD-ONLY0:       if.then6681:
+// SIMD-ONLY0-NEXT:    [[TMP5210:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5210]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6682]]
+// SIMD-ONLY0:       if.end6682:
+// SIMD-ONLY0-NEXT:    [[TMP5211:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5211]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5212:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5213:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6683:%.*]] = fcmp olt double [[TMP5212]], [[TMP5213]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6683]], label [[IF_THEN6685:%.*]], label [[IF_END6686:%.*]]
+// SIMD-ONLY0:       if.then6685:
+// SIMD-ONLY0-NEXT:    [[TMP5214:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5214]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6686]]
+// SIMD-ONLY0:       if.end6686:
+// SIMD-ONLY0-NEXT:    [[TMP5215:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5215]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5216:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5217:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6687:%.*]] = fcmp olt double [[TMP5216]], [[TMP5217]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6687]], label [[IF_THEN6689:%.*]], label [[IF_END6690:%.*]]
+// SIMD-ONLY0:       if.then6689:
+// SIMD-ONLY0-NEXT:    [[TMP5218:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5218]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6690]]
+// SIMD-ONLY0:       if.end6690:
+// SIMD-ONLY0-NEXT:    [[TMP5219:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5219]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5220:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5221:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6691:%.*]] = fcmp oeq double [[TMP5220]], [[TMP5221]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6691]], label [[IF_THEN6693:%.*]], label [[IF_END6694:%.*]]
+// SIMD-ONLY0:       if.then6693:
+// SIMD-ONLY0-NEXT:    [[TMP5222:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5222]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6694]]
+// SIMD-ONLY0:       if.end6694:
+// SIMD-ONLY0-NEXT:    [[TMP5223:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5223]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5224:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5225:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6695:%.*]] = fcmp oeq double [[TMP5224]], [[TMP5225]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6695]], label [[IF_THEN6697:%.*]], label [[IF_END6698:%.*]]
+// SIMD-ONLY0:       if.then6697:
+// SIMD-ONLY0-NEXT:    [[TMP5226:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5226]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6698]]
+// SIMD-ONLY0:       if.end6698:
+// SIMD-ONLY0-NEXT:    [[TMP5227:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5227]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5228:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5229:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6699:%.*]] = fcmp oeq double [[TMP5228]], [[TMP5229]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6699]], label [[IF_THEN6701:%.*]], label [[IF_ELSE6702:%.*]]
+// SIMD-ONLY0:       if.then6701:
+// SIMD-ONLY0-NEXT:    [[TMP5230:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5230]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6703:%.*]]
+// SIMD-ONLY0:       if.else6702:
+// SIMD-ONLY0-NEXT:    [[TMP5231:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5231]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6703]]
+// SIMD-ONLY0:       if.end6703:
+// SIMD-ONLY0-NEXT:    [[TMP5232:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5233:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6704:%.*]] = fcmp oeq double [[TMP5232]], [[TMP5233]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6704]], label [[IF_THEN6706:%.*]], label [[IF_ELSE6707:%.*]]
+// SIMD-ONLY0:       if.then6706:
+// SIMD-ONLY0-NEXT:    [[TMP5234:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5234]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6708:%.*]]
+// SIMD-ONLY0:       if.else6707:
+// SIMD-ONLY0-NEXT:    [[TMP5235:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5235]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6708]]
+// SIMD-ONLY0:       if.end6708:
+// SIMD-ONLY0-NEXT:    [[TMP5236:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5237:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6709:%.*]] = fcmp oeq double [[TMP5236]], [[TMP5237]]
+// SIMD-ONLY0-NEXT:    [[CONV6710:%.*]] = zext i1 [[CMP6709]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6710]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP5238:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6711:%.*]] = icmp ne i32 [[TMP5238]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6711]], label [[IF_THEN6712:%.*]], label [[IF_END6713:%.*]]
+// SIMD-ONLY0:       if.then6712:
+// SIMD-ONLY0-NEXT:    [[TMP5239:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5239]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6713]]
+// SIMD-ONLY0:       if.end6713:
+// SIMD-ONLY0-NEXT:    [[TMP5240:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5241:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6714:%.*]] = fcmp oeq double [[TMP5240]], [[TMP5241]]
+// SIMD-ONLY0-NEXT:    [[CONV6715:%.*]] = zext i1 [[CMP6714]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6715]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP5242:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6716:%.*]] = icmp ne i32 [[TMP5242]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6716]], label [[IF_THEN6717:%.*]], label [[IF_END6718:%.*]]
+// SIMD-ONLY0:       if.then6717:
+// SIMD-ONLY0-NEXT:    [[TMP5243:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5243]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6718]]
+// SIMD-ONLY0:       if.end6718:
+// SIMD-ONLY0-NEXT:    [[TMP5244:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5245:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6719:%.*]] = fcmp oeq double [[TMP5244]], [[TMP5245]]
+// SIMD-ONLY0-NEXT:    [[CONV6720:%.*]] = zext i1 [[CMP6719]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6720]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP5246:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6721:%.*]] = icmp ne i32 [[TMP5246]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6721]], label [[IF_THEN6722:%.*]], label [[IF_ELSE6723:%.*]]
+// SIMD-ONLY0:       if.then6722:
+// SIMD-ONLY0-NEXT:    [[TMP5247:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5247]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6724:%.*]]
+// SIMD-ONLY0:       if.else6723:
+// SIMD-ONLY0-NEXT:    [[TMP5248:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5248]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6724]]
+// SIMD-ONLY0:       if.end6724:
+// SIMD-ONLY0-NEXT:    [[TMP5249:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5250:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6725:%.*]] = fcmp oeq double [[TMP5249]], [[TMP5250]]
+// SIMD-ONLY0-NEXT:    [[CONV6726:%.*]] = zext i1 [[CMP6725]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6726]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP5251:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6727:%.*]] = icmp ne i32 [[TMP5251]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6727]], label [[IF_THEN6728:%.*]], label [[IF_ELSE6729:%.*]]
+// SIMD-ONLY0:       if.then6728:
+// SIMD-ONLY0-NEXT:    [[TMP5252:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5252]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6730:%.*]]
+// SIMD-ONLY0:       if.else6729:
+// SIMD-ONLY0-NEXT:    [[TMP5253:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5253]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6730]]
+// SIMD-ONLY0:       if.end6730:
+// SIMD-ONLY0-NEXT:    [[TMP5254:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5254]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5255:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5256:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6731:%.*]] = fcmp ogt double [[TMP5255]], [[TMP5256]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6731]], label [[IF_THEN6733:%.*]], label [[IF_END6734:%.*]]
+// SIMD-ONLY0:       if.then6733:
+// SIMD-ONLY0-NEXT:    [[TMP5257:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5257]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6734]]
+// SIMD-ONLY0:       if.end6734:
+// SIMD-ONLY0-NEXT:    [[TMP5258:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5258]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5259:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5260:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6735:%.*]] = fcmp ogt double [[TMP5259]], [[TMP5260]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6735]], label [[IF_THEN6737:%.*]], label [[IF_END6738:%.*]]
+// SIMD-ONLY0:       if.then6737:
+// SIMD-ONLY0-NEXT:    [[TMP5261:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5261]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6738]]
+// SIMD-ONLY0:       if.end6738:
+// SIMD-ONLY0-NEXT:    [[TMP5262:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5262]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5263:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5264:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6739:%.*]] = fcmp olt double [[TMP5263]], [[TMP5264]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6739]], label [[IF_THEN6741:%.*]], label [[IF_END6742:%.*]]
+// SIMD-ONLY0:       if.then6741:
+// SIMD-ONLY0-NEXT:    [[TMP5265:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5265]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6742]]
+// SIMD-ONLY0:       if.end6742:
+// SIMD-ONLY0-NEXT:    [[TMP5266:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5266]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5267:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5268:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6743:%.*]] = fcmp olt double [[TMP5267]], [[TMP5268]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6743]], label [[IF_THEN6745:%.*]], label [[IF_END6746:%.*]]
+// SIMD-ONLY0:       if.then6745:
+// SIMD-ONLY0-NEXT:    [[TMP5269:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5269]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6746]]
+// SIMD-ONLY0:       if.end6746:
+// SIMD-ONLY0-NEXT:    [[TMP5270:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5270]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5271:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5272:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6747:%.*]] = fcmp oeq double [[TMP5271]], [[TMP5272]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6747]], label [[IF_THEN6749:%.*]], label [[IF_END6750:%.*]]
+// SIMD-ONLY0:       if.then6749:
+// SIMD-ONLY0-NEXT:    [[TMP5273:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5273]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6750]]
+// SIMD-ONLY0:       if.end6750:
+// SIMD-ONLY0-NEXT:    [[TMP5274:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5274]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5275:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5276:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6751:%.*]] = fcmp oeq double [[TMP5275]], [[TMP5276]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6751]], label [[IF_THEN6753:%.*]], label [[IF_END6754:%.*]]
+// SIMD-ONLY0:       if.then6753:
+// SIMD-ONLY0-NEXT:    [[TMP5277:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5277]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6754]]
+// SIMD-ONLY0:       if.end6754:
+// SIMD-ONLY0-NEXT:    [[TMP5278:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5279:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6755:%.*]] = fcmp ogt double [[TMP5278]], [[TMP5279]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6755]], label [[IF_THEN6757:%.*]], label [[IF_END6758:%.*]]
+// SIMD-ONLY0:       if.then6757:
+// SIMD-ONLY0-NEXT:    [[TMP5280:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5280]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6758]]
+// SIMD-ONLY0:       if.end6758:
+// SIMD-ONLY0-NEXT:    [[TMP5281:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5281]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5282:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5283:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6759:%.*]] = fcmp ogt double [[TMP5282]], [[TMP5283]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6759]], label [[IF_THEN6761:%.*]], label [[IF_END6762:%.*]]
+// SIMD-ONLY0:       if.then6761:
+// SIMD-ONLY0-NEXT:    [[TMP5284:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5284]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6762]]
+// SIMD-ONLY0:       if.end6762:
+// SIMD-ONLY0-NEXT:    [[TMP5285:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5285]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5286:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5287:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6763:%.*]] = fcmp olt double [[TMP5286]], [[TMP5287]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6763]], label [[IF_THEN6765:%.*]], label [[IF_END6766:%.*]]
+// SIMD-ONLY0:       if.then6765:
+// SIMD-ONLY0-NEXT:    [[TMP5288:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5288]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6766]]
+// SIMD-ONLY0:       if.end6766:
+// SIMD-ONLY0-NEXT:    [[TMP5289:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5289]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5290:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5291:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6767:%.*]] = fcmp olt double [[TMP5290]], [[TMP5291]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6767]], label [[IF_THEN6769:%.*]], label [[IF_END6770:%.*]]
+// SIMD-ONLY0:       if.then6769:
+// SIMD-ONLY0-NEXT:    [[TMP5292:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5292]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6770]]
+// SIMD-ONLY0:       if.end6770:
+// SIMD-ONLY0-NEXT:    [[TMP5293:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5293]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5294:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5295:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6771:%.*]] = fcmp oeq double [[TMP5294]], [[TMP5295]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6771]], label [[IF_THEN6773:%.*]], label [[IF_END6774:%.*]]
+// SIMD-ONLY0:       if.then6773:
+// SIMD-ONLY0-NEXT:    [[TMP5296:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5296]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6774]]
+// SIMD-ONLY0:       if.end6774:
+// SIMD-ONLY0-NEXT:    [[TMP5297:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5297]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5298:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5299:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6775:%.*]] = fcmp oeq double [[TMP5298]], [[TMP5299]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6775]], label [[IF_THEN6777:%.*]], label [[IF_END6778:%.*]]
+// SIMD-ONLY0:       if.then6777:
+// SIMD-ONLY0-NEXT:    [[TMP5300:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5300]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6778]]
+// SIMD-ONLY0:       if.end6778:
+// SIMD-ONLY0-NEXT:    [[TMP5301:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5301]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5302:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5303:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6779:%.*]] = fcmp oeq double [[TMP5302]], [[TMP5303]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6779]], label [[IF_THEN6781:%.*]], label [[IF_ELSE6782:%.*]]
+// SIMD-ONLY0:       if.then6781:
+// SIMD-ONLY0-NEXT:    [[TMP5304:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5304]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6783:%.*]]
+// SIMD-ONLY0:       if.else6782:
+// SIMD-ONLY0-NEXT:    [[TMP5305:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5305]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6783]]
+// SIMD-ONLY0:       if.end6783:
+// SIMD-ONLY0-NEXT:    [[TMP5306:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5307:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6784:%.*]] = fcmp oeq double [[TMP5306]], [[TMP5307]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6784]], label [[IF_THEN6786:%.*]], label [[IF_ELSE6787:%.*]]
+// SIMD-ONLY0:       if.then6786:
+// SIMD-ONLY0-NEXT:    [[TMP5308:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5308]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6788:%.*]]
+// SIMD-ONLY0:       if.else6787:
+// SIMD-ONLY0-NEXT:    [[TMP5309:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5309]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6788]]
+// SIMD-ONLY0:       if.end6788:
+// SIMD-ONLY0-NEXT:    [[TMP5310:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5311:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6789:%.*]] = fcmp oeq double [[TMP5310]], [[TMP5311]]
+// SIMD-ONLY0-NEXT:    [[CONV6790:%.*]] = zext i1 [[CMP6789]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6790]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP5312:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6791:%.*]] = icmp ne i32 [[TMP5312]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6791]], label [[IF_THEN6792:%.*]], label [[IF_END6793:%.*]]
+// SIMD-ONLY0:       if.then6792:
+// SIMD-ONLY0-NEXT:    [[TMP5313:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5313]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6793]]
+// SIMD-ONLY0:       if.end6793:
+// SIMD-ONLY0-NEXT:    [[TMP5314:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5315:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6794:%.*]] = fcmp oeq double [[TMP5314]], [[TMP5315]]
+// SIMD-ONLY0-NEXT:    [[CONV6795:%.*]] = zext i1 [[CMP6794]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6795]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP5316:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6796:%.*]] = icmp ne i32 [[TMP5316]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6796]], label [[IF_THEN6797:%.*]], label [[IF_END6798:%.*]]
+// SIMD-ONLY0:       if.then6797:
+// SIMD-ONLY0-NEXT:    [[TMP5317:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5317]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6798]]
+// SIMD-ONLY0:       if.end6798:
+// SIMD-ONLY0-NEXT:    [[TMP5318:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5319:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6799:%.*]] = fcmp oeq double [[TMP5318]], [[TMP5319]]
+// SIMD-ONLY0-NEXT:    [[CONV6800:%.*]] = zext i1 [[CMP6799]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6800]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP5320:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6801:%.*]] = icmp ne i32 [[TMP5320]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6801]], label [[IF_THEN6802:%.*]], label [[IF_ELSE6803:%.*]]
+// SIMD-ONLY0:       if.then6802:
+// SIMD-ONLY0-NEXT:    [[TMP5321:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5321]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6804:%.*]]
+// SIMD-ONLY0:       if.else6803:
+// SIMD-ONLY0-NEXT:    [[TMP5322:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5322]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6804]]
+// SIMD-ONLY0:       if.end6804:
+// SIMD-ONLY0-NEXT:    [[TMP5323:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5324:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6805:%.*]] = fcmp oeq double [[TMP5323]], [[TMP5324]]
+// SIMD-ONLY0-NEXT:    [[CONV6806:%.*]] = zext i1 [[CMP6805]] to i32
+// SIMD-ONLY0-NEXT:    store i32 [[CONV6806]], ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP5325:%.*]] = load i32, ptr [[IR]], align 4
+// SIMD-ONLY0-NEXT:    [[TOBOOL6807:%.*]] = icmp ne i32 [[TMP5325]], 0
+// SIMD-ONLY0-NEXT:    br i1 [[TOBOOL6807]], label [[IF_THEN6808:%.*]], label [[IF_ELSE6809:%.*]]
+// SIMD-ONLY0:       if.then6808:
+// SIMD-ONLY0-NEXT:    [[TMP5326:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5326]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6810:%.*]]
+// SIMD-ONLY0:       if.else6809:
+// SIMD-ONLY0-NEXT:    [[TMP5327:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5327]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    br label [[IF_END6810]]
+// SIMD-ONLY0:       if.end6810:
+// SIMD-ONLY0-NEXT:    ret void
+//
+//
+// SIMD-ONLY0-LABEL: @cxevd(
+// SIMD-ONLY0-NEXT:  entry:
+// SIMD-ONLY0-NEXT:    [[CX:%.*]] = alloca i8, align 1
+// SIMD-ONLY0-NEXT:    [[CV:%.*]] = alloca i8, align 1
+// SIMD-ONLY0-NEXT:    [[CE:%.*]] = alloca i8, align 1
+// SIMD-ONLY0-NEXT:    [[CD:%.*]] = alloca i8, align 1
+// SIMD-ONLY0-NEXT:    [[TMP0:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP0]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP1:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP2:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1:%.*]] = sext i8 [[TMP2]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV1]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// SIMD-ONLY0:       cond.true:
+// SIMD-ONLY0-NEXT:    [[TMP3:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV3:%.*]] = sext i8 [[TMP3]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END:%.*]]
+// SIMD-ONLY0:       cond.false:
+// SIMD-ONLY0-NEXT:    [[TMP4:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV4:%.*]] = sext i8 [[TMP4]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END]]
+// SIMD-ONLY0:       cond.end:
+// SIMD-ONLY0-NEXT:    [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ]
+// SIMD-ONLY0-NEXT:    [[CONV5:%.*]] = trunc i32 [[COND]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV5]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP5:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP5]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP6:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV6:%.*]] = sext i8 [[TMP6]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP7:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV7:%.*]] = sext i8 [[TMP7]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP8:%.*]] = icmp slt i32 [[CONV6]], [[CONV7]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP8]], label [[COND_TRUE10:%.*]], label [[COND_FALSE12:%.*]]
+// SIMD-ONLY0:       cond.true10:
+// SIMD-ONLY0-NEXT:    [[TMP8:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV11:%.*]] = sext i8 [[TMP8]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END14:%.*]]
+// SIMD-ONLY0:       cond.false12:
+// SIMD-ONLY0-NEXT:    [[TMP9:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV13:%.*]] = sext i8 [[TMP9]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END14]]
+// SIMD-ONLY0:       cond.end14:
+// SIMD-ONLY0-NEXT:    [[COND15:%.*]] = phi i32 [ [[CONV11]], [[COND_TRUE10]] ], [ [[CONV13]], [[COND_FALSE12]] ]
+// SIMD-ONLY0-NEXT:    [[CONV16:%.*]] = trunc i32 [[COND15]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV16]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP10:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP10]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP11:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV17:%.*]] = sext i8 [[TMP11]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP12:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV18:%.*]] = sext i8 [[TMP12]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP19:%.*]] = icmp eq i32 [[CONV17]], [[CONV18]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP19]], label [[COND_TRUE21:%.*]], label [[COND_FALSE23:%.*]]
+// SIMD-ONLY0:       cond.true21:
+// SIMD-ONLY0-NEXT:    [[TMP13:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV22:%.*]] = sext i8 [[TMP13]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END25:%.*]]
+// SIMD-ONLY0:       cond.false23:
+// SIMD-ONLY0-NEXT:    [[TMP14:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV24:%.*]] = sext i8 [[TMP14]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END25]]
+// SIMD-ONLY0:       cond.end25:
+// SIMD-ONLY0-NEXT:    [[COND26:%.*]] = phi i32 [ [[CONV22]], [[COND_TRUE21]] ], [ [[CONV24]], [[COND_FALSE23]] ]
+// SIMD-ONLY0-NEXT:    [[CONV27:%.*]] = trunc i32 [[COND26]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV27]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP15:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV28:%.*]] = sext i8 [[TMP15]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP16:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV29:%.*]] = sext i8 [[TMP16]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP30:%.*]] = icmp sgt i32 [[CONV28]], [[CONV29]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP30]], label [[COND_TRUE32:%.*]], label [[COND_FALSE34:%.*]]
+// SIMD-ONLY0:       cond.true32:
+// SIMD-ONLY0-NEXT:    [[TMP17:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV33:%.*]] = sext i8 [[TMP17]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END36:%.*]]
+// SIMD-ONLY0:       cond.false34:
+// SIMD-ONLY0-NEXT:    [[TMP18:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV35:%.*]] = sext i8 [[TMP18]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END36]]
+// SIMD-ONLY0:       cond.end36:
+// SIMD-ONLY0-NEXT:    [[COND37:%.*]] = phi i32 [ [[CONV33]], [[COND_TRUE32]] ], [ [[CONV35]], [[COND_FALSE34]] ]
+// SIMD-ONLY0-NEXT:    [[CONV38:%.*]] = trunc i32 [[COND37]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV38]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP19:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP19]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP20:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV39:%.*]] = sext i8 [[TMP20]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP21:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV40:%.*]] = sext i8 [[TMP21]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP41:%.*]] = icmp slt i32 [[CONV39]], [[CONV40]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP41]], label [[COND_TRUE43:%.*]], label [[COND_FALSE45:%.*]]
+// SIMD-ONLY0:       cond.true43:
+// SIMD-ONLY0-NEXT:    [[TMP22:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV44:%.*]] = sext i8 [[TMP22]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END47:%.*]]
+// SIMD-ONLY0:       cond.false45:
+// SIMD-ONLY0-NEXT:    [[TMP23:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV46:%.*]] = sext i8 [[TMP23]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END47]]
+// SIMD-ONLY0:       cond.end47:
+// SIMD-ONLY0-NEXT:    [[COND48:%.*]] = phi i32 [ [[CONV44]], [[COND_TRUE43]] ], [ [[CONV46]], [[COND_FALSE45]] ]
+// SIMD-ONLY0-NEXT:    [[CONV49:%.*]] = trunc i32 [[COND48]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV49]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP24:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP24]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP25:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV50:%.*]] = sext i8 [[TMP25]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP26:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV51:%.*]] = sext i8 [[TMP26]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP52:%.*]] = icmp eq i32 [[CONV50]], [[CONV51]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP52]], label [[COND_TRUE54:%.*]], label [[COND_FALSE56:%.*]]
+// SIMD-ONLY0:       cond.true54:
+// SIMD-ONLY0-NEXT:    [[TMP27:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV55:%.*]] = sext i8 [[TMP27]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END58:%.*]]
+// SIMD-ONLY0:       cond.false56:
+// SIMD-ONLY0-NEXT:    [[TMP28:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV57:%.*]] = sext i8 [[TMP28]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END58]]
+// SIMD-ONLY0:       cond.end58:
+// SIMD-ONLY0-NEXT:    [[COND59:%.*]] = phi i32 [ [[CONV55]], [[COND_TRUE54]] ], [ [[CONV57]], [[COND_FALSE56]] ]
+// SIMD-ONLY0-NEXT:    [[CONV60:%.*]] = trunc i32 [[COND59]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV60]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP29:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP29]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP30:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP30]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP31:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV61:%.*]] = sext i8 [[TMP31]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP32:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV62:%.*]] = sext i8 [[TMP32]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP63:%.*]] = icmp sgt i32 [[CONV61]], [[CONV62]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP63]], label [[COND_TRUE65:%.*]], label [[COND_FALSE67:%.*]]
+// SIMD-ONLY0:       cond.true65:
+// SIMD-ONLY0-NEXT:    [[TMP33:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV66:%.*]] = sext i8 [[TMP33]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END69:%.*]]
+// SIMD-ONLY0:       cond.false67:
+// SIMD-ONLY0-NEXT:    [[TMP34:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV68:%.*]] = sext i8 [[TMP34]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END69]]
+// SIMD-ONLY0:       cond.end69:
+// SIMD-ONLY0-NEXT:    [[COND70:%.*]] = phi i32 [ [[CONV66]], [[COND_TRUE65]] ], [ [[CONV68]], [[COND_FALSE67]] ]
+// SIMD-ONLY0-NEXT:    [[CONV71:%.*]] = trunc i32 [[COND70]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV71]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP35:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP35]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP36:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV72:%.*]] = sext i8 [[TMP36]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP37:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV73:%.*]] = sext i8 [[TMP37]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP74:%.*]] = icmp slt i32 [[CONV72]], [[CONV73]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP74]], label [[COND_TRUE76:%.*]], label [[COND_FALSE78:%.*]]
+// SIMD-ONLY0:       cond.true76:
+// SIMD-ONLY0-NEXT:    [[TMP38:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV77:%.*]] = sext i8 [[TMP38]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END80:%.*]]
+// SIMD-ONLY0:       cond.false78:
+// SIMD-ONLY0-NEXT:    [[TMP39:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV79:%.*]] = sext i8 [[TMP39]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END80]]
+// SIMD-ONLY0:       cond.end80:
+// SIMD-ONLY0-NEXT:    [[COND81:%.*]] = phi i32 [ [[CONV77]], [[COND_TRUE76]] ], [ [[CONV79]], [[COND_FALSE78]] ]
+// SIMD-ONLY0-NEXT:    [[CONV82:%.*]] = trunc i32 [[COND81]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV82]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP40:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP40]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP41:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV83:%.*]] = sext i8 [[TMP41]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP42:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV84:%.*]] = sext i8 [[TMP42]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP85:%.*]] = icmp eq i32 [[CONV83]], [[CONV84]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP85]], label [[COND_TRUE87:%.*]], label [[COND_FALSE89:%.*]]
+// SIMD-ONLY0:       cond.true87:
+// SIMD-ONLY0-NEXT:    [[TMP43:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV88:%.*]] = sext i8 [[TMP43]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END91:%.*]]
+// SIMD-ONLY0:       cond.false89:
+// SIMD-ONLY0-NEXT:    [[TMP44:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV90:%.*]] = sext i8 [[TMP44]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END91]]
+// SIMD-ONLY0:       cond.end91:
+// SIMD-ONLY0-NEXT:    [[COND92:%.*]] = phi i32 [ [[CONV88]], [[COND_TRUE87]] ], [ [[CONV90]], [[COND_FALSE89]] ]
+// SIMD-ONLY0-NEXT:    [[CONV93:%.*]] = trunc i32 [[COND92]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV93]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP45:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV94:%.*]] = sext i8 [[TMP45]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP46:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV95:%.*]] = sext i8 [[TMP46]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP96:%.*]] = icmp sgt i32 [[CONV94]], [[CONV95]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP96]], label [[COND_TRUE98:%.*]], label [[COND_FALSE100:%.*]]
+// SIMD-ONLY0:       cond.true98:
+// SIMD-ONLY0-NEXT:    [[TMP47:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV99:%.*]] = sext i8 [[TMP47]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END102:%.*]]
+// SIMD-ONLY0:       cond.false100:
+// SIMD-ONLY0-NEXT:    [[TMP48:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV101:%.*]] = sext i8 [[TMP48]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END102]]
+// SIMD-ONLY0:       cond.end102:
+// SIMD-ONLY0-NEXT:    [[COND103:%.*]] = phi i32 [ [[CONV99]], [[COND_TRUE98]] ], [ [[CONV101]], [[COND_FALSE100]] ]
+// SIMD-ONLY0-NEXT:    [[CONV104:%.*]] = trunc i32 [[COND103]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV104]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP49:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP49]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP50:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV105:%.*]] = sext i8 [[TMP50]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP51:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV106:%.*]] = sext i8 [[TMP51]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP107:%.*]] = icmp slt i32 [[CONV105]], [[CONV106]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP107]], label [[COND_TRUE109:%.*]], label [[COND_FALSE111:%.*]]
+// SIMD-ONLY0:       cond.true109:
+// SIMD-ONLY0-NEXT:    [[TMP52:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV110:%.*]] = sext i8 [[TMP52]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END113:%.*]]
+// SIMD-ONLY0:       cond.false111:
+// SIMD-ONLY0-NEXT:    [[TMP53:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV112:%.*]] = sext i8 [[TMP53]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END113]]
+// SIMD-ONLY0:       cond.end113:
+// SIMD-ONLY0-NEXT:    [[COND114:%.*]] = phi i32 [ [[CONV110]], [[COND_TRUE109]] ], [ [[CONV112]], [[COND_FALSE111]] ]
+// SIMD-ONLY0-NEXT:    [[CONV115:%.*]] = trunc i32 [[COND114]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV115]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP54:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP54]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP55:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV116:%.*]] = sext i8 [[TMP55]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP56:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV117:%.*]] = sext i8 [[TMP56]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP118:%.*]] = icmp eq i32 [[CONV116]], [[CONV117]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP118]], label [[COND_TRUE120:%.*]], label [[COND_FALSE122:%.*]]
+// SIMD-ONLY0:       cond.true120:
+// SIMD-ONLY0-NEXT:    [[TMP57:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV121:%.*]] = sext i8 [[TMP57]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END124:%.*]]
+// SIMD-ONLY0:       cond.false122:
+// SIMD-ONLY0-NEXT:    [[TMP58:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV123:%.*]] = sext i8 [[TMP58]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END124]]
+// SIMD-ONLY0:       cond.end124:
+// SIMD-ONLY0-NEXT:    [[COND125:%.*]] = phi i32 [ [[CONV121]], [[COND_TRUE120]] ], [ [[CONV123]], [[COND_FALSE122]] ]
+// SIMD-ONLY0-NEXT:    [[CONV126:%.*]] = trunc i32 [[COND125]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV126]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP59:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP59]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP60:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP60]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP61:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV127:%.*]] = sext i8 [[TMP61]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP62:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV128:%.*]] = sext i8 [[TMP62]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP129:%.*]] = icmp sgt i32 [[CONV127]], [[CONV128]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP129]], label [[COND_TRUE131:%.*]], label [[COND_FALSE133:%.*]]
+// SIMD-ONLY0:       cond.true131:
+// SIMD-ONLY0-NEXT:    [[TMP63:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV132:%.*]] = sext i8 [[TMP63]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END135:%.*]]
+// SIMD-ONLY0:       cond.false133:
+// SIMD-ONLY0-NEXT:    [[TMP64:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV134:%.*]] = sext i8 [[TMP64]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END135]]
+// SIMD-ONLY0:       cond.end135:
+// SIMD-ONLY0-NEXT:    [[COND136:%.*]] = phi i32 [ [[CONV132]], [[COND_TRUE131]] ], [ [[CONV134]], [[COND_FALSE133]] ]
+// SIMD-ONLY0-NEXT:    [[CONV137:%.*]] = trunc i32 [[COND136]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV137]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP65:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP65]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP66:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV138:%.*]] = sext i8 [[TMP66]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP67:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV139:%.*]] = sext i8 [[TMP67]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP140:%.*]] = icmp slt i32 [[CONV138]], [[CONV139]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP140]], label [[COND_TRUE142:%.*]], label [[COND_FALSE144:%.*]]
+// SIMD-ONLY0:       cond.true142:
+// SIMD-ONLY0-NEXT:    [[TMP68:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV143:%.*]] = sext i8 [[TMP68]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END146:%.*]]
+// SIMD-ONLY0:       cond.false144:
+// SIMD-ONLY0-NEXT:    [[TMP69:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV145:%.*]] = sext i8 [[TMP69]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END146]]
+// SIMD-ONLY0:       cond.end146:
+// SIMD-ONLY0-NEXT:    [[COND147:%.*]] = phi i32 [ [[CONV143]], [[COND_TRUE142]] ], [ [[CONV145]], [[COND_FALSE144]] ]
+// SIMD-ONLY0-NEXT:    [[CONV148:%.*]] = trunc i32 [[COND147]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV148]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP70:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP70]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP71:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV149:%.*]] = sext i8 [[TMP71]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP72:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV150:%.*]] = sext i8 [[TMP72]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP151:%.*]] = icmp eq i32 [[CONV149]], [[CONV150]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP151]], label [[COND_TRUE153:%.*]], label [[COND_FALSE155:%.*]]
+// SIMD-ONLY0:       cond.true153:
+// SIMD-ONLY0-NEXT:    [[TMP73:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV154:%.*]] = sext i8 [[TMP73]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END157:%.*]]
+// SIMD-ONLY0:       cond.false155:
+// SIMD-ONLY0-NEXT:    [[TMP74:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV156:%.*]] = sext i8 [[TMP74]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END157]]
+// SIMD-ONLY0:       cond.end157:
+// SIMD-ONLY0-NEXT:    [[COND158:%.*]] = phi i32 [ [[CONV154]], [[COND_TRUE153]] ], [ [[CONV156]], [[COND_FALSE155]] ]
+// SIMD-ONLY0-NEXT:    [[CONV159:%.*]] = trunc i32 [[COND158]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV159]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP75:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV160:%.*]] = sext i8 [[TMP75]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP76:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV161:%.*]] = sext i8 [[TMP76]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP162:%.*]] = icmp sgt i32 [[CONV160]], [[CONV161]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP162]], label [[COND_TRUE164:%.*]], label [[COND_FALSE166:%.*]]
+// SIMD-ONLY0:       cond.true164:
+// SIMD-ONLY0-NEXT:    [[TMP77:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV165:%.*]] = sext i8 [[TMP77]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END168:%.*]]
+// SIMD-ONLY0:       cond.false166:
+// SIMD-ONLY0-NEXT:    [[TMP78:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV167:%.*]] = sext i8 [[TMP78]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END168]]
+// SIMD-ONLY0:       cond.end168:
+// SIMD-ONLY0-NEXT:    [[COND169:%.*]] = phi i32 [ [[CONV165]], [[COND_TRUE164]] ], [ [[CONV167]], [[COND_FALSE166]] ]
+// SIMD-ONLY0-NEXT:    [[CONV170:%.*]] = trunc i32 [[COND169]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV170]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP79:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP79]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP80:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV171:%.*]] = sext i8 [[TMP80]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP81:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV172:%.*]] = sext i8 [[TMP81]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP173:%.*]] = icmp slt i32 [[CONV171]], [[CONV172]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP173]], label [[COND_TRUE175:%.*]], label [[COND_FALSE177:%.*]]
+// SIMD-ONLY0:       cond.true175:
+// SIMD-ONLY0-NEXT:    [[TMP82:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV176:%.*]] = sext i8 [[TMP82]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END179:%.*]]
+// SIMD-ONLY0:       cond.false177:
+// SIMD-ONLY0-NEXT:    [[TMP83:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV178:%.*]] = sext i8 [[TMP83]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END179]]
+// SIMD-ONLY0:       cond.end179:
+// SIMD-ONLY0-NEXT:    [[COND180:%.*]] = phi i32 [ [[CONV176]], [[COND_TRUE175]] ], [ [[CONV178]], [[COND_FALSE177]] ]
+// SIMD-ONLY0-NEXT:    [[CONV181:%.*]] = trunc i32 [[COND180]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV181]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP84:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP84]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP85:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV182:%.*]] = sext i8 [[TMP85]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP86:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV183:%.*]] = sext i8 [[TMP86]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP184:%.*]] = icmp eq i32 [[CONV182]], [[CONV183]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP184]], label [[COND_TRUE186:%.*]], label [[COND_FALSE188:%.*]]
+// SIMD-ONLY0:       cond.true186:
+// SIMD-ONLY0-NEXT:    [[TMP87:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV187:%.*]] = sext i8 [[TMP87]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END190:%.*]]
+// SIMD-ONLY0:       cond.false188:
+// SIMD-ONLY0-NEXT:    [[TMP88:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV189:%.*]] = sext i8 [[TMP88]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END190]]
+// SIMD-ONLY0:       cond.end190:
+// SIMD-ONLY0-NEXT:    [[COND191:%.*]] = phi i32 [ [[CONV187]], [[COND_TRUE186]] ], [ [[CONV189]], [[COND_FALSE188]] ]
+// SIMD-ONLY0-NEXT:    [[CONV192:%.*]] = trunc i32 [[COND191]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV192]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP89:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP89]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP90:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP90]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP91:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV193:%.*]] = sext i8 [[TMP91]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP92:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV194:%.*]] = sext i8 [[TMP92]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP195:%.*]] = icmp sgt i32 [[CONV193]], [[CONV194]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP195]], label [[COND_TRUE197:%.*]], label [[COND_FALSE199:%.*]]
+// SIMD-ONLY0:       cond.true197:
+// SIMD-ONLY0-NEXT:    [[TMP93:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV198:%.*]] = sext i8 [[TMP93]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END201:%.*]]
+// SIMD-ONLY0:       cond.false199:
+// SIMD-ONLY0-NEXT:    [[TMP94:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV200:%.*]] = sext i8 [[TMP94]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END201]]
+// SIMD-ONLY0:       cond.end201:
+// SIMD-ONLY0-NEXT:    [[COND202:%.*]] = phi i32 [ [[CONV198]], [[COND_TRUE197]] ], [ [[CONV200]], [[COND_FALSE199]] ]
+// SIMD-ONLY0-NEXT:    [[CONV203:%.*]] = trunc i32 [[COND202]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV203]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP95:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP95]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP96:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV204:%.*]] = sext i8 [[TMP96]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP97:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV205:%.*]] = sext i8 [[TMP97]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP206:%.*]] = icmp slt i32 [[CONV204]], [[CONV205]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP206]], label [[COND_TRUE208:%.*]], label [[COND_FALSE210:%.*]]
+// SIMD-ONLY0:       cond.true208:
+// SIMD-ONLY0-NEXT:    [[TMP98:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV209:%.*]] = sext i8 [[TMP98]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END212:%.*]]
+// SIMD-ONLY0:       cond.false210:
+// SIMD-ONLY0-NEXT:    [[TMP99:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV211:%.*]] = sext i8 [[TMP99]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END212]]
+// SIMD-ONLY0:       cond.end212:
+// SIMD-ONLY0-NEXT:    [[COND213:%.*]] = phi i32 [ [[CONV209]], [[COND_TRUE208]] ], [ [[CONV211]], [[COND_FALSE210]] ]
+// SIMD-ONLY0-NEXT:    [[CONV214:%.*]] = trunc i32 [[COND213]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV214]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP100:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP100]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP101:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV215:%.*]] = sext i8 [[TMP101]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP102:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV216:%.*]] = sext i8 [[TMP102]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP217:%.*]] = icmp eq i32 [[CONV215]], [[CONV216]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP217]], label [[COND_TRUE219:%.*]], label [[COND_FALSE221:%.*]]
+// SIMD-ONLY0:       cond.true219:
+// SIMD-ONLY0-NEXT:    [[TMP103:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV220:%.*]] = sext i8 [[TMP103]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END223:%.*]]
+// SIMD-ONLY0:       cond.false221:
+// SIMD-ONLY0-NEXT:    [[TMP104:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV222:%.*]] = sext i8 [[TMP104]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END223]]
+// SIMD-ONLY0:       cond.end223:
+// SIMD-ONLY0-NEXT:    [[COND224:%.*]] = phi i32 [ [[CONV220]], [[COND_TRUE219]] ], [ [[CONV222]], [[COND_FALSE221]] ]
+// SIMD-ONLY0-NEXT:    [[CONV225:%.*]] = trunc i32 [[COND224]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV225]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP105:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV226:%.*]] = sext i8 [[TMP105]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP106:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV227:%.*]] = sext i8 [[TMP106]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP228:%.*]] = icmp sgt i32 [[CONV226]], [[CONV227]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP228]], label [[COND_TRUE230:%.*]], label [[COND_FALSE232:%.*]]
+// SIMD-ONLY0:       cond.true230:
+// SIMD-ONLY0-NEXT:    [[TMP107:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV231:%.*]] = sext i8 [[TMP107]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END234:%.*]]
+// SIMD-ONLY0:       cond.false232:
+// SIMD-ONLY0-NEXT:    [[TMP108:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV233:%.*]] = sext i8 [[TMP108]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END234]]
+// SIMD-ONLY0:       cond.end234:
+// SIMD-ONLY0-NEXT:    [[COND235:%.*]] = phi i32 [ [[CONV231]], [[COND_TRUE230]] ], [ [[CONV233]], [[COND_FALSE232]] ]
+// SIMD-ONLY0-NEXT:    [[CONV236:%.*]] = trunc i32 [[COND235]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV236]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP109:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP109]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP110:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV237:%.*]] = sext i8 [[TMP110]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP111:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV238:%.*]] = sext i8 [[TMP111]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP239:%.*]] = icmp slt i32 [[CONV237]], [[CONV238]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP239]], label [[COND_TRUE241:%.*]], label [[COND_FALSE243:%.*]]
+// SIMD-ONLY0:       cond.true241:
+// SIMD-ONLY0-NEXT:    [[TMP112:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV242:%.*]] = sext i8 [[TMP112]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END245:%.*]]
+// SIMD-ONLY0:       cond.false243:
+// SIMD-ONLY0-NEXT:    [[TMP113:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV244:%.*]] = sext i8 [[TMP113]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END245]]
+// SIMD-ONLY0:       cond.end245:
+// SIMD-ONLY0-NEXT:    [[COND246:%.*]] = phi i32 [ [[CONV242]], [[COND_TRUE241]] ], [ [[CONV244]], [[COND_FALSE243]] ]
+// SIMD-ONLY0-NEXT:    [[CONV247:%.*]] = trunc i32 [[COND246]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV247]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP114:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP114]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP115:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV248:%.*]] = sext i8 [[TMP115]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP116:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV249:%.*]] = sext i8 [[TMP116]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP250:%.*]] = icmp eq i32 [[CONV248]], [[CONV249]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP250]], label [[COND_TRUE252:%.*]], label [[COND_FALSE254:%.*]]
+// SIMD-ONLY0:       cond.true252:
+// SIMD-ONLY0-NEXT:    [[TMP117:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV253:%.*]] = sext i8 [[TMP117]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END256:%.*]]
+// SIMD-ONLY0:       cond.false254:
+// SIMD-ONLY0-NEXT:    [[TMP118:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV255:%.*]] = sext i8 [[TMP118]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END256]]
+// SIMD-ONLY0:       cond.end256:
+// SIMD-ONLY0-NEXT:    [[COND257:%.*]] = phi i32 [ [[CONV253]], [[COND_TRUE252]] ], [ [[CONV255]], [[COND_FALSE254]] ]
+// SIMD-ONLY0-NEXT:    [[CONV258:%.*]] = trunc i32 [[COND257]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV258]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP119:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP119]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP120:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP120]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP121:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV259:%.*]] = sext i8 [[TMP121]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP122:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV260:%.*]] = sext i8 [[TMP122]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP261:%.*]] = icmp sgt i32 [[CONV259]], [[CONV260]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP261]], label [[COND_TRUE263:%.*]], label [[COND_FALSE265:%.*]]
+// SIMD-ONLY0:       cond.true263:
+// SIMD-ONLY0-NEXT:    [[TMP123:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV264:%.*]] = sext i8 [[TMP123]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END267:%.*]]
+// SIMD-ONLY0:       cond.false265:
+// SIMD-ONLY0-NEXT:    [[TMP124:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV266:%.*]] = sext i8 [[TMP124]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END267]]
+// SIMD-ONLY0:       cond.end267:
+// SIMD-ONLY0-NEXT:    [[COND268:%.*]] = phi i32 [ [[CONV264]], [[COND_TRUE263]] ], [ [[CONV266]], [[COND_FALSE265]] ]
+// SIMD-ONLY0-NEXT:    [[CONV269:%.*]] = trunc i32 [[COND268]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV269]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP125:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP125]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP126:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV270:%.*]] = sext i8 [[TMP126]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP127:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV271:%.*]] = sext i8 [[TMP127]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP272:%.*]] = icmp slt i32 [[CONV270]], [[CONV271]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP272]], label [[COND_TRUE274:%.*]], label [[COND_FALSE276:%.*]]
+// SIMD-ONLY0:       cond.true274:
+// SIMD-ONLY0-NEXT:    [[TMP128:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV275:%.*]] = sext i8 [[TMP128]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END278:%.*]]
+// SIMD-ONLY0:       cond.false276:
+// SIMD-ONLY0-NEXT:    [[TMP129:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV277:%.*]] = sext i8 [[TMP129]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END278]]
+// SIMD-ONLY0:       cond.end278:
+// SIMD-ONLY0-NEXT:    [[COND279:%.*]] = phi i32 [ [[CONV275]], [[COND_TRUE274]] ], [ [[CONV277]], [[COND_FALSE276]] ]
+// SIMD-ONLY0-NEXT:    [[CONV280:%.*]] = trunc i32 [[COND279]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV280]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP130:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP130]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP131:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV281:%.*]] = sext i8 [[TMP131]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP132:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV282:%.*]] = sext i8 [[TMP132]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP283:%.*]] = icmp eq i32 [[CONV281]], [[CONV282]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP283]], label [[COND_TRUE285:%.*]], label [[COND_FALSE287:%.*]]
+// SIMD-ONLY0:       cond.true285:
+// SIMD-ONLY0-NEXT:    [[TMP133:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV286:%.*]] = sext i8 [[TMP133]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END289:%.*]]
+// SIMD-ONLY0:       cond.false287:
+// SIMD-ONLY0-NEXT:    [[TMP134:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV288:%.*]] = sext i8 [[TMP134]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END289]]
+// SIMD-ONLY0:       cond.end289:
+// SIMD-ONLY0-NEXT:    [[COND290:%.*]] = phi i32 [ [[CONV286]], [[COND_TRUE285]] ], [ [[CONV288]], [[COND_FALSE287]] ]
+// SIMD-ONLY0-NEXT:    [[CONV291:%.*]] = trunc i32 [[COND290]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV291]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP135:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV292:%.*]] = sext i8 [[TMP135]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP136:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV293:%.*]] = sext i8 [[TMP136]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP294:%.*]] = icmp sgt i32 [[CONV292]], [[CONV293]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP294]], label [[COND_TRUE296:%.*]], label [[COND_FALSE298:%.*]]
+// SIMD-ONLY0:       cond.true296:
+// SIMD-ONLY0-NEXT:    [[TMP137:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV297:%.*]] = sext i8 [[TMP137]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END300:%.*]]
+// SIMD-ONLY0:       cond.false298:
+// SIMD-ONLY0-NEXT:    [[TMP138:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV299:%.*]] = sext i8 [[TMP138]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END300]]
+// SIMD-ONLY0:       cond.end300:
+// SIMD-ONLY0-NEXT:    [[COND301:%.*]] = phi i32 [ [[CONV297]], [[COND_TRUE296]] ], [ [[CONV299]], [[COND_FALSE298]] ]
+// SIMD-ONLY0-NEXT:    [[CONV302:%.*]] = trunc i32 [[COND301]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV302]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP139:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP139]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP140:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV303:%.*]] = sext i8 [[TMP140]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP141:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV304:%.*]] = sext i8 [[TMP141]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP305:%.*]] = icmp slt i32 [[CONV303]], [[CONV304]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP305]], label [[COND_TRUE307:%.*]], label [[COND_FALSE309:%.*]]
+// SIMD-ONLY0:       cond.true307:
+// SIMD-ONLY0-NEXT:    [[TMP142:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV308:%.*]] = sext i8 [[TMP142]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END311:%.*]]
+// SIMD-ONLY0:       cond.false309:
+// SIMD-ONLY0-NEXT:    [[TMP143:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV310:%.*]] = sext i8 [[TMP143]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END311]]
+// SIMD-ONLY0:       cond.end311:
+// SIMD-ONLY0-NEXT:    [[COND312:%.*]] = phi i32 [ [[CONV308]], [[COND_TRUE307]] ], [ [[CONV310]], [[COND_FALSE309]] ]
+// SIMD-ONLY0-NEXT:    [[CONV313:%.*]] = trunc i32 [[COND312]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV313]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP144:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP144]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP145:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV314:%.*]] = sext i8 [[TMP145]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP146:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV315:%.*]] = sext i8 [[TMP146]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP316:%.*]] = icmp eq i32 [[CONV314]], [[CONV315]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP316]], label [[COND_TRUE318:%.*]], label [[COND_FALSE320:%.*]]
+// SIMD-ONLY0:       cond.true318:
+// SIMD-ONLY0-NEXT:    [[TMP147:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV319:%.*]] = sext i8 [[TMP147]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END322:%.*]]
+// SIMD-ONLY0:       cond.false320:
+// SIMD-ONLY0-NEXT:    [[TMP148:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV321:%.*]] = sext i8 [[TMP148]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END322]]
+// SIMD-ONLY0:       cond.end322:
+// SIMD-ONLY0-NEXT:    [[COND323:%.*]] = phi i32 [ [[CONV319]], [[COND_TRUE318]] ], [ [[CONV321]], [[COND_FALSE320]] ]
+// SIMD-ONLY0-NEXT:    [[CONV324:%.*]] = trunc i32 [[COND323]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV324]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP149:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP149]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP150:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP150]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP151:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV325:%.*]] = sext i8 [[TMP151]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP152:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV326:%.*]] = sext i8 [[TMP152]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP327:%.*]] = icmp sgt i32 [[CONV325]], [[CONV326]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP327]], label [[COND_TRUE329:%.*]], label [[COND_FALSE331:%.*]]
+// SIMD-ONLY0:       cond.true329:
+// SIMD-ONLY0-NEXT:    [[TMP153:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV330:%.*]] = sext i8 [[TMP153]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END333:%.*]]
+// SIMD-ONLY0:       cond.false331:
+// SIMD-ONLY0-NEXT:    [[TMP154:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV332:%.*]] = sext i8 [[TMP154]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END333]]
+// SIMD-ONLY0:       cond.end333:
+// SIMD-ONLY0-NEXT:    [[COND334:%.*]] = phi i32 [ [[CONV330]], [[COND_TRUE329]] ], [ [[CONV332]], [[COND_FALSE331]] ]
+// SIMD-ONLY0-NEXT:    [[CONV335:%.*]] = trunc i32 [[COND334]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV335]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP155:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP155]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP156:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV336:%.*]] = sext i8 [[TMP156]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP157:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV337:%.*]] = sext i8 [[TMP157]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP338:%.*]] = icmp slt i32 [[CONV336]], [[CONV337]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP338]], label [[COND_TRUE340:%.*]], label [[COND_FALSE342:%.*]]
+// SIMD-ONLY0:       cond.true340:
+// SIMD-ONLY0-NEXT:    [[TMP158:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV341:%.*]] = sext i8 [[TMP158]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END344:%.*]]
+// SIMD-ONLY0:       cond.false342:
+// SIMD-ONLY0-NEXT:    [[TMP159:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV343:%.*]] = sext i8 [[TMP159]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END344]]
+// SIMD-ONLY0:       cond.end344:
+// SIMD-ONLY0-NEXT:    [[COND345:%.*]] = phi i32 [ [[CONV341]], [[COND_TRUE340]] ], [ [[CONV343]], [[COND_FALSE342]] ]
+// SIMD-ONLY0-NEXT:    [[CONV346:%.*]] = trunc i32 [[COND345]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV346]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP160:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP160]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP161:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV347:%.*]] = sext i8 [[TMP161]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP162:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV348:%.*]] = sext i8 [[TMP162]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP349:%.*]] = icmp eq i32 [[CONV347]], [[CONV348]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP349]], label [[COND_TRUE351:%.*]], label [[COND_FALSE353:%.*]]
+// SIMD-ONLY0:       cond.true351:
+// SIMD-ONLY0-NEXT:    [[TMP163:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV352:%.*]] = sext i8 [[TMP163]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END355:%.*]]
+// SIMD-ONLY0:       cond.false353:
+// SIMD-ONLY0-NEXT:    [[TMP164:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV354:%.*]] = sext i8 [[TMP164]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END355]]
+// SIMD-ONLY0:       cond.end355:
+// SIMD-ONLY0-NEXT:    [[COND356:%.*]] = phi i32 [ [[CONV352]], [[COND_TRUE351]] ], [ [[CONV354]], [[COND_FALSE353]] ]
+// SIMD-ONLY0-NEXT:    [[CONV357:%.*]] = trunc i32 [[COND356]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV357]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP165:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV358:%.*]] = sext i8 [[TMP165]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP166:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV359:%.*]] = sext i8 [[TMP166]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP360:%.*]] = icmp sgt i32 [[CONV358]], [[CONV359]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP360]], label [[COND_TRUE362:%.*]], label [[COND_FALSE364:%.*]]
+// SIMD-ONLY0:       cond.true362:
+// SIMD-ONLY0-NEXT:    [[TMP167:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV363:%.*]] = sext i8 [[TMP167]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END366:%.*]]
+// SIMD-ONLY0:       cond.false364:
+// SIMD-ONLY0-NEXT:    [[TMP168:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV365:%.*]] = sext i8 [[TMP168]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END366]]
+// SIMD-ONLY0:       cond.end366:
+// SIMD-ONLY0-NEXT:    [[COND367:%.*]] = phi i32 [ [[CONV363]], [[COND_TRUE362]] ], [ [[CONV365]], [[COND_FALSE364]] ]
+// SIMD-ONLY0-NEXT:    [[CONV368:%.*]] = trunc i32 [[COND367]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV368]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP169:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP169]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP170:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV369:%.*]] = sext i8 [[TMP170]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP171:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV370:%.*]] = sext i8 [[TMP171]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP371:%.*]] = icmp slt i32 [[CONV369]], [[CONV370]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP371]], label [[COND_TRUE373:%.*]], label [[COND_FALSE375:%.*]]
+// SIMD-ONLY0:       cond.true373:
+// SIMD-ONLY0-NEXT:    [[TMP172:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV374:%.*]] = sext i8 [[TMP172]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END377:%.*]]
+// SIMD-ONLY0:       cond.false375:
+// SIMD-ONLY0-NEXT:    [[TMP173:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV376:%.*]] = sext i8 [[TMP173]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END377]]
+// SIMD-ONLY0:       cond.end377:
+// SIMD-ONLY0-NEXT:    [[COND378:%.*]] = phi i32 [ [[CONV374]], [[COND_TRUE373]] ], [ [[CONV376]], [[COND_FALSE375]] ]
+// SIMD-ONLY0-NEXT:    [[CONV379:%.*]] = trunc i32 [[COND378]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV379]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP174:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP174]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP175:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV380:%.*]] = sext i8 [[TMP175]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP176:%.*]] = load i8, ptr [[CE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV381:%.*]] = sext i8 [[TMP176]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP382:%.*]] = icmp eq i32 [[CONV380]], [[CONV381]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP382]], label [[COND_TRUE384:%.*]], label [[COND_FALSE386:%.*]]
+// SIMD-ONLY0:       cond.true384:
+// SIMD-ONLY0-NEXT:    [[TMP177:%.*]] = load i8, ptr [[CD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV385:%.*]] = sext i8 [[TMP177]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END388:%.*]]
+// SIMD-ONLY0:       cond.false386:
+// SIMD-ONLY0-NEXT:    [[TMP178:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV387:%.*]] = sext i8 [[TMP178]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END388]]
+// SIMD-ONLY0:       cond.end388:
+// SIMD-ONLY0-NEXT:    [[COND389:%.*]] = phi i32 [ [[CONV385]], [[COND_TRUE384]] ], [ [[CONV387]], [[COND_FALSE386]] ]
+// SIMD-ONLY0-NEXT:    [[CONV390:%.*]] = trunc i32 [[COND389]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV390]], ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP179:%.*]] = load i8, ptr [[CX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP179]], ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP180:%.*]] = load i8, ptr [[CV]], align 1
+// SIMD-ONLY0-NEXT:    ret i8 [[TMP180]]
+//
+//
+// SIMD-ONLY0-LABEL: @ucxevd(
+// SIMD-ONLY0-NEXT:  entry:
+// SIMD-ONLY0-NEXT:    [[UCX:%.*]] = alloca i8, align 1
+// SIMD-ONLY0-NEXT:    [[UCV:%.*]] = alloca i8, align 1
+// SIMD-ONLY0-NEXT:    [[UCE:%.*]] = alloca i8, align 1
+// SIMD-ONLY0-NEXT:    [[UCD:%.*]] = alloca i8, align 1
+// SIMD-ONLY0-NEXT:    [[TMP0:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP0]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP1:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV:%.*]] = zext i8 [[TMP1]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP2:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV1:%.*]] = zext i8 [[TMP2]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV1]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// SIMD-ONLY0:       cond.true:
+// SIMD-ONLY0-NEXT:    [[TMP3:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV3:%.*]] = zext i8 [[TMP3]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END:%.*]]
+// SIMD-ONLY0:       cond.false:
+// SIMD-ONLY0-NEXT:    [[TMP4:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV4:%.*]] = zext i8 [[TMP4]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END]]
+// SIMD-ONLY0:       cond.end:
+// SIMD-ONLY0-NEXT:    [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ]
+// SIMD-ONLY0-NEXT:    [[CONV5:%.*]] = trunc i32 [[COND]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV5]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP5:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP5]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP6:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV6:%.*]] = zext i8 [[TMP6]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP7:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV7:%.*]] = zext i8 [[TMP7]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP8:%.*]] = icmp slt i32 [[CONV6]], [[CONV7]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP8]], label [[COND_TRUE10:%.*]], label [[COND_FALSE12:%.*]]
+// SIMD-ONLY0:       cond.true10:
+// SIMD-ONLY0-NEXT:    [[TMP8:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV11:%.*]] = zext i8 [[TMP8]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END14:%.*]]
+// SIMD-ONLY0:       cond.false12:
+// SIMD-ONLY0-NEXT:    [[TMP9:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV13:%.*]] = zext i8 [[TMP9]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END14]]
+// SIMD-ONLY0:       cond.end14:
+// SIMD-ONLY0-NEXT:    [[COND15:%.*]] = phi i32 [ [[CONV11]], [[COND_TRUE10]] ], [ [[CONV13]], [[COND_FALSE12]] ]
+// SIMD-ONLY0-NEXT:    [[CONV16:%.*]] = trunc i32 [[COND15]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV16]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP10:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP10]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP11:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV17:%.*]] = zext i8 [[TMP11]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP12:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV18:%.*]] = zext i8 [[TMP12]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP19:%.*]] = icmp eq i32 [[CONV17]], [[CONV18]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP19]], label [[COND_TRUE21:%.*]], label [[COND_FALSE23:%.*]]
+// SIMD-ONLY0:       cond.true21:
+// SIMD-ONLY0-NEXT:    [[TMP13:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV22:%.*]] = zext i8 [[TMP13]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END25:%.*]]
+// SIMD-ONLY0:       cond.false23:
+// SIMD-ONLY0-NEXT:    [[TMP14:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV24:%.*]] = zext i8 [[TMP14]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END25]]
+// SIMD-ONLY0:       cond.end25:
+// SIMD-ONLY0-NEXT:    [[COND26:%.*]] = phi i32 [ [[CONV22]], [[COND_TRUE21]] ], [ [[CONV24]], [[COND_FALSE23]] ]
+// SIMD-ONLY0-NEXT:    [[CONV27:%.*]] = trunc i32 [[COND26]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV27]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP15:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV28:%.*]] = zext i8 [[TMP15]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP16:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV29:%.*]] = zext i8 [[TMP16]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP30:%.*]] = icmp sgt i32 [[CONV28]], [[CONV29]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP30]], label [[COND_TRUE32:%.*]], label [[COND_FALSE34:%.*]]
+// SIMD-ONLY0:       cond.true32:
+// SIMD-ONLY0-NEXT:    [[TMP17:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV33:%.*]] = zext i8 [[TMP17]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END36:%.*]]
+// SIMD-ONLY0:       cond.false34:
+// SIMD-ONLY0-NEXT:    [[TMP18:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV35:%.*]] = zext i8 [[TMP18]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END36]]
+// SIMD-ONLY0:       cond.end36:
+// SIMD-ONLY0-NEXT:    [[COND37:%.*]] = phi i32 [ [[CONV33]], [[COND_TRUE32]] ], [ [[CONV35]], [[COND_FALSE34]] ]
+// SIMD-ONLY0-NEXT:    [[CONV38:%.*]] = trunc i32 [[COND37]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV38]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP19:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP19]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP20:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV39:%.*]] = zext i8 [[TMP20]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP21:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV40:%.*]] = zext i8 [[TMP21]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP41:%.*]] = icmp slt i32 [[CONV39]], [[CONV40]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP41]], label [[COND_TRUE43:%.*]], label [[COND_FALSE45:%.*]]
+// SIMD-ONLY0:       cond.true43:
+// SIMD-ONLY0-NEXT:    [[TMP22:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV44:%.*]] = zext i8 [[TMP22]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END47:%.*]]
+// SIMD-ONLY0:       cond.false45:
+// SIMD-ONLY0-NEXT:    [[TMP23:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV46:%.*]] = zext i8 [[TMP23]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END47]]
+// SIMD-ONLY0:       cond.end47:
+// SIMD-ONLY0-NEXT:    [[COND48:%.*]] = phi i32 [ [[CONV44]], [[COND_TRUE43]] ], [ [[CONV46]], [[COND_FALSE45]] ]
+// SIMD-ONLY0-NEXT:    [[CONV49:%.*]] = trunc i32 [[COND48]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV49]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP24:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP24]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP25:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV50:%.*]] = zext i8 [[TMP25]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP26:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV51:%.*]] = zext i8 [[TMP26]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP52:%.*]] = icmp eq i32 [[CONV50]], [[CONV51]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP52]], label [[COND_TRUE54:%.*]], label [[COND_FALSE56:%.*]]
+// SIMD-ONLY0:       cond.true54:
+// SIMD-ONLY0-NEXT:    [[TMP27:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV55:%.*]] = zext i8 [[TMP27]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END58:%.*]]
+// SIMD-ONLY0:       cond.false56:
+// SIMD-ONLY0-NEXT:    [[TMP28:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV57:%.*]] = zext i8 [[TMP28]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END58]]
+// SIMD-ONLY0:       cond.end58:
+// SIMD-ONLY0-NEXT:    [[COND59:%.*]] = phi i32 [ [[CONV55]], [[COND_TRUE54]] ], [ [[CONV57]], [[COND_FALSE56]] ]
+// SIMD-ONLY0-NEXT:    [[CONV60:%.*]] = trunc i32 [[COND59]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV60]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP29:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP29]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP30:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP30]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP31:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV61:%.*]] = zext i8 [[TMP31]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP32:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV62:%.*]] = zext i8 [[TMP32]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP63:%.*]] = icmp sgt i32 [[CONV61]], [[CONV62]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP63]], label [[COND_TRUE65:%.*]], label [[COND_FALSE67:%.*]]
+// SIMD-ONLY0:       cond.true65:
+// SIMD-ONLY0-NEXT:    [[TMP33:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV66:%.*]] = zext i8 [[TMP33]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END69:%.*]]
+// SIMD-ONLY0:       cond.false67:
+// SIMD-ONLY0-NEXT:    [[TMP34:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV68:%.*]] = zext i8 [[TMP34]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END69]]
+// SIMD-ONLY0:       cond.end69:
+// SIMD-ONLY0-NEXT:    [[COND70:%.*]] = phi i32 [ [[CONV66]], [[COND_TRUE65]] ], [ [[CONV68]], [[COND_FALSE67]] ]
+// SIMD-ONLY0-NEXT:    [[CONV71:%.*]] = trunc i32 [[COND70]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV71]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP35:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP35]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP36:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV72:%.*]] = zext i8 [[TMP36]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP37:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV73:%.*]] = zext i8 [[TMP37]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP74:%.*]] = icmp slt i32 [[CONV72]], [[CONV73]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP74]], label [[COND_TRUE76:%.*]], label [[COND_FALSE78:%.*]]
+// SIMD-ONLY0:       cond.true76:
+// SIMD-ONLY0-NEXT:    [[TMP38:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV77:%.*]] = zext i8 [[TMP38]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END80:%.*]]
+// SIMD-ONLY0:       cond.false78:
+// SIMD-ONLY0-NEXT:    [[TMP39:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV79:%.*]] = zext i8 [[TMP39]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END80]]
+// SIMD-ONLY0:       cond.end80:
+// SIMD-ONLY0-NEXT:    [[COND81:%.*]] = phi i32 [ [[CONV77]], [[COND_TRUE76]] ], [ [[CONV79]], [[COND_FALSE78]] ]
+// SIMD-ONLY0-NEXT:    [[CONV82:%.*]] = trunc i32 [[COND81]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV82]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP40:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP40]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP41:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV83:%.*]] = zext i8 [[TMP41]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP42:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV84:%.*]] = zext i8 [[TMP42]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP85:%.*]] = icmp eq i32 [[CONV83]], [[CONV84]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP85]], label [[COND_TRUE87:%.*]], label [[COND_FALSE89:%.*]]
+// SIMD-ONLY0:       cond.true87:
+// SIMD-ONLY0-NEXT:    [[TMP43:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV88:%.*]] = zext i8 [[TMP43]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END91:%.*]]
+// SIMD-ONLY0:       cond.false89:
+// SIMD-ONLY0-NEXT:    [[TMP44:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV90:%.*]] = zext i8 [[TMP44]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END91]]
+// SIMD-ONLY0:       cond.end91:
+// SIMD-ONLY0-NEXT:    [[COND92:%.*]] = phi i32 [ [[CONV88]], [[COND_TRUE87]] ], [ [[CONV90]], [[COND_FALSE89]] ]
+// SIMD-ONLY0-NEXT:    [[CONV93:%.*]] = trunc i32 [[COND92]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV93]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP45:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV94:%.*]] = zext i8 [[TMP45]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP46:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV95:%.*]] = zext i8 [[TMP46]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP96:%.*]] = icmp sgt i32 [[CONV94]], [[CONV95]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP96]], label [[COND_TRUE98:%.*]], label [[COND_FALSE100:%.*]]
+// SIMD-ONLY0:       cond.true98:
+// SIMD-ONLY0-NEXT:    [[TMP47:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV99:%.*]] = zext i8 [[TMP47]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END102:%.*]]
+// SIMD-ONLY0:       cond.false100:
+// SIMD-ONLY0-NEXT:    [[TMP48:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV101:%.*]] = zext i8 [[TMP48]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END102]]
+// SIMD-ONLY0:       cond.end102:
+// SIMD-ONLY0-NEXT:    [[COND103:%.*]] = phi i32 [ [[CONV99]], [[COND_TRUE98]] ], [ [[CONV101]], [[COND_FALSE100]] ]
+// SIMD-ONLY0-NEXT:    [[CONV104:%.*]] = trunc i32 [[COND103]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV104]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP49:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP49]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP50:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV105:%.*]] = zext i8 [[TMP50]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP51:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV106:%.*]] = zext i8 [[TMP51]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP107:%.*]] = icmp slt i32 [[CONV105]], [[CONV106]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP107]], label [[COND_TRUE109:%.*]], label [[COND_FALSE111:%.*]]
+// SIMD-ONLY0:       cond.true109:
+// SIMD-ONLY0-NEXT:    [[TMP52:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV110:%.*]] = zext i8 [[TMP52]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END113:%.*]]
+// SIMD-ONLY0:       cond.false111:
+// SIMD-ONLY0-NEXT:    [[TMP53:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV112:%.*]] = zext i8 [[TMP53]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END113]]
+// SIMD-ONLY0:       cond.end113:
+// SIMD-ONLY0-NEXT:    [[COND114:%.*]] = phi i32 [ [[CONV110]], [[COND_TRUE109]] ], [ [[CONV112]], [[COND_FALSE111]] ]
+// SIMD-ONLY0-NEXT:    [[CONV115:%.*]] = trunc i32 [[COND114]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV115]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP54:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP54]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP55:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV116:%.*]] = zext i8 [[TMP55]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP56:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV117:%.*]] = zext i8 [[TMP56]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP118:%.*]] = icmp eq i32 [[CONV116]], [[CONV117]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP118]], label [[COND_TRUE120:%.*]], label [[COND_FALSE122:%.*]]
+// SIMD-ONLY0:       cond.true120:
+// SIMD-ONLY0-NEXT:    [[TMP57:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV121:%.*]] = zext i8 [[TMP57]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END124:%.*]]
+// SIMD-ONLY0:       cond.false122:
+// SIMD-ONLY0-NEXT:    [[TMP58:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV123:%.*]] = zext i8 [[TMP58]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END124]]
+// SIMD-ONLY0:       cond.end124:
+// SIMD-ONLY0-NEXT:    [[COND125:%.*]] = phi i32 [ [[CONV121]], [[COND_TRUE120]] ], [ [[CONV123]], [[COND_FALSE122]] ]
+// SIMD-ONLY0-NEXT:    [[CONV126:%.*]] = trunc i32 [[COND125]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV126]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP59:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP59]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP60:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP60]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP61:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV127:%.*]] = zext i8 [[TMP61]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP62:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV128:%.*]] = zext i8 [[TMP62]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP129:%.*]] = icmp sgt i32 [[CONV127]], [[CONV128]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP129]], label [[COND_TRUE131:%.*]], label [[COND_FALSE133:%.*]]
+// SIMD-ONLY0:       cond.true131:
+// SIMD-ONLY0-NEXT:    [[TMP63:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV132:%.*]] = zext i8 [[TMP63]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END135:%.*]]
+// SIMD-ONLY0:       cond.false133:
+// SIMD-ONLY0-NEXT:    [[TMP64:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV134:%.*]] = zext i8 [[TMP64]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END135]]
+// SIMD-ONLY0:       cond.end135:
+// SIMD-ONLY0-NEXT:    [[COND136:%.*]] = phi i32 [ [[CONV132]], [[COND_TRUE131]] ], [ [[CONV134]], [[COND_FALSE133]] ]
+// SIMD-ONLY0-NEXT:    [[CONV137:%.*]] = trunc i32 [[COND136]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV137]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP65:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP65]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP66:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV138:%.*]] = zext i8 [[TMP66]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP67:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV139:%.*]] = zext i8 [[TMP67]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP140:%.*]] = icmp slt i32 [[CONV138]], [[CONV139]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP140]], label [[COND_TRUE142:%.*]], label [[COND_FALSE144:%.*]]
+// SIMD-ONLY0:       cond.true142:
+// SIMD-ONLY0-NEXT:    [[TMP68:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV143:%.*]] = zext i8 [[TMP68]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END146:%.*]]
+// SIMD-ONLY0:       cond.false144:
+// SIMD-ONLY0-NEXT:    [[TMP69:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV145:%.*]] = zext i8 [[TMP69]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END146]]
+// SIMD-ONLY0:       cond.end146:
+// SIMD-ONLY0-NEXT:    [[COND147:%.*]] = phi i32 [ [[CONV143]], [[COND_TRUE142]] ], [ [[CONV145]], [[COND_FALSE144]] ]
+// SIMD-ONLY0-NEXT:    [[CONV148:%.*]] = trunc i32 [[COND147]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV148]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP70:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP70]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP71:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV149:%.*]] = zext i8 [[TMP71]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP72:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV150:%.*]] = zext i8 [[TMP72]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP151:%.*]] = icmp eq i32 [[CONV149]], [[CONV150]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP151]], label [[COND_TRUE153:%.*]], label [[COND_FALSE155:%.*]]
+// SIMD-ONLY0:       cond.true153:
+// SIMD-ONLY0-NEXT:    [[TMP73:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV154:%.*]] = zext i8 [[TMP73]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END157:%.*]]
+// SIMD-ONLY0:       cond.false155:
+// SIMD-ONLY0-NEXT:    [[TMP74:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV156:%.*]] = zext i8 [[TMP74]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END157]]
+// SIMD-ONLY0:       cond.end157:
+// SIMD-ONLY0-NEXT:    [[COND158:%.*]] = phi i32 [ [[CONV154]], [[COND_TRUE153]] ], [ [[CONV156]], [[COND_FALSE155]] ]
+// SIMD-ONLY0-NEXT:    [[CONV159:%.*]] = trunc i32 [[COND158]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV159]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP75:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV160:%.*]] = zext i8 [[TMP75]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP76:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV161:%.*]] = zext i8 [[TMP76]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP162:%.*]] = icmp sgt i32 [[CONV160]], [[CONV161]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP162]], label [[COND_TRUE164:%.*]], label [[COND_FALSE166:%.*]]
+// SIMD-ONLY0:       cond.true164:
+// SIMD-ONLY0-NEXT:    [[TMP77:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV165:%.*]] = zext i8 [[TMP77]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END168:%.*]]
+// SIMD-ONLY0:       cond.false166:
+// SIMD-ONLY0-NEXT:    [[TMP78:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV167:%.*]] = zext i8 [[TMP78]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END168]]
+// SIMD-ONLY0:       cond.end168:
+// SIMD-ONLY0-NEXT:    [[COND169:%.*]] = phi i32 [ [[CONV165]], [[COND_TRUE164]] ], [ [[CONV167]], [[COND_FALSE166]] ]
+// SIMD-ONLY0-NEXT:    [[CONV170:%.*]] = trunc i32 [[COND169]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV170]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP79:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP79]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP80:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV171:%.*]] = zext i8 [[TMP80]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP81:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV172:%.*]] = zext i8 [[TMP81]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP173:%.*]] = icmp slt i32 [[CONV171]], [[CONV172]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP173]], label [[COND_TRUE175:%.*]], label [[COND_FALSE177:%.*]]
+// SIMD-ONLY0:       cond.true175:
+// SIMD-ONLY0-NEXT:    [[TMP82:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV176:%.*]] = zext i8 [[TMP82]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END179:%.*]]
+// SIMD-ONLY0:       cond.false177:
+// SIMD-ONLY0-NEXT:    [[TMP83:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV178:%.*]] = zext i8 [[TMP83]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END179]]
+// SIMD-ONLY0:       cond.end179:
+// SIMD-ONLY0-NEXT:    [[COND180:%.*]] = phi i32 [ [[CONV176]], [[COND_TRUE175]] ], [ [[CONV178]], [[COND_FALSE177]] ]
+// SIMD-ONLY0-NEXT:    [[CONV181:%.*]] = trunc i32 [[COND180]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV181]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP84:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP84]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP85:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV182:%.*]] = zext i8 [[TMP85]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP86:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV183:%.*]] = zext i8 [[TMP86]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP184:%.*]] = icmp eq i32 [[CONV182]], [[CONV183]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP184]], label [[COND_TRUE186:%.*]], label [[COND_FALSE188:%.*]]
+// SIMD-ONLY0:       cond.true186:
+// SIMD-ONLY0-NEXT:    [[TMP87:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV187:%.*]] = zext i8 [[TMP87]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END190:%.*]]
+// SIMD-ONLY0:       cond.false188:
+// SIMD-ONLY0-NEXT:    [[TMP88:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV189:%.*]] = zext i8 [[TMP88]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END190]]
+// SIMD-ONLY0:       cond.end190:
+// SIMD-ONLY0-NEXT:    [[COND191:%.*]] = phi i32 [ [[CONV187]], [[COND_TRUE186]] ], [ [[CONV189]], [[COND_FALSE188]] ]
+// SIMD-ONLY0-NEXT:    [[CONV192:%.*]] = trunc i32 [[COND191]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV192]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP89:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP89]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP90:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP90]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP91:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV193:%.*]] = zext i8 [[TMP91]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP92:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV194:%.*]] = zext i8 [[TMP92]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP195:%.*]] = icmp sgt i32 [[CONV193]], [[CONV194]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP195]], label [[COND_TRUE197:%.*]], label [[COND_FALSE199:%.*]]
+// SIMD-ONLY0:       cond.true197:
+// SIMD-ONLY0-NEXT:    [[TMP93:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV198:%.*]] = zext i8 [[TMP93]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END201:%.*]]
+// SIMD-ONLY0:       cond.false199:
+// SIMD-ONLY0-NEXT:    [[TMP94:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV200:%.*]] = zext i8 [[TMP94]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END201]]
+// SIMD-ONLY0:       cond.end201:
+// SIMD-ONLY0-NEXT:    [[COND202:%.*]] = phi i32 [ [[CONV198]], [[COND_TRUE197]] ], [ [[CONV200]], [[COND_FALSE199]] ]
+// SIMD-ONLY0-NEXT:    [[CONV203:%.*]] = trunc i32 [[COND202]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV203]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP95:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP95]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP96:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV204:%.*]] = zext i8 [[TMP96]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP97:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV205:%.*]] = zext i8 [[TMP97]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP206:%.*]] = icmp slt i32 [[CONV204]], [[CONV205]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP206]], label [[COND_TRUE208:%.*]], label [[COND_FALSE210:%.*]]
+// SIMD-ONLY0:       cond.true208:
+// SIMD-ONLY0-NEXT:    [[TMP98:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV209:%.*]] = zext i8 [[TMP98]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END212:%.*]]
+// SIMD-ONLY0:       cond.false210:
+// SIMD-ONLY0-NEXT:    [[TMP99:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV211:%.*]] = zext i8 [[TMP99]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END212]]
+// SIMD-ONLY0:       cond.end212:
+// SIMD-ONLY0-NEXT:    [[COND213:%.*]] = phi i32 [ [[CONV209]], [[COND_TRUE208]] ], [ [[CONV211]], [[COND_FALSE210]] ]
+// SIMD-ONLY0-NEXT:    [[CONV214:%.*]] = trunc i32 [[COND213]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV214]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP100:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP100]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP101:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV215:%.*]] = zext i8 [[TMP101]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP102:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV216:%.*]] = zext i8 [[TMP102]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP217:%.*]] = icmp eq i32 [[CONV215]], [[CONV216]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP217]], label [[COND_TRUE219:%.*]], label [[COND_FALSE221:%.*]]
+// SIMD-ONLY0:       cond.true219:
+// SIMD-ONLY0-NEXT:    [[TMP103:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV220:%.*]] = zext i8 [[TMP103]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END223:%.*]]
+// SIMD-ONLY0:       cond.false221:
+// SIMD-ONLY0-NEXT:    [[TMP104:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV222:%.*]] = zext i8 [[TMP104]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END223]]
+// SIMD-ONLY0:       cond.end223:
+// SIMD-ONLY0-NEXT:    [[COND224:%.*]] = phi i32 [ [[CONV220]], [[COND_TRUE219]] ], [ [[CONV222]], [[COND_FALSE221]] ]
+// SIMD-ONLY0-NEXT:    [[CONV225:%.*]] = trunc i32 [[COND224]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV225]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP105:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV226:%.*]] = zext i8 [[TMP105]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP106:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV227:%.*]] = zext i8 [[TMP106]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP228:%.*]] = icmp sgt i32 [[CONV226]], [[CONV227]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP228]], label [[COND_TRUE230:%.*]], label [[COND_FALSE232:%.*]]
+// SIMD-ONLY0:       cond.true230:
+// SIMD-ONLY0-NEXT:    [[TMP107:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV231:%.*]] = zext i8 [[TMP107]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END234:%.*]]
+// SIMD-ONLY0:       cond.false232:
+// SIMD-ONLY0-NEXT:    [[TMP108:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV233:%.*]] = zext i8 [[TMP108]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END234]]
+// SIMD-ONLY0:       cond.end234:
+// SIMD-ONLY0-NEXT:    [[COND235:%.*]] = phi i32 [ [[CONV231]], [[COND_TRUE230]] ], [ [[CONV233]], [[COND_FALSE232]] ]
+// SIMD-ONLY0-NEXT:    [[CONV236:%.*]] = trunc i32 [[COND235]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV236]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP109:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP109]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP110:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV237:%.*]] = zext i8 [[TMP110]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP111:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV238:%.*]] = zext i8 [[TMP111]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP239:%.*]] = icmp slt i32 [[CONV237]], [[CONV238]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP239]], label [[COND_TRUE241:%.*]], label [[COND_FALSE243:%.*]]
+// SIMD-ONLY0:       cond.true241:
+// SIMD-ONLY0-NEXT:    [[TMP112:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV242:%.*]] = zext i8 [[TMP112]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END245:%.*]]
+// SIMD-ONLY0:       cond.false243:
+// SIMD-ONLY0-NEXT:    [[TMP113:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV244:%.*]] = zext i8 [[TMP113]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END245]]
+// SIMD-ONLY0:       cond.end245:
+// SIMD-ONLY0-NEXT:    [[COND246:%.*]] = phi i32 [ [[CONV242]], [[COND_TRUE241]] ], [ [[CONV244]], [[COND_FALSE243]] ]
+// SIMD-ONLY0-NEXT:    [[CONV247:%.*]] = trunc i32 [[COND246]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV247]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP114:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP114]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP115:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV248:%.*]] = zext i8 [[TMP115]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP116:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV249:%.*]] = zext i8 [[TMP116]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP250:%.*]] = icmp eq i32 [[CONV248]], [[CONV249]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP250]], label [[COND_TRUE252:%.*]], label [[COND_FALSE254:%.*]]
+// SIMD-ONLY0:       cond.true252:
+// SIMD-ONLY0-NEXT:    [[TMP117:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV253:%.*]] = zext i8 [[TMP117]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END256:%.*]]
+// SIMD-ONLY0:       cond.false254:
+// SIMD-ONLY0-NEXT:    [[TMP118:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV255:%.*]] = zext i8 [[TMP118]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END256]]
+// SIMD-ONLY0:       cond.end256:
+// SIMD-ONLY0-NEXT:    [[COND257:%.*]] = phi i32 [ [[CONV253]], [[COND_TRUE252]] ], [ [[CONV255]], [[COND_FALSE254]] ]
+// SIMD-ONLY0-NEXT:    [[CONV258:%.*]] = trunc i32 [[COND257]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV258]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP119:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP119]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP120:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP120]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP121:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV259:%.*]] = zext i8 [[TMP121]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP122:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV260:%.*]] = zext i8 [[TMP122]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP261:%.*]] = icmp sgt i32 [[CONV259]], [[CONV260]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP261]], label [[COND_TRUE263:%.*]], label [[COND_FALSE265:%.*]]
+// SIMD-ONLY0:       cond.true263:
+// SIMD-ONLY0-NEXT:    [[TMP123:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV264:%.*]] = zext i8 [[TMP123]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END267:%.*]]
+// SIMD-ONLY0:       cond.false265:
+// SIMD-ONLY0-NEXT:    [[TMP124:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV266:%.*]] = zext i8 [[TMP124]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END267]]
+// SIMD-ONLY0:       cond.end267:
+// SIMD-ONLY0-NEXT:    [[COND268:%.*]] = phi i32 [ [[CONV264]], [[COND_TRUE263]] ], [ [[CONV266]], [[COND_FALSE265]] ]
+// SIMD-ONLY0-NEXT:    [[CONV269:%.*]] = trunc i32 [[COND268]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV269]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP125:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP125]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP126:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV270:%.*]] = zext i8 [[TMP126]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP127:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV271:%.*]] = zext i8 [[TMP127]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP272:%.*]] = icmp slt i32 [[CONV270]], [[CONV271]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP272]], label [[COND_TRUE274:%.*]], label [[COND_FALSE276:%.*]]
+// SIMD-ONLY0:       cond.true274:
+// SIMD-ONLY0-NEXT:    [[TMP128:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV275:%.*]] = zext i8 [[TMP128]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END278:%.*]]
+// SIMD-ONLY0:       cond.false276:
+// SIMD-ONLY0-NEXT:    [[TMP129:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV277:%.*]] = zext i8 [[TMP129]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END278]]
+// SIMD-ONLY0:       cond.end278:
+// SIMD-ONLY0-NEXT:    [[COND279:%.*]] = phi i32 [ [[CONV275]], [[COND_TRUE274]] ], [ [[CONV277]], [[COND_FALSE276]] ]
+// SIMD-ONLY0-NEXT:    [[CONV280:%.*]] = trunc i32 [[COND279]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV280]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP130:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP130]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP131:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV281:%.*]] = zext i8 [[TMP131]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP132:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV282:%.*]] = zext i8 [[TMP132]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP283:%.*]] = icmp eq i32 [[CONV281]], [[CONV282]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP283]], label [[COND_TRUE285:%.*]], label [[COND_FALSE287:%.*]]
+// SIMD-ONLY0:       cond.true285:
+// SIMD-ONLY0-NEXT:    [[TMP133:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV286:%.*]] = zext i8 [[TMP133]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END289:%.*]]
+// SIMD-ONLY0:       cond.false287:
+// SIMD-ONLY0-NEXT:    [[TMP134:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV288:%.*]] = zext i8 [[TMP134]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END289]]
+// SIMD-ONLY0:       cond.end289:
+// SIMD-ONLY0-NEXT:    [[COND290:%.*]] = phi i32 [ [[CONV286]], [[COND_TRUE285]] ], [ [[CONV288]], [[COND_FALSE287]] ]
+// SIMD-ONLY0-NEXT:    [[CONV291:%.*]] = trunc i32 [[COND290]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV291]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP135:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV292:%.*]] = zext i8 [[TMP135]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP136:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV293:%.*]] = zext i8 [[TMP136]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP294:%.*]] = icmp sgt i32 [[CONV292]], [[CONV293]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP294]], label [[COND_TRUE296:%.*]], label [[COND_FALSE298:%.*]]
+// SIMD-ONLY0:       cond.true296:
+// SIMD-ONLY0-NEXT:    [[TMP137:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV297:%.*]] = zext i8 [[TMP137]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END300:%.*]]
+// SIMD-ONLY0:       cond.false298:
+// SIMD-ONLY0-NEXT:    [[TMP138:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV299:%.*]] = zext i8 [[TMP138]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END300]]
+// SIMD-ONLY0:       cond.end300:
+// SIMD-ONLY0-NEXT:    [[COND301:%.*]] = phi i32 [ [[CONV297]], [[COND_TRUE296]] ], [ [[CONV299]], [[COND_FALSE298]] ]
+// SIMD-ONLY0-NEXT:    [[CONV302:%.*]] = trunc i32 [[COND301]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV302]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP139:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP139]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP140:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV303:%.*]] = zext i8 [[TMP140]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP141:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV304:%.*]] = zext i8 [[TMP141]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP305:%.*]] = icmp slt i32 [[CONV303]], [[CONV304]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP305]], label [[COND_TRUE307:%.*]], label [[COND_FALSE309:%.*]]
+// SIMD-ONLY0:       cond.true307:
+// SIMD-ONLY0-NEXT:    [[TMP142:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV308:%.*]] = zext i8 [[TMP142]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END311:%.*]]
+// SIMD-ONLY0:       cond.false309:
+// SIMD-ONLY0-NEXT:    [[TMP143:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV310:%.*]] = zext i8 [[TMP143]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END311]]
+// SIMD-ONLY0:       cond.end311:
+// SIMD-ONLY0-NEXT:    [[COND312:%.*]] = phi i32 [ [[CONV308]], [[COND_TRUE307]] ], [ [[CONV310]], [[COND_FALSE309]] ]
+// SIMD-ONLY0-NEXT:    [[CONV313:%.*]] = trunc i32 [[COND312]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV313]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP144:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP144]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP145:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV314:%.*]] = zext i8 [[TMP145]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP146:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV315:%.*]] = zext i8 [[TMP146]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP316:%.*]] = icmp eq i32 [[CONV314]], [[CONV315]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP316]], label [[COND_TRUE318:%.*]], label [[COND_FALSE320:%.*]]
+// SIMD-ONLY0:       cond.true318:
+// SIMD-ONLY0-NEXT:    [[TMP147:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV319:%.*]] = zext i8 [[TMP147]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END322:%.*]]
+// SIMD-ONLY0:       cond.false320:
+// SIMD-ONLY0-NEXT:    [[TMP148:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV321:%.*]] = zext i8 [[TMP148]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END322]]
+// SIMD-ONLY0:       cond.end322:
+// SIMD-ONLY0-NEXT:    [[COND323:%.*]] = phi i32 [ [[CONV319]], [[COND_TRUE318]] ], [ [[CONV321]], [[COND_FALSE320]] ]
+// SIMD-ONLY0-NEXT:    [[CONV324:%.*]] = trunc i32 [[COND323]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV324]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP149:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP149]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP150:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP150]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP151:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV325:%.*]] = zext i8 [[TMP151]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP152:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV326:%.*]] = zext i8 [[TMP152]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP327:%.*]] = icmp sgt i32 [[CONV325]], [[CONV326]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP327]], label [[COND_TRUE329:%.*]], label [[COND_FALSE331:%.*]]
+// SIMD-ONLY0:       cond.true329:
+// SIMD-ONLY0-NEXT:    [[TMP153:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV330:%.*]] = zext i8 [[TMP153]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END333:%.*]]
+// SIMD-ONLY0:       cond.false331:
+// SIMD-ONLY0-NEXT:    [[TMP154:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV332:%.*]] = zext i8 [[TMP154]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END333]]
+// SIMD-ONLY0:       cond.end333:
+// SIMD-ONLY0-NEXT:    [[COND334:%.*]] = phi i32 [ [[CONV330]], [[COND_TRUE329]] ], [ [[CONV332]], [[COND_FALSE331]] ]
+// SIMD-ONLY0-NEXT:    [[CONV335:%.*]] = trunc i32 [[COND334]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV335]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP155:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP155]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP156:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV336:%.*]] = zext i8 [[TMP156]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP157:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV337:%.*]] = zext i8 [[TMP157]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP338:%.*]] = icmp slt i32 [[CONV336]], [[CONV337]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP338]], label [[COND_TRUE340:%.*]], label [[COND_FALSE342:%.*]]
+// SIMD-ONLY0:       cond.true340:
+// SIMD-ONLY0-NEXT:    [[TMP158:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV341:%.*]] = zext i8 [[TMP158]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END344:%.*]]
+// SIMD-ONLY0:       cond.false342:
+// SIMD-ONLY0-NEXT:    [[TMP159:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV343:%.*]] = zext i8 [[TMP159]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END344]]
+// SIMD-ONLY0:       cond.end344:
+// SIMD-ONLY0-NEXT:    [[COND345:%.*]] = phi i32 [ [[CONV341]], [[COND_TRUE340]] ], [ [[CONV343]], [[COND_FALSE342]] ]
+// SIMD-ONLY0-NEXT:    [[CONV346:%.*]] = trunc i32 [[COND345]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV346]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP160:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP160]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP161:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV347:%.*]] = zext i8 [[TMP161]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP162:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV348:%.*]] = zext i8 [[TMP162]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP349:%.*]] = icmp eq i32 [[CONV347]], [[CONV348]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP349]], label [[COND_TRUE351:%.*]], label [[COND_FALSE353:%.*]]
+// SIMD-ONLY0:       cond.true351:
+// SIMD-ONLY0-NEXT:    [[TMP163:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV352:%.*]] = zext i8 [[TMP163]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END355:%.*]]
+// SIMD-ONLY0:       cond.false353:
+// SIMD-ONLY0-NEXT:    [[TMP164:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV354:%.*]] = zext i8 [[TMP164]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END355]]
+// SIMD-ONLY0:       cond.end355:
+// SIMD-ONLY0-NEXT:    [[COND356:%.*]] = phi i32 [ [[CONV352]], [[COND_TRUE351]] ], [ [[CONV354]], [[COND_FALSE353]] ]
+// SIMD-ONLY0-NEXT:    [[CONV357:%.*]] = trunc i32 [[COND356]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV357]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP165:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV358:%.*]] = zext i8 [[TMP165]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP166:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV359:%.*]] = zext i8 [[TMP166]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP360:%.*]] = icmp sgt i32 [[CONV358]], [[CONV359]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP360]], label [[COND_TRUE362:%.*]], label [[COND_FALSE364:%.*]]
+// SIMD-ONLY0:       cond.true362:
+// SIMD-ONLY0-NEXT:    [[TMP167:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV363:%.*]] = zext i8 [[TMP167]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END366:%.*]]
+// SIMD-ONLY0:       cond.false364:
+// SIMD-ONLY0-NEXT:    [[TMP168:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV365:%.*]] = zext i8 [[TMP168]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END366]]
+// SIMD-ONLY0:       cond.end366:
+// SIMD-ONLY0-NEXT:    [[COND367:%.*]] = phi i32 [ [[CONV363]], [[COND_TRUE362]] ], [ [[CONV365]], [[COND_FALSE364]] ]
+// SIMD-ONLY0-NEXT:    [[CONV368:%.*]] = trunc i32 [[COND367]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV368]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP169:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP169]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP170:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV369:%.*]] = zext i8 [[TMP170]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP171:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV370:%.*]] = zext i8 [[TMP171]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP371:%.*]] = icmp slt i32 [[CONV369]], [[CONV370]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP371]], label [[COND_TRUE373:%.*]], label [[COND_FALSE375:%.*]]
+// SIMD-ONLY0:       cond.true373:
+// SIMD-ONLY0-NEXT:    [[TMP172:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV374:%.*]] = zext i8 [[TMP172]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END377:%.*]]
+// SIMD-ONLY0:       cond.false375:
+// SIMD-ONLY0-NEXT:    [[TMP173:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV376:%.*]] = zext i8 [[TMP173]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END377]]
+// SIMD-ONLY0:       cond.end377:
+// SIMD-ONLY0-NEXT:    [[COND378:%.*]] = phi i32 [ [[CONV374]], [[COND_TRUE373]] ], [ [[CONV376]], [[COND_FALSE375]] ]
+// SIMD-ONLY0-NEXT:    [[CONV379:%.*]] = trunc i32 [[COND378]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV379]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP174:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP174]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP175:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV380:%.*]] = zext i8 [[TMP175]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP176:%.*]] = load i8, ptr [[UCE]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV381:%.*]] = zext i8 [[TMP176]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP382:%.*]] = icmp eq i32 [[CONV380]], [[CONV381]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP382]], label [[COND_TRUE384:%.*]], label [[COND_FALSE386:%.*]]
+// SIMD-ONLY0:       cond.true384:
+// SIMD-ONLY0-NEXT:    [[TMP177:%.*]] = load i8, ptr [[UCD]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV385:%.*]] = zext i8 [[TMP177]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END388:%.*]]
+// SIMD-ONLY0:       cond.false386:
+// SIMD-ONLY0-NEXT:    [[TMP178:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[CONV387:%.*]] = zext i8 [[TMP178]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END388]]
+// SIMD-ONLY0:       cond.end388:
+// SIMD-ONLY0-NEXT:    [[COND389:%.*]] = phi i32 [ [[CONV385]], [[COND_TRUE384]] ], [ [[CONV387]], [[COND_FALSE386]] ]
+// SIMD-ONLY0-NEXT:    [[CONV390:%.*]] = trunc i32 [[COND389]] to i8
+// SIMD-ONLY0-NEXT:    store i8 [[CONV390]], ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP179:%.*]] = load i8, ptr [[UCX]], align 1
+// SIMD-ONLY0-NEXT:    store i8 [[TMP179]], ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    [[TMP180:%.*]] = load i8, ptr [[UCV]], align 1
+// SIMD-ONLY0-NEXT:    ret i8 [[TMP180]]
+//
+//
+// SIMD-ONLY0-LABEL: @sxevd(
+// SIMD-ONLY0-NEXT:  entry:
+// SIMD-ONLY0-NEXT:    [[SX:%.*]] = alloca i16, align 2
+// SIMD-ONLY0-NEXT:    [[SV:%.*]] = alloca i16, align 2
+// SIMD-ONLY0-NEXT:    [[SE:%.*]] = alloca i16, align 2
+// SIMD-ONLY0-NEXT:    [[SD:%.*]] = alloca i16, align 2
+// SIMD-ONLY0-NEXT:    [[TMP0:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP0]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV:%.*]] = sext i16 [[TMP1]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP2:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1:%.*]] = sext i16 [[TMP2]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV1]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// SIMD-ONLY0:       cond.true:
+// SIMD-ONLY0-NEXT:    [[TMP3:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP3]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END:%.*]]
+// SIMD-ONLY0:       cond.false:
+// SIMD-ONLY0-NEXT:    [[TMP4:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV4:%.*]] = sext i16 [[TMP4]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END]]
+// SIMD-ONLY0:       cond.end:
+// SIMD-ONLY0-NEXT:    [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ]
+// SIMD-ONLY0-NEXT:    [[CONV5:%.*]] = trunc i32 [[COND]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV5]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP5:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP5]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP6:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV6:%.*]] = sext i16 [[TMP6]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP7:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV7:%.*]] = sext i16 [[TMP7]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP8:%.*]] = icmp slt i32 [[CONV6]], [[CONV7]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP8]], label [[COND_TRUE10:%.*]], label [[COND_FALSE12:%.*]]
+// SIMD-ONLY0:       cond.true10:
+// SIMD-ONLY0-NEXT:    [[TMP8:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV11:%.*]] = sext i16 [[TMP8]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END14:%.*]]
+// SIMD-ONLY0:       cond.false12:
+// SIMD-ONLY0-NEXT:    [[TMP9:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV13:%.*]] = sext i16 [[TMP9]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END14]]
+// SIMD-ONLY0:       cond.end14:
+// SIMD-ONLY0-NEXT:    [[COND15:%.*]] = phi i32 [ [[CONV11]], [[COND_TRUE10]] ], [ [[CONV13]], [[COND_FALSE12]] ]
+// SIMD-ONLY0-NEXT:    [[CONV16:%.*]] = trunc i32 [[COND15]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV16]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP10:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP10]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP11:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV17:%.*]] = sext i16 [[TMP11]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP12:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV18:%.*]] = sext i16 [[TMP12]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP19:%.*]] = icmp eq i32 [[CONV17]], [[CONV18]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP19]], label [[COND_TRUE21:%.*]], label [[COND_FALSE23:%.*]]
+// SIMD-ONLY0:       cond.true21:
+// SIMD-ONLY0-NEXT:    [[TMP13:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV22:%.*]] = sext i16 [[TMP13]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END25:%.*]]
+// SIMD-ONLY0:       cond.false23:
+// SIMD-ONLY0-NEXT:    [[TMP14:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV24:%.*]] = sext i16 [[TMP14]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END25]]
+// SIMD-ONLY0:       cond.end25:
+// SIMD-ONLY0-NEXT:    [[COND26:%.*]] = phi i32 [ [[CONV22]], [[COND_TRUE21]] ], [ [[CONV24]], [[COND_FALSE23]] ]
+// SIMD-ONLY0-NEXT:    [[CONV27:%.*]] = trunc i32 [[COND26]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV27]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP15:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV28:%.*]] = sext i16 [[TMP15]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP16:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV29:%.*]] = sext i16 [[TMP16]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP30:%.*]] = icmp sgt i32 [[CONV28]], [[CONV29]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP30]], label [[COND_TRUE32:%.*]], label [[COND_FALSE34:%.*]]
+// SIMD-ONLY0:       cond.true32:
+// SIMD-ONLY0-NEXT:    [[TMP17:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV33:%.*]] = sext i16 [[TMP17]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END36:%.*]]
+// SIMD-ONLY0:       cond.false34:
+// SIMD-ONLY0-NEXT:    [[TMP18:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV35:%.*]] = sext i16 [[TMP18]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END36]]
+// SIMD-ONLY0:       cond.end36:
+// SIMD-ONLY0-NEXT:    [[COND37:%.*]] = phi i32 [ [[CONV33]], [[COND_TRUE32]] ], [ [[CONV35]], [[COND_FALSE34]] ]
+// SIMD-ONLY0-NEXT:    [[CONV38:%.*]] = trunc i32 [[COND37]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV38]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP19:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP19]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP20:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV39:%.*]] = sext i16 [[TMP20]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP21:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV40:%.*]] = sext i16 [[TMP21]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP41:%.*]] = icmp slt i32 [[CONV39]], [[CONV40]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP41]], label [[COND_TRUE43:%.*]], label [[COND_FALSE45:%.*]]
+// SIMD-ONLY0:       cond.true43:
+// SIMD-ONLY0-NEXT:    [[TMP22:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV44:%.*]] = sext i16 [[TMP22]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END47:%.*]]
+// SIMD-ONLY0:       cond.false45:
+// SIMD-ONLY0-NEXT:    [[TMP23:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV46:%.*]] = sext i16 [[TMP23]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END47]]
+// SIMD-ONLY0:       cond.end47:
+// SIMD-ONLY0-NEXT:    [[COND48:%.*]] = phi i32 [ [[CONV44]], [[COND_TRUE43]] ], [ [[CONV46]], [[COND_FALSE45]] ]
+// SIMD-ONLY0-NEXT:    [[CONV49:%.*]] = trunc i32 [[COND48]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV49]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP24:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP24]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP25:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV50:%.*]] = sext i16 [[TMP25]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP26:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV51:%.*]] = sext i16 [[TMP26]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP52:%.*]] = icmp eq i32 [[CONV50]], [[CONV51]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP52]], label [[COND_TRUE54:%.*]], label [[COND_FALSE56:%.*]]
+// SIMD-ONLY0:       cond.true54:
+// SIMD-ONLY0-NEXT:    [[TMP27:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV55:%.*]] = sext i16 [[TMP27]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END58:%.*]]
+// SIMD-ONLY0:       cond.false56:
+// SIMD-ONLY0-NEXT:    [[TMP28:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV57:%.*]] = sext i16 [[TMP28]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END58]]
+// SIMD-ONLY0:       cond.end58:
+// SIMD-ONLY0-NEXT:    [[COND59:%.*]] = phi i32 [ [[CONV55]], [[COND_TRUE54]] ], [ [[CONV57]], [[COND_FALSE56]] ]
+// SIMD-ONLY0-NEXT:    [[CONV60:%.*]] = trunc i32 [[COND59]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV60]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP29:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP29]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP30:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP30]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP31:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV61:%.*]] = sext i16 [[TMP31]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP32:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV62:%.*]] = sext i16 [[TMP32]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP63:%.*]] = icmp sgt i32 [[CONV61]], [[CONV62]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP63]], label [[COND_TRUE65:%.*]], label [[COND_FALSE67:%.*]]
+// SIMD-ONLY0:       cond.true65:
+// SIMD-ONLY0-NEXT:    [[TMP33:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV66:%.*]] = sext i16 [[TMP33]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END69:%.*]]
+// SIMD-ONLY0:       cond.false67:
+// SIMD-ONLY0-NEXT:    [[TMP34:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV68:%.*]] = sext i16 [[TMP34]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END69]]
+// SIMD-ONLY0:       cond.end69:
+// SIMD-ONLY0-NEXT:    [[COND70:%.*]] = phi i32 [ [[CONV66]], [[COND_TRUE65]] ], [ [[CONV68]], [[COND_FALSE67]] ]
+// SIMD-ONLY0-NEXT:    [[CONV71:%.*]] = trunc i32 [[COND70]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV71]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP35:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP35]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP36:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV72:%.*]] = sext i16 [[TMP36]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP37:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV73:%.*]] = sext i16 [[TMP37]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP74:%.*]] = icmp slt i32 [[CONV72]], [[CONV73]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP74]], label [[COND_TRUE76:%.*]], label [[COND_FALSE78:%.*]]
+// SIMD-ONLY0:       cond.true76:
+// SIMD-ONLY0-NEXT:    [[TMP38:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV77:%.*]] = sext i16 [[TMP38]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END80:%.*]]
+// SIMD-ONLY0:       cond.false78:
+// SIMD-ONLY0-NEXT:    [[TMP39:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV79:%.*]] = sext i16 [[TMP39]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END80]]
+// SIMD-ONLY0:       cond.end80:
+// SIMD-ONLY0-NEXT:    [[COND81:%.*]] = phi i32 [ [[CONV77]], [[COND_TRUE76]] ], [ [[CONV79]], [[COND_FALSE78]] ]
+// SIMD-ONLY0-NEXT:    [[CONV82:%.*]] = trunc i32 [[COND81]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV82]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP40:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP40]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP41:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV83:%.*]] = sext i16 [[TMP41]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP42:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV84:%.*]] = sext i16 [[TMP42]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP85:%.*]] = icmp eq i32 [[CONV83]], [[CONV84]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP85]], label [[COND_TRUE87:%.*]], label [[COND_FALSE89:%.*]]
+// SIMD-ONLY0:       cond.true87:
+// SIMD-ONLY0-NEXT:    [[TMP43:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV88:%.*]] = sext i16 [[TMP43]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END91:%.*]]
+// SIMD-ONLY0:       cond.false89:
+// SIMD-ONLY0-NEXT:    [[TMP44:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV90:%.*]] = sext i16 [[TMP44]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END91]]
+// SIMD-ONLY0:       cond.end91:
+// SIMD-ONLY0-NEXT:    [[COND92:%.*]] = phi i32 [ [[CONV88]], [[COND_TRUE87]] ], [ [[CONV90]], [[COND_FALSE89]] ]
+// SIMD-ONLY0-NEXT:    [[CONV93:%.*]] = trunc i32 [[COND92]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV93]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP45:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV94:%.*]] = sext i16 [[TMP45]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP46:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV95:%.*]] = sext i16 [[TMP46]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP96:%.*]] = icmp sgt i32 [[CONV94]], [[CONV95]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP96]], label [[COND_TRUE98:%.*]], label [[COND_FALSE100:%.*]]
+// SIMD-ONLY0:       cond.true98:
+// SIMD-ONLY0-NEXT:    [[TMP47:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV99:%.*]] = sext i16 [[TMP47]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END102:%.*]]
+// SIMD-ONLY0:       cond.false100:
+// SIMD-ONLY0-NEXT:    [[TMP48:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV101:%.*]] = sext i16 [[TMP48]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END102]]
+// SIMD-ONLY0:       cond.end102:
+// SIMD-ONLY0-NEXT:    [[COND103:%.*]] = phi i32 [ [[CONV99]], [[COND_TRUE98]] ], [ [[CONV101]], [[COND_FALSE100]] ]
+// SIMD-ONLY0-NEXT:    [[CONV104:%.*]] = trunc i32 [[COND103]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV104]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP49:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP49]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP50:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV105:%.*]] = sext i16 [[TMP50]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP51:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV106:%.*]] = sext i16 [[TMP51]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP107:%.*]] = icmp slt i32 [[CONV105]], [[CONV106]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP107]], label [[COND_TRUE109:%.*]], label [[COND_FALSE111:%.*]]
+// SIMD-ONLY0:       cond.true109:
+// SIMD-ONLY0-NEXT:    [[TMP52:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV110:%.*]] = sext i16 [[TMP52]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END113:%.*]]
+// SIMD-ONLY0:       cond.false111:
+// SIMD-ONLY0-NEXT:    [[TMP53:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV112:%.*]] = sext i16 [[TMP53]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END113]]
+// SIMD-ONLY0:       cond.end113:
+// SIMD-ONLY0-NEXT:    [[COND114:%.*]] = phi i32 [ [[CONV110]], [[COND_TRUE109]] ], [ [[CONV112]], [[COND_FALSE111]] ]
+// SIMD-ONLY0-NEXT:    [[CONV115:%.*]] = trunc i32 [[COND114]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV115]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP54:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP54]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP55:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV116:%.*]] = sext i16 [[TMP55]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP56:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV117:%.*]] = sext i16 [[TMP56]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP118:%.*]] = icmp eq i32 [[CONV116]], [[CONV117]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP118]], label [[COND_TRUE120:%.*]], label [[COND_FALSE122:%.*]]
+// SIMD-ONLY0:       cond.true120:
+// SIMD-ONLY0-NEXT:    [[TMP57:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV121:%.*]] = sext i16 [[TMP57]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END124:%.*]]
+// SIMD-ONLY0:       cond.false122:
+// SIMD-ONLY0-NEXT:    [[TMP58:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV123:%.*]] = sext i16 [[TMP58]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END124]]
+// SIMD-ONLY0:       cond.end124:
+// SIMD-ONLY0-NEXT:    [[COND125:%.*]] = phi i32 [ [[CONV121]], [[COND_TRUE120]] ], [ [[CONV123]], [[COND_FALSE122]] ]
+// SIMD-ONLY0-NEXT:    [[CONV126:%.*]] = trunc i32 [[COND125]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV126]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP59:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP59]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP60:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP60]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP61:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV127:%.*]] = sext i16 [[TMP61]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP62:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV128:%.*]] = sext i16 [[TMP62]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP129:%.*]] = icmp sgt i32 [[CONV127]], [[CONV128]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP129]], label [[COND_TRUE131:%.*]], label [[COND_FALSE133:%.*]]
+// SIMD-ONLY0:       cond.true131:
+// SIMD-ONLY0-NEXT:    [[TMP63:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV132:%.*]] = sext i16 [[TMP63]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END135:%.*]]
+// SIMD-ONLY0:       cond.false133:
+// SIMD-ONLY0-NEXT:    [[TMP64:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV134:%.*]] = sext i16 [[TMP64]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END135]]
+// SIMD-ONLY0:       cond.end135:
+// SIMD-ONLY0-NEXT:    [[COND136:%.*]] = phi i32 [ [[CONV132]], [[COND_TRUE131]] ], [ [[CONV134]], [[COND_FALSE133]] ]
+// SIMD-ONLY0-NEXT:    [[CONV137:%.*]] = trunc i32 [[COND136]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV137]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP65:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP65]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP66:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV138:%.*]] = sext i16 [[TMP66]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP67:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV139:%.*]] = sext i16 [[TMP67]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP140:%.*]] = icmp slt i32 [[CONV138]], [[CONV139]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP140]], label [[COND_TRUE142:%.*]], label [[COND_FALSE144:%.*]]
+// SIMD-ONLY0:       cond.true142:
+// SIMD-ONLY0-NEXT:    [[TMP68:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV143:%.*]] = sext i16 [[TMP68]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END146:%.*]]
+// SIMD-ONLY0:       cond.false144:
+// SIMD-ONLY0-NEXT:    [[TMP69:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV145:%.*]] = sext i16 [[TMP69]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END146]]
+// SIMD-ONLY0:       cond.end146:
+// SIMD-ONLY0-NEXT:    [[COND147:%.*]] = phi i32 [ [[CONV143]], [[COND_TRUE142]] ], [ [[CONV145]], [[COND_FALSE144]] ]
+// SIMD-ONLY0-NEXT:    [[CONV148:%.*]] = trunc i32 [[COND147]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV148]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP70:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP70]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP71:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV149:%.*]] = sext i16 [[TMP71]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP72:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV150:%.*]] = sext i16 [[TMP72]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP151:%.*]] = icmp eq i32 [[CONV149]], [[CONV150]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP151]], label [[COND_TRUE153:%.*]], label [[COND_FALSE155:%.*]]
+// SIMD-ONLY0:       cond.true153:
+// SIMD-ONLY0-NEXT:    [[TMP73:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV154:%.*]] = sext i16 [[TMP73]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END157:%.*]]
+// SIMD-ONLY0:       cond.false155:
+// SIMD-ONLY0-NEXT:    [[TMP74:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV156:%.*]] = sext i16 [[TMP74]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END157]]
+// SIMD-ONLY0:       cond.end157:
+// SIMD-ONLY0-NEXT:    [[COND158:%.*]] = phi i32 [ [[CONV154]], [[COND_TRUE153]] ], [ [[CONV156]], [[COND_FALSE155]] ]
+// SIMD-ONLY0-NEXT:    [[CONV159:%.*]] = trunc i32 [[COND158]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV159]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP75:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV160:%.*]] = sext i16 [[TMP75]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP76:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV161:%.*]] = sext i16 [[TMP76]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP162:%.*]] = icmp sgt i32 [[CONV160]], [[CONV161]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP162]], label [[COND_TRUE164:%.*]], label [[COND_FALSE166:%.*]]
+// SIMD-ONLY0:       cond.true164:
+// SIMD-ONLY0-NEXT:    [[TMP77:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV165:%.*]] = sext i16 [[TMP77]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END168:%.*]]
+// SIMD-ONLY0:       cond.false166:
+// SIMD-ONLY0-NEXT:    [[TMP78:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV167:%.*]] = sext i16 [[TMP78]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END168]]
+// SIMD-ONLY0:       cond.end168:
+// SIMD-ONLY0-NEXT:    [[COND169:%.*]] = phi i32 [ [[CONV165]], [[COND_TRUE164]] ], [ [[CONV167]], [[COND_FALSE166]] ]
+// SIMD-ONLY0-NEXT:    [[CONV170:%.*]] = trunc i32 [[COND169]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV170]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP79:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP79]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP80:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV171:%.*]] = sext i16 [[TMP80]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP81:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV172:%.*]] = sext i16 [[TMP81]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP173:%.*]] = icmp slt i32 [[CONV171]], [[CONV172]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP173]], label [[COND_TRUE175:%.*]], label [[COND_FALSE177:%.*]]
+// SIMD-ONLY0:       cond.true175:
+// SIMD-ONLY0-NEXT:    [[TMP82:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV176:%.*]] = sext i16 [[TMP82]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END179:%.*]]
+// SIMD-ONLY0:       cond.false177:
+// SIMD-ONLY0-NEXT:    [[TMP83:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV178:%.*]] = sext i16 [[TMP83]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END179]]
+// SIMD-ONLY0:       cond.end179:
+// SIMD-ONLY0-NEXT:    [[COND180:%.*]] = phi i32 [ [[CONV176]], [[COND_TRUE175]] ], [ [[CONV178]], [[COND_FALSE177]] ]
+// SIMD-ONLY0-NEXT:    [[CONV181:%.*]] = trunc i32 [[COND180]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV181]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP84:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP84]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP85:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV182:%.*]] = sext i16 [[TMP85]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP86:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV183:%.*]] = sext i16 [[TMP86]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP184:%.*]] = icmp eq i32 [[CONV182]], [[CONV183]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP184]], label [[COND_TRUE186:%.*]], label [[COND_FALSE188:%.*]]
+// SIMD-ONLY0:       cond.true186:
+// SIMD-ONLY0-NEXT:    [[TMP87:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV187:%.*]] = sext i16 [[TMP87]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END190:%.*]]
+// SIMD-ONLY0:       cond.false188:
+// SIMD-ONLY0-NEXT:    [[TMP88:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV189:%.*]] = sext i16 [[TMP88]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END190]]
+// SIMD-ONLY0:       cond.end190:
+// SIMD-ONLY0-NEXT:    [[COND191:%.*]] = phi i32 [ [[CONV187]], [[COND_TRUE186]] ], [ [[CONV189]], [[COND_FALSE188]] ]
+// SIMD-ONLY0-NEXT:    [[CONV192:%.*]] = trunc i32 [[COND191]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV192]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP89:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP89]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP90:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP90]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP91:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV193:%.*]] = sext i16 [[TMP91]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP92:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV194:%.*]] = sext i16 [[TMP92]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP195:%.*]] = icmp sgt i32 [[CONV193]], [[CONV194]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP195]], label [[COND_TRUE197:%.*]], label [[COND_FALSE199:%.*]]
+// SIMD-ONLY0:       cond.true197:
+// SIMD-ONLY0-NEXT:    [[TMP93:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV198:%.*]] = sext i16 [[TMP93]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END201:%.*]]
+// SIMD-ONLY0:       cond.false199:
+// SIMD-ONLY0-NEXT:    [[TMP94:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV200:%.*]] = sext i16 [[TMP94]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END201]]
+// SIMD-ONLY0:       cond.end201:
+// SIMD-ONLY0-NEXT:    [[COND202:%.*]] = phi i32 [ [[CONV198]], [[COND_TRUE197]] ], [ [[CONV200]], [[COND_FALSE199]] ]
+// SIMD-ONLY0-NEXT:    [[CONV203:%.*]] = trunc i32 [[COND202]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV203]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP95:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP95]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP96:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV204:%.*]] = sext i16 [[TMP96]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP97:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV205:%.*]] = sext i16 [[TMP97]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP206:%.*]] = icmp slt i32 [[CONV204]], [[CONV205]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP206]], label [[COND_TRUE208:%.*]], label [[COND_FALSE210:%.*]]
+// SIMD-ONLY0:       cond.true208:
+// SIMD-ONLY0-NEXT:    [[TMP98:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV209:%.*]] = sext i16 [[TMP98]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END212:%.*]]
+// SIMD-ONLY0:       cond.false210:
+// SIMD-ONLY0-NEXT:    [[TMP99:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV211:%.*]] = sext i16 [[TMP99]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END212]]
+// SIMD-ONLY0:       cond.end212:
+// SIMD-ONLY0-NEXT:    [[COND213:%.*]] = phi i32 [ [[CONV209]], [[COND_TRUE208]] ], [ [[CONV211]], [[COND_FALSE210]] ]
+// SIMD-ONLY0-NEXT:    [[CONV214:%.*]] = trunc i32 [[COND213]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV214]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP100:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP100]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP101:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV215:%.*]] = sext i16 [[TMP101]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP102:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV216:%.*]] = sext i16 [[TMP102]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP217:%.*]] = icmp eq i32 [[CONV215]], [[CONV216]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP217]], label [[COND_TRUE219:%.*]], label [[COND_FALSE221:%.*]]
+// SIMD-ONLY0:       cond.true219:
+// SIMD-ONLY0-NEXT:    [[TMP103:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV220:%.*]] = sext i16 [[TMP103]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END223:%.*]]
+// SIMD-ONLY0:       cond.false221:
+// SIMD-ONLY0-NEXT:    [[TMP104:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV222:%.*]] = sext i16 [[TMP104]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END223]]
+// SIMD-ONLY0:       cond.end223:
+// SIMD-ONLY0-NEXT:    [[COND224:%.*]] = phi i32 [ [[CONV220]], [[COND_TRUE219]] ], [ [[CONV222]], [[COND_FALSE221]] ]
+// SIMD-ONLY0-NEXT:    [[CONV225:%.*]] = trunc i32 [[COND224]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV225]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP105:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV226:%.*]] = sext i16 [[TMP105]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP106:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV227:%.*]] = sext i16 [[TMP106]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP228:%.*]] = icmp sgt i32 [[CONV226]], [[CONV227]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP228]], label [[COND_TRUE230:%.*]], label [[COND_FALSE232:%.*]]
+// SIMD-ONLY0:       cond.true230:
+// SIMD-ONLY0-NEXT:    [[TMP107:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV231:%.*]] = sext i16 [[TMP107]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END234:%.*]]
+// SIMD-ONLY0:       cond.false232:
+// SIMD-ONLY0-NEXT:    [[TMP108:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV233:%.*]] = sext i16 [[TMP108]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END234]]
+// SIMD-ONLY0:       cond.end234:
+// SIMD-ONLY0-NEXT:    [[COND235:%.*]] = phi i32 [ [[CONV231]], [[COND_TRUE230]] ], [ [[CONV233]], [[COND_FALSE232]] ]
+// SIMD-ONLY0-NEXT:    [[CONV236:%.*]] = trunc i32 [[COND235]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV236]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP109:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP109]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP110:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV237:%.*]] = sext i16 [[TMP110]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP111:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV238:%.*]] = sext i16 [[TMP111]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP239:%.*]] = icmp slt i32 [[CONV237]], [[CONV238]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP239]], label [[COND_TRUE241:%.*]], label [[COND_FALSE243:%.*]]
+// SIMD-ONLY0:       cond.true241:
+// SIMD-ONLY0-NEXT:    [[TMP112:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV242:%.*]] = sext i16 [[TMP112]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END245:%.*]]
+// SIMD-ONLY0:       cond.false243:
+// SIMD-ONLY0-NEXT:    [[TMP113:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV244:%.*]] = sext i16 [[TMP113]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END245]]
+// SIMD-ONLY0:       cond.end245:
+// SIMD-ONLY0-NEXT:    [[COND246:%.*]] = phi i32 [ [[CONV242]], [[COND_TRUE241]] ], [ [[CONV244]], [[COND_FALSE243]] ]
+// SIMD-ONLY0-NEXT:    [[CONV247:%.*]] = trunc i32 [[COND246]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV247]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP114:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP114]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP115:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV248:%.*]] = sext i16 [[TMP115]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP116:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV249:%.*]] = sext i16 [[TMP116]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP250:%.*]] = icmp eq i32 [[CONV248]], [[CONV249]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP250]], label [[COND_TRUE252:%.*]], label [[COND_FALSE254:%.*]]
+// SIMD-ONLY0:       cond.true252:
+// SIMD-ONLY0-NEXT:    [[TMP117:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV253:%.*]] = sext i16 [[TMP117]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END256:%.*]]
+// SIMD-ONLY0:       cond.false254:
+// SIMD-ONLY0-NEXT:    [[TMP118:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV255:%.*]] = sext i16 [[TMP118]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END256]]
+// SIMD-ONLY0:       cond.end256:
+// SIMD-ONLY0-NEXT:    [[COND257:%.*]] = phi i32 [ [[CONV253]], [[COND_TRUE252]] ], [ [[CONV255]], [[COND_FALSE254]] ]
+// SIMD-ONLY0-NEXT:    [[CONV258:%.*]] = trunc i32 [[COND257]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV258]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP119:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP119]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP120:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP120]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP121:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV259:%.*]] = sext i16 [[TMP121]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP122:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV260:%.*]] = sext i16 [[TMP122]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP261:%.*]] = icmp sgt i32 [[CONV259]], [[CONV260]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP261]], label [[COND_TRUE263:%.*]], label [[COND_FALSE265:%.*]]
+// SIMD-ONLY0:       cond.true263:
+// SIMD-ONLY0-NEXT:    [[TMP123:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV264:%.*]] = sext i16 [[TMP123]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END267:%.*]]
+// SIMD-ONLY0:       cond.false265:
+// SIMD-ONLY0-NEXT:    [[TMP124:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV266:%.*]] = sext i16 [[TMP124]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END267]]
+// SIMD-ONLY0:       cond.end267:
+// SIMD-ONLY0-NEXT:    [[COND268:%.*]] = phi i32 [ [[CONV264]], [[COND_TRUE263]] ], [ [[CONV266]], [[COND_FALSE265]] ]
+// SIMD-ONLY0-NEXT:    [[CONV269:%.*]] = trunc i32 [[COND268]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV269]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP125:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP125]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP126:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV270:%.*]] = sext i16 [[TMP126]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP127:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV271:%.*]] = sext i16 [[TMP127]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP272:%.*]] = icmp slt i32 [[CONV270]], [[CONV271]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP272]], label [[COND_TRUE274:%.*]], label [[COND_FALSE276:%.*]]
+// SIMD-ONLY0:       cond.true274:
+// SIMD-ONLY0-NEXT:    [[TMP128:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV275:%.*]] = sext i16 [[TMP128]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END278:%.*]]
+// SIMD-ONLY0:       cond.false276:
+// SIMD-ONLY0-NEXT:    [[TMP129:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV277:%.*]] = sext i16 [[TMP129]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END278]]
+// SIMD-ONLY0:       cond.end278:
+// SIMD-ONLY0-NEXT:    [[COND279:%.*]] = phi i32 [ [[CONV275]], [[COND_TRUE274]] ], [ [[CONV277]], [[COND_FALSE276]] ]
+// SIMD-ONLY0-NEXT:    [[CONV280:%.*]] = trunc i32 [[COND279]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV280]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP130:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP130]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP131:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV281:%.*]] = sext i16 [[TMP131]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP132:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV282:%.*]] = sext i16 [[TMP132]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP283:%.*]] = icmp eq i32 [[CONV281]], [[CONV282]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP283]], label [[COND_TRUE285:%.*]], label [[COND_FALSE287:%.*]]
+// SIMD-ONLY0:       cond.true285:
+// SIMD-ONLY0-NEXT:    [[TMP133:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV286:%.*]] = sext i16 [[TMP133]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END289:%.*]]
+// SIMD-ONLY0:       cond.false287:
+// SIMD-ONLY0-NEXT:    [[TMP134:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV288:%.*]] = sext i16 [[TMP134]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END289]]
+// SIMD-ONLY0:       cond.end289:
+// SIMD-ONLY0-NEXT:    [[COND290:%.*]] = phi i32 [ [[CONV286]], [[COND_TRUE285]] ], [ [[CONV288]], [[COND_FALSE287]] ]
+// SIMD-ONLY0-NEXT:    [[CONV291:%.*]] = trunc i32 [[COND290]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV291]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP135:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV292:%.*]] = sext i16 [[TMP135]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP136:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV293:%.*]] = sext i16 [[TMP136]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP294:%.*]] = icmp sgt i32 [[CONV292]], [[CONV293]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP294]], label [[COND_TRUE296:%.*]], label [[COND_FALSE298:%.*]]
+// SIMD-ONLY0:       cond.true296:
+// SIMD-ONLY0-NEXT:    [[TMP137:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV297:%.*]] = sext i16 [[TMP137]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END300:%.*]]
+// SIMD-ONLY0:       cond.false298:
+// SIMD-ONLY0-NEXT:    [[TMP138:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV299:%.*]] = sext i16 [[TMP138]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END300]]
+// SIMD-ONLY0:       cond.end300:
+// SIMD-ONLY0-NEXT:    [[COND301:%.*]] = phi i32 [ [[CONV297]], [[COND_TRUE296]] ], [ [[CONV299]], [[COND_FALSE298]] ]
+// SIMD-ONLY0-NEXT:    [[CONV302:%.*]] = trunc i32 [[COND301]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV302]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP139:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP139]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP140:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV303:%.*]] = sext i16 [[TMP140]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP141:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV304:%.*]] = sext i16 [[TMP141]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP305:%.*]] = icmp slt i32 [[CONV303]], [[CONV304]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP305]], label [[COND_TRUE307:%.*]], label [[COND_FALSE309:%.*]]
+// SIMD-ONLY0:       cond.true307:
+// SIMD-ONLY0-NEXT:    [[TMP142:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV308:%.*]] = sext i16 [[TMP142]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END311:%.*]]
+// SIMD-ONLY0:       cond.false309:
+// SIMD-ONLY0-NEXT:    [[TMP143:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV310:%.*]] = sext i16 [[TMP143]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END311]]
+// SIMD-ONLY0:       cond.end311:
+// SIMD-ONLY0-NEXT:    [[COND312:%.*]] = phi i32 [ [[CONV308]], [[COND_TRUE307]] ], [ [[CONV310]], [[COND_FALSE309]] ]
+// SIMD-ONLY0-NEXT:    [[CONV313:%.*]] = trunc i32 [[COND312]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV313]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP144:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP144]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP145:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV314:%.*]] = sext i16 [[TMP145]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP146:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV315:%.*]] = sext i16 [[TMP146]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP316:%.*]] = icmp eq i32 [[CONV314]], [[CONV315]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP316]], label [[COND_TRUE318:%.*]], label [[COND_FALSE320:%.*]]
+// SIMD-ONLY0:       cond.true318:
+// SIMD-ONLY0-NEXT:    [[TMP147:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV319:%.*]] = sext i16 [[TMP147]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END322:%.*]]
+// SIMD-ONLY0:       cond.false320:
+// SIMD-ONLY0-NEXT:    [[TMP148:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV321:%.*]] = sext i16 [[TMP148]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END322]]
+// SIMD-ONLY0:       cond.end322:
+// SIMD-ONLY0-NEXT:    [[COND323:%.*]] = phi i32 [ [[CONV319]], [[COND_TRUE318]] ], [ [[CONV321]], [[COND_FALSE320]] ]
+// SIMD-ONLY0-NEXT:    [[CONV324:%.*]] = trunc i32 [[COND323]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV324]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP149:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP149]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP150:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP150]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP151:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV325:%.*]] = sext i16 [[TMP151]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP152:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV326:%.*]] = sext i16 [[TMP152]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP327:%.*]] = icmp sgt i32 [[CONV325]], [[CONV326]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP327]], label [[COND_TRUE329:%.*]], label [[COND_FALSE331:%.*]]
+// SIMD-ONLY0:       cond.true329:
+// SIMD-ONLY0-NEXT:    [[TMP153:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV330:%.*]] = sext i16 [[TMP153]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END333:%.*]]
+// SIMD-ONLY0:       cond.false331:
+// SIMD-ONLY0-NEXT:    [[TMP154:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV332:%.*]] = sext i16 [[TMP154]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END333]]
+// SIMD-ONLY0:       cond.end333:
+// SIMD-ONLY0-NEXT:    [[COND334:%.*]] = phi i32 [ [[CONV330]], [[COND_TRUE329]] ], [ [[CONV332]], [[COND_FALSE331]] ]
+// SIMD-ONLY0-NEXT:    [[CONV335:%.*]] = trunc i32 [[COND334]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV335]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP155:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP155]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP156:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV336:%.*]] = sext i16 [[TMP156]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP157:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV337:%.*]] = sext i16 [[TMP157]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP338:%.*]] = icmp slt i32 [[CONV336]], [[CONV337]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP338]], label [[COND_TRUE340:%.*]], label [[COND_FALSE342:%.*]]
+// SIMD-ONLY0:       cond.true340:
+// SIMD-ONLY0-NEXT:    [[TMP158:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV341:%.*]] = sext i16 [[TMP158]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END344:%.*]]
+// SIMD-ONLY0:       cond.false342:
+// SIMD-ONLY0-NEXT:    [[TMP159:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV343:%.*]] = sext i16 [[TMP159]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END344]]
+// SIMD-ONLY0:       cond.end344:
+// SIMD-ONLY0-NEXT:    [[COND345:%.*]] = phi i32 [ [[CONV341]], [[COND_TRUE340]] ], [ [[CONV343]], [[COND_FALSE342]] ]
+// SIMD-ONLY0-NEXT:    [[CONV346:%.*]] = trunc i32 [[COND345]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV346]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP160:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP160]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP161:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV347:%.*]] = sext i16 [[TMP161]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP162:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV348:%.*]] = sext i16 [[TMP162]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP349:%.*]] = icmp eq i32 [[CONV347]], [[CONV348]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP349]], label [[COND_TRUE351:%.*]], label [[COND_FALSE353:%.*]]
+// SIMD-ONLY0:       cond.true351:
+// SIMD-ONLY0-NEXT:    [[TMP163:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV352:%.*]] = sext i16 [[TMP163]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END355:%.*]]
+// SIMD-ONLY0:       cond.false353:
+// SIMD-ONLY0-NEXT:    [[TMP164:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV354:%.*]] = sext i16 [[TMP164]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END355]]
+// SIMD-ONLY0:       cond.end355:
+// SIMD-ONLY0-NEXT:    [[COND356:%.*]] = phi i32 [ [[CONV352]], [[COND_TRUE351]] ], [ [[CONV354]], [[COND_FALSE353]] ]
+// SIMD-ONLY0-NEXT:    [[CONV357:%.*]] = trunc i32 [[COND356]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV357]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP165:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV358:%.*]] = sext i16 [[TMP165]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP166:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV359:%.*]] = sext i16 [[TMP166]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP360:%.*]] = icmp sgt i32 [[CONV358]], [[CONV359]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP360]], label [[COND_TRUE362:%.*]], label [[COND_FALSE364:%.*]]
+// SIMD-ONLY0:       cond.true362:
+// SIMD-ONLY0-NEXT:    [[TMP167:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV363:%.*]] = sext i16 [[TMP167]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END366:%.*]]
+// SIMD-ONLY0:       cond.false364:
+// SIMD-ONLY0-NEXT:    [[TMP168:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV365:%.*]] = sext i16 [[TMP168]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END366]]
+// SIMD-ONLY0:       cond.end366:
+// SIMD-ONLY0-NEXT:    [[COND367:%.*]] = phi i32 [ [[CONV363]], [[COND_TRUE362]] ], [ [[CONV365]], [[COND_FALSE364]] ]
+// SIMD-ONLY0-NEXT:    [[CONV368:%.*]] = trunc i32 [[COND367]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV368]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP169:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP169]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP170:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV369:%.*]] = sext i16 [[TMP170]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP171:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV370:%.*]] = sext i16 [[TMP171]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP371:%.*]] = icmp slt i32 [[CONV369]], [[CONV370]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP371]], label [[COND_TRUE373:%.*]], label [[COND_FALSE375:%.*]]
+// SIMD-ONLY0:       cond.true373:
+// SIMD-ONLY0-NEXT:    [[TMP172:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV374:%.*]] = sext i16 [[TMP172]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END377:%.*]]
+// SIMD-ONLY0:       cond.false375:
+// SIMD-ONLY0-NEXT:    [[TMP173:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV376:%.*]] = sext i16 [[TMP173]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END377]]
+// SIMD-ONLY0:       cond.end377:
+// SIMD-ONLY0-NEXT:    [[COND378:%.*]] = phi i32 [ [[CONV374]], [[COND_TRUE373]] ], [ [[CONV376]], [[COND_FALSE375]] ]
+// SIMD-ONLY0-NEXT:    [[CONV379:%.*]] = trunc i32 [[COND378]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV379]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP174:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP174]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP175:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV380:%.*]] = sext i16 [[TMP175]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP176:%.*]] = load i16, ptr [[SE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV381:%.*]] = sext i16 [[TMP176]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP382:%.*]] = icmp eq i32 [[CONV380]], [[CONV381]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP382]], label [[COND_TRUE384:%.*]], label [[COND_FALSE386:%.*]]
+// SIMD-ONLY0:       cond.true384:
+// SIMD-ONLY0-NEXT:    [[TMP177:%.*]] = load i16, ptr [[SD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV385:%.*]] = sext i16 [[TMP177]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END388:%.*]]
+// SIMD-ONLY0:       cond.false386:
+// SIMD-ONLY0-NEXT:    [[TMP178:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV387:%.*]] = sext i16 [[TMP178]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END388]]
+// SIMD-ONLY0:       cond.end388:
+// SIMD-ONLY0-NEXT:    [[COND389:%.*]] = phi i32 [ [[CONV385]], [[COND_TRUE384]] ], [ [[CONV387]], [[COND_FALSE386]] ]
+// SIMD-ONLY0-NEXT:    [[CONV390:%.*]] = trunc i32 [[COND389]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV390]], ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP179:%.*]] = load i16, ptr [[SX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP179]], ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP180:%.*]] = load i16, ptr [[SV]], align 2
+// SIMD-ONLY0-NEXT:    ret i16 [[TMP180]]
+//
+//
+// SIMD-ONLY0-LABEL: @usxevd(
+// SIMD-ONLY0-NEXT:  entry:
+// SIMD-ONLY0-NEXT:    [[USX:%.*]] = alloca i16, align 2
+// SIMD-ONLY0-NEXT:    [[USV:%.*]] = alloca i16, align 2
+// SIMD-ONLY0-NEXT:    [[USE:%.*]] = alloca i16, align 2
+// SIMD-ONLY0-NEXT:    [[USD:%.*]] = alloca i16, align 2
+// SIMD-ONLY0-NEXT:    [[TMP0:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP0]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP1:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV:%.*]] = zext i16 [[TMP1]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP2:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV1:%.*]] = zext i16 [[TMP2]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV1]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// SIMD-ONLY0:       cond.true:
+// SIMD-ONLY0-NEXT:    [[TMP3:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV3:%.*]] = zext i16 [[TMP3]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END:%.*]]
+// SIMD-ONLY0:       cond.false:
+// SIMD-ONLY0-NEXT:    [[TMP4:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV4:%.*]] = zext i16 [[TMP4]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END]]
+// SIMD-ONLY0:       cond.end:
+// SIMD-ONLY0-NEXT:    [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ]
+// SIMD-ONLY0-NEXT:    [[CONV5:%.*]] = trunc i32 [[COND]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV5]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP5:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP5]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP6:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV6:%.*]] = zext i16 [[TMP6]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP7:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV7:%.*]] = zext i16 [[TMP7]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP8:%.*]] = icmp slt i32 [[CONV6]], [[CONV7]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP8]], label [[COND_TRUE10:%.*]], label [[COND_FALSE12:%.*]]
+// SIMD-ONLY0:       cond.true10:
+// SIMD-ONLY0-NEXT:    [[TMP8:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV11:%.*]] = zext i16 [[TMP8]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END14:%.*]]
+// SIMD-ONLY0:       cond.false12:
+// SIMD-ONLY0-NEXT:    [[TMP9:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV13:%.*]] = zext i16 [[TMP9]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END14]]
+// SIMD-ONLY0:       cond.end14:
+// SIMD-ONLY0-NEXT:    [[COND15:%.*]] = phi i32 [ [[CONV11]], [[COND_TRUE10]] ], [ [[CONV13]], [[COND_FALSE12]] ]
+// SIMD-ONLY0-NEXT:    [[CONV16:%.*]] = trunc i32 [[COND15]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV16]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP10:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP10]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP11:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV17:%.*]] = zext i16 [[TMP11]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP12:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV18:%.*]] = zext i16 [[TMP12]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP19:%.*]] = icmp eq i32 [[CONV17]], [[CONV18]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP19]], label [[COND_TRUE21:%.*]], label [[COND_FALSE23:%.*]]
+// SIMD-ONLY0:       cond.true21:
+// SIMD-ONLY0-NEXT:    [[TMP13:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV22:%.*]] = zext i16 [[TMP13]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END25:%.*]]
+// SIMD-ONLY0:       cond.false23:
+// SIMD-ONLY0-NEXT:    [[TMP14:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV24:%.*]] = zext i16 [[TMP14]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END25]]
+// SIMD-ONLY0:       cond.end25:
+// SIMD-ONLY0-NEXT:    [[COND26:%.*]] = phi i32 [ [[CONV22]], [[COND_TRUE21]] ], [ [[CONV24]], [[COND_FALSE23]] ]
+// SIMD-ONLY0-NEXT:    [[CONV27:%.*]] = trunc i32 [[COND26]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV27]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP15:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV28:%.*]] = zext i16 [[TMP15]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP16:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV29:%.*]] = zext i16 [[TMP16]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP30:%.*]] = icmp sgt i32 [[CONV28]], [[CONV29]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP30]], label [[COND_TRUE32:%.*]], label [[COND_FALSE34:%.*]]
+// SIMD-ONLY0:       cond.true32:
+// SIMD-ONLY0-NEXT:    [[TMP17:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV33:%.*]] = zext i16 [[TMP17]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END36:%.*]]
+// SIMD-ONLY0:       cond.false34:
+// SIMD-ONLY0-NEXT:    [[TMP18:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV35:%.*]] = zext i16 [[TMP18]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END36]]
+// SIMD-ONLY0:       cond.end36:
+// SIMD-ONLY0-NEXT:    [[COND37:%.*]] = phi i32 [ [[CONV33]], [[COND_TRUE32]] ], [ [[CONV35]], [[COND_FALSE34]] ]
+// SIMD-ONLY0-NEXT:    [[CONV38:%.*]] = trunc i32 [[COND37]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV38]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP19:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP19]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP20:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV39:%.*]] = zext i16 [[TMP20]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP21:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV40:%.*]] = zext i16 [[TMP21]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP41:%.*]] = icmp slt i32 [[CONV39]], [[CONV40]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP41]], label [[COND_TRUE43:%.*]], label [[COND_FALSE45:%.*]]
+// SIMD-ONLY0:       cond.true43:
+// SIMD-ONLY0-NEXT:    [[TMP22:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV44:%.*]] = zext i16 [[TMP22]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END47:%.*]]
+// SIMD-ONLY0:       cond.false45:
+// SIMD-ONLY0-NEXT:    [[TMP23:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV46:%.*]] = zext i16 [[TMP23]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END47]]
+// SIMD-ONLY0:       cond.end47:
+// SIMD-ONLY0-NEXT:    [[COND48:%.*]] = phi i32 [ [[CONV44]], [[COND_TRUE43]] ], [ [[CONV46]], [[COND_FALSE45]] ]
+// SIMD-ONLY0-NEXT:    [[CONV49:%.*]] = trunc i32 [[COND48]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV49]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP24:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP24]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP25:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV50:%.*]] = zext i16 [[TMP25]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP26:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV51:%.*]] = zext i16 [[TMP26]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP52:%.*]] = icmp eq i32 [[CONV50]], [[CONV51]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP52]], label [[COND_TRUE54:%.*]], label [[COND_FALSE56:%.*]]
+// SIMD-ONLY0:       cond.true54:
+// SIMD-ONLY0-NEXT:    [[TMP27:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV55:%.*]] = zext i16 [[TMP27]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END58:%.*]]
+// SIMD-ONLY0:       cond.false56:
+// SIMD-ONLY0-NEXT:    [[TMP28:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV57:%.*]] = zext i16 [[TMP28]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END58]]
+// SIMD-ONLY0:       cond.end58:
+// SIMD-ONLY0-NEXT:    [[COND59:%.*]] = phi i32 [ [[CONV55]], [[COND_TRUE54]] ], [ [[CONV57]], [[COND_FALSE56]] ]
+// SIMD-ONLY0-NEXT:    [[CONV60:%.*]] = trunc i32 [[COND59]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV60]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP29:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP29]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP30:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP30]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP31:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV61:%.*]] = zext i16 [[TMP31]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP32:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV62:%.*]] = zext i16 [[TMP32]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP63:%.*]] = icmp sgt i32 [[CONV61]], [[CONV62]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP63]], label [[COND_TRUE65:%.*]], label [[COND_FALSE67:%.*]]
+// SIMD-ONLY0:       cond.true65:
+// SIMD-ONLY0-NEXT:    [[TMP33:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV66:%.*]] = zext i16 [[TMP33]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END69:%.*]]
+// SIMD-ONLY0:       cond.false67:
+// SIMD-ONLY0-NEXT:    [[TMP34:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV68:%.*]] = zext i16 [[TMP34]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END69]]
+// SIMD-ONLY0:       cond.end69:
+// SIMD-ONLY0-NEXT:    [[COND70:%.*]] = phi i32 [ [[CONV66]], [[COND_TRUE65]] ], [ [[CONV68]], [[COND_FALSE67]] ]
+// SIMD-ONLY0-NEXT:    [[CONV71:%.*]] = trunc i32 [[COND70]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV71]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP35:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP35]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP36:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV72:%.*]] = zext i16 [[TMP36]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP37:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV73:%.*]] = zext i16 [[TMP37]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP74:%.*]] = icmp slt i32 [[CONV72]], [[CONV73]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP74]], label [[COND_TRUE76:%.*]], label [[COND_FALSE78:%.*]]
+// SIMD-ONLY0:       cond.true76:
+// SIMD-ONLY0-NEXT:    [[TMP38:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV77:%.*]] = zext i16 [[TMP38]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END80:%.*]]
+// SIMD-ONLY0:       cond.false78:
+// SIMD-ONLY0-NEXT:    [[TMP39:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV79:%.*]] = zext i16 [[TMP39]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END80]]
+// SIMD-ONLY0:       cond.end80:
+// SIMD-ONLY0-NEXT:    [[COND81:%.*]] = phi i32 [ [[CONV77]], [[COND_TRUE76]] ], [ [[CONV79]], [[COND_FALSE78]] ]
+// SIMD-ONLY0-NEXT:    [[CONV82:%.*]] = trunc i32 [[COND81]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV82]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP40:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP40]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP41:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV83:%.*]] = zext i16 [[TMP41]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP42:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV84:%.*]] = zext i16 [[TMP42]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP85:%.*]] = icmp eq i32 [[CONV83]], [[CONV84]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP85]], label [[COND_TRUE87:%.*]], label [[COND_FALSE89:%.*]]
+// SIMD-ONLY0:       cond.true87:
+// SIMD-ONLY0-NEXT:    [[TMP43:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV88:%.*]] = zext i16 [[TMP43]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END91:%.*]]
+// SIMD-ONLY0:       cond.false89:
+// SIMD-ONLY0-NEXT:    [[TMP44:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV90:%.*]] = zext i16 [[TMP44]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END91]]
+// SIMD-ONLY0:       cond.end91:
+// SIMD-ONLY0-NEXT:    [[COND92:%.*]] = phi i32 [ [[CONV88]], [[COND_TRUE87]] ], [ [[CONV90]], [[COND_FALSE89]] ]
+// SIMD-ONLY0-NEXT:    [[CONV93:%.*]] = trunc i32 [[COND92]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV93]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP45:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV94:%.*]] = zext i16 [[TMP45]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP46:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV95:%.*]] = zext i16 [[TMP46]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP96:%.*]] = icmp sgt i32 [[CONV94]], [[CONV95]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP96]], label [[COND_TRUE98:%.*]], label [[COND_FALSE100:%.*]]
+// SIMD-ONLY0:       cond.true98:
+// SIMD-ONLY0-NEXT:    [[TMP47:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV99:%.*]] = zext i16 [[TMP47]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END102:%.*]]
+// SIMD-ONLY0:       cond.false100:
+// SIMD-ONLY0-NEXT:    [[TMP48:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV101:%.*]] = zext i16 [[TMP48]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END102]]
+// SIMD-ONLY0:       cond.end102:
+// SIMD-ONLY0-NEXT:    [[COND103:%.*]] = phi i32 [ [[CONV99]], [[COND_TRUE98]] ], [ [[CONV101]], [[COND_FALSE100]] ]
+// SIMD-ONLY0-NEXT:    [[CONV104:%.*]] = trunc i32 [[COND103]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV104]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP49:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP49]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP50:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV105:%.*]] = zext i16 [[TMP50]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP51:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV106:%.*]] = zext i16 [[TMP51]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP107:%.*]] = icmp slt i32 [[CONV105]], [[CONV106]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP107]], label [[COND_TRUE109:%.*]], label [[COND_FALSE111:%.*]]
+// SIMD-ONLY0:       cond.true109:
+// SIMD-ONLY0-NEXT:    [[TMP52:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV110:%.*]] = zext i16 [[TMP52]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END113:%.*]]
+// SIMD-ONLY0:       cond.false111:
+// SIMD-ONLY0-NEXT:    [[TMP53:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV112:%.*]] = zext i16 [[TMP53]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END113]]
+// SIMD-ONLY0:       cond.end113:
+// SIMD-ONLY0-NEXT:    [[COND114:%.*]] = phi i32 [ [[CONV110]], [[COND_TRUE109]] ], [ [[CONV112]], [[COND_FALSE111]] ]
+// SIMD-ONLY0-NEXT:    [[CONV115:%.*]] = trunc i32 [[COND114]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV115]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP54:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP54]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP55:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV116:%.*]] = zext i16 [[TMP55]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP56:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV117:%.*]] = zext i16 [[TMP56]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP118:%.*]] = icmp eq i32 [[CONV116]], [[CONV117]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP118]], label [[COND_TRUE120:%.*]], label [[COND_FALSE122:%.*]]
+// SIMD-ONLY0:       cond.true120:
+// SIMD-ONLY0-NEXT:    [[TMP57:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV121:%.*]] = zext i16 [[TMP57]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END124:%.*]]
+// SIMD-ONLY0:       cond.false122:
+// SIMD-ONLY0-NEXT:    [[TMP58:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV123:%.*]] = zext i16 [[TMP58]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END124]]
+// SIMD-ONLY0:       cond.end124:
+// SIMD-ONLY0-NEXT:    [[COND125:%.*]] = phi i32 [ [[CONV121]], [[COND_TRUE120]] ], [ [[CONV123]], [[COND_FALSE122]] ]
+// SIMD-ONLY0-NEXT:    [[CONV126:%.*]] = trunc i32 [[COND125]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV126]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP59:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP59]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP60:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP60]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP61:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV127:%.*]] = zext i16 [[TMP61]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP62:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV128:%.*]] = zext i16 [[TMP62]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP129:%.*]] = icmp sgt i32 [[CONV127]], [[CONV128]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP129]], label [[COND_TRUE131:%.*]], label [[COND_FALSE133:%.*]]
+// SIMD-ONLY0:       cond.true131:
+// SIMD-ONLY0-NEXT:    [[TMP63:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV132:%.*]] = zext i16 [[TMP63]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END135:%.*]]
+// SIMD-ONLY0:       cond.false133:
+// SIMD-ONLY0-NEXT:    [[TMP64:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV134:%.*]] = zext i16 [[TMP64]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END135]]
+// SIMD-ONLY0:       cond.end135:
+// SIMD-ONLY0-NEXT:    [[COND136:%.*]] = phi i32 [ [[CONV132]], [[COND_TRUE131]] ], [ [[CONV134]], [[COND_FALSE133]] ]
+// SIMD-ONLY0-NEXT:    [[CONV137:%.*]] = trunc i32 [[COND136]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV137]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP65:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP65]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP66:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV138:%.*]] = zext i16 [[TMP66]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP67:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV139:%.*]] = zext i16 [[TMP67]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP140:%.*]] = icmp slt i32 [[CONV138]], [[CONV139]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP140]], label [[COND_TRUE142:%.*]], label [[COND_FALSE144:%.*]]
+// SIMD-ONLY0:       cond.true142:
+// SIMD-ONLY0-NEXT:    [[TMP68:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV143:%.*]] = zext i16 [[TMP68]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END146:%.*]]
+// SIMD-ONLY0:       cond.false144:
+// SIMD-ONLY0-NEXT:    [[TMP69:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV145:%.*]] = zext i16 [[TMP69]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END146]]
+// SIMD-ONLY0:       cond.end146:
+// SIMD-ONLY0-NEXT:    [[COND147:%.*]] = phi i32 [ [[CONV143]], [[COND_TRUE142]] ], [ [[CONV145]], [[COND_FALSE144]] ]
+// SIMD-ONLY0-NEXT:    [[CONV148:%.*]] = trunc i32 [[COND147]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV148]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP70:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP70]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP71:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV149:%.*]] = zext i16 [[TMP71]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP72:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV150:%.*]] = zext i16 [[TMP72]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP151:%.*]] = icmp eq i32 [[CONV149]], [[CONV150]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP151]], label [[COND_TRUE153:%.*]], label [[COND_FALSE155:%.*]]
+// SIMD-ONLY0:       cond.true153:
+// SIMD-ONLY0-NEXT:    [[TMP73:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV154:%.*]] = zext i16 [[TMP73]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END157:%.*]]
+// SIMD-ONLY0:       cond.false155:
+// SIMD-ONLY0-NEXT:    [[TMP74:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV156:%.*]] = zext i16 [[TMP74]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END157]]
+// SIMD-ONLY0:       cond.end157:
+// SIMD-ONLY0-NEXT:    [[COND158:%.*]] = phi i32 [ [[CONV154]], [[COND_TRUE153]] ], [ [[CONV156]], [[COND_FALSE155]] ]
+// SIMD-ONLY0-NEXT:    [[CONV159:%.*]] = trunc i32 [[COND158]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV159]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP75:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV160:%.*]] = zext i16 [[TMP75]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP76:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV161:%.*]] = zext i16 [[TMP76]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP162:%.*]] = icmp sgt i32 [[CONV160]], [[CONV161]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP162]], label [[COND_TRUE164:%.*]], label [[COND_FALSE166:%.*]]
+// SIMD-ONLY0:       cond.true164:
+// SIMD-ONLY0-NEXT:    [[TMP77:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV165:%.*]] = zext i16 [[TMP77]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END168:%.*]]
+// SIMD-ONLY0:       cond.false166:
+// SIMD-ONLY0-NEXT:    [[TMP78:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV167:%.*]] = zext i16 [[TMP78]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END168]]
+// SIMD-ONLY0:       cond.end168:
+// SIMD-ONLY0-NEXT:    [[COND169:%.*]] = phi i32 [ [[CONV165]], [[COND_TRUE164]] ], [ [[CONV167]], [[COND_FALSE166]] ]
+// SIMD-ONLY0-NEXT:    [[CONV170:%.*]] = trunc i32 [[COND169]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV170]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP79:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP79]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP80:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV171:%.*]] = zext i16 [[TMP80]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP81:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV172:%.*]] = zext i16 [[TMP81]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP173:%.*]] = icmp slt i32 [[CONV171]], [[CONV172]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP173]], label [[COND_TRUE175:%.*]], label [[COND_FALSE177:%.*]]
+// SIMD-ONLY0:       cond.true175:
+// SIMD-ONLY0-NEXT:    [[TMP82:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV176:%.*]] = zext i16 [[TMP82]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END179:%.*]]
+// SIMD-ONLY0:       cond.false177:
+// SIMD-ONLY0-NEXT:    [[TMP83:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV178:%.*]] = zext i16 [[TMP83]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END179]]
+// SIMD-ONLY0:       cond.end179:
+// SIMD-ONLY0-NEXT:    [[COND180:%.*]] = phi i32 [ [[CONV176]], [[COND_TRUE175]] ], [ [[CONV178]], [[COND_FALSE177]] ]
+// SIMD-ONLY0-NEXT:    [[CONV181:%.*]] = trunc i32 [[COND180]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV181]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP84:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP84]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP85:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV182:%.*]] = zext i16 [[TMP85]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP86:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV183:%.*]] = zext i16 [[TMP86]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP184:%.*]] = icmp eq i32 [[CONV182]], [[CONV183]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP184]], label [[COND_TRUE186:%.*]], label [[COND_FALSE188:%.*]]
+// SIMD-ONLY0:       cond.true186:
+// SIMD-ONLY0-NEXT:    [[TMP87:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV187:%.*]] = zext i16 [[TMP87]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END190:%.*]]
+// SIMD-ONLY0:       cond.false188:
+// SIMD-ONLY0-NEXT:    [[TMP88:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV189:%.*]] = zext i16 [[TMP88]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END190]]
+// SIMD-ONLY0:       cond.end190:
+// SIMD-ONLY0-NEXT:    [[COND191:%.*]] = phi i32 [ [[CONV187]], [[COND_TRUE186]] ], [ [[CONV189]], [[COND_FALSE188]] ]
+// SIMD-ONLY0-NEXT:    [[CONV192:%.*]] = trunc i32 [[COND191]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV192]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP89:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP89]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP90:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP90]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP91:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV193:%.*]] = zext i16 [[TMP91]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP92:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV194:%.*]] = zext i16 [[TMP92]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP195:%.*]] = icmp sgt i32 [[CONV193]], [[CONV194]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP195]], label [[COND_TRUE197:%.*]], label [[COND_FALSE199:%.*]]
+// SIMD-ONLY0:       cond.true197:
+// SIMD-ONLY0-NEXT:    [[TMP93:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV198:%.*]] = zext i16 [[TMP93]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END201:%.*]]
+// SIMD-ONLY0:       cond.false199:
+// SIMD-ONLY0-NEXT:    [[TMP94:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV200:%.*]] = zext i16 [[TMP94]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END201]]
+// SIMD-ONLY0:       cond.end201:
+// SIMD-ONLY0-NEXT:    [[COND202:%.*]] = phi i32 [ [[CONV198]], [[COND_TRUE197]] ], [ [[CONV200]], [[COND_FALSE199]] ]
+// SIMD-ONLY0-NEXT:    [[CONV203:%.*]] = trunc i32 [[COND202]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV203]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP95:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP95]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP96:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV204:%.*]] = zext i16 [[TMP96]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP97:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV205:%.*]] = zext i16 [[TMP97]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP206:%.*]] = icmp slt i32 [[CONV204]], [[CONV205]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP206]], label [[COND_TRUE208:%.*]], label [[COND_FALSE210:%.*]]
+// SIMD-ONLY0:       cond.true208:
+// SIMD-ONLY0-NEXT:    [[TMP98:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV209:%.*]] = zext i16 [[TMP98]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END212:%.*]]
+// SIMD-ONLY0:       cond.false210:
+// SIMD-ONLY0-NEXT:    [[TMP99:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV211:%.*]] = zext i16 [[TMP99]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END212]]
+// SIMD-ONLY0:       cond.end212:
+// SIMD-ONLY0-NEXT:    [[COND213:%.*]] = phi i32 [ [[CONV209]], [[COND_TRUE208]] ], [ [[CONV211]], [[COND_FALSE210]] ]
+// SIMD-ONLY0-NEXT:    [[CONV214:%.*]] = trunc i32 [[COND213]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV214]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP100:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP100]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP101:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV215:%.*]] = zext i16 [[TMP101]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP102:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV216:%.*]] = zext i16 [[TMP102]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP217:%.*]] = icmp eq i32 [[CONV215]], [[CONV216]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP217]], label [[COND_TRUE219:%.*]], label [[COND_FALSE221:%.*]]
+// SIMD-ONLY0:       cond.true219:
+// SIMD-ONLY0-NEXT:    [[TMP103:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV220:%.*]] = zext i16 [[TMP103]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END223:%.*]]
+// SIMD-ONLY0:       cond.false221:
+// SIMD-ONLY0-NEXT:    [[TMP104:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV222:%.*]] = zext i16 [[TMP104]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END223]]
+// SIMD-ONLY0:       cond.end223:
+// SIMD-ONLY0-NEXT:    [[COND224:%.*]] = phi i32 [ [[CONV220]], [[COND_TRUE219]] ], [ [[CONV222]], [[COND_FALSE221]] ]
+// SIMD-ONLY0-NEXT:    [[CONV225:%.*]] = trunc i32 [[COND224]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV225]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP105:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV226:%.*]] = zext i16 [[TMP105]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP106:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV227:%.*]] = zext i16 [[TMP106]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP228:%.*]] = icmp sgt i32 [[CONV226]], [[CONV227]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP228]], label [[COND_TRUE230:%.*]], label [[COND_FALSE232:%.*]]
+// SIMD-ONLY0:       cond.true230:
+// SIMD-ONLY0-NEXT:    [[TMP107:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV231:%.*]] = zext i16 [[TMP107]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END234:%.*]]
+// SIMD-ONLY0:       cond.false232:
+// SIMD-ONLY0-NEXT:    [[TMP108:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV233:%.*]] = zext i16 [[TMP108]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END234]]
+// SIMD-ONLY0:       cond.end234:
+// SIMD-ONLY0-NEXT:    [[COND235:%.*]] = phi i32 [ [[CONV231]], [[COND_TRUE230]] ], [ [[CONV233]], [[COND_FALSE232]] ]
+// SIMD-ONLY0-NEXT:    [[CONV236:%.*]] = trunc i32 [[COND235]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV236]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP109:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP109]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP110:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV237:%.*]] = zext i16 [[TMP110]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP111:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV238:%.*]] = zext i16 [[TMP111]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP239:%.*]] = icmp slt i32 [[CONV237]], [[CONV238]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP239]], label [[COND_TRUE241:%.*]], label [[COND_FALSE243:%.*]]
+// SIMD-ONLY0:       cond.true241:
+// SIMD-ONLY0-NEXT:    [[TMP112:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV242:%.*]] = zext i16 [[TMP112]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END245:%.*]]
+// SIMD-ONLY0:       cond.false243:
+// SIMD-ONLY0-NEXT:    [[TMP113:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV244:%.*]] = zext i16 [[TMP113]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END245]]
+// SIMD-ONLY0:       cond.end245:
+// SIMD-ONLY0-NEXT:    [[COND246:%.*]] = phi i32 [ [[CONV242]], [[COND_TRUE241]] ], [ [[CONV244]], [[COND_FALSE243]] ]
+// SIMD-ONLY0-NEXT:    [[CONV247:%.*]] = trunc i32 [[COND246]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV247]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP114:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP114]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP115:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV248:%.*]] = zext i16 [[TMP115]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP116:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV249:%.*]] = zext i16 [[TMP116]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP250:%.*]] = icmp eq i32 [[CONV248]], [[CONV249]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP250]], label [[COND_TRUE252:%.*]], label [[COND_FALSE254:%.*]]
+// SIMD-ONLY0:       cond.true252:
+// SIMD-ONLY0-NEXT:    [[TMP117:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV253:%.*]] = zext i16 [[TMP117]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END256:%.*]]
+// SIMD-ONLY0:       cond.false254:
+// SIMD-ONLY0-NEXT:    [[TMP118:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV255:%.*]] = zext i16 [[TMP118]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END256]]
+// SIMD-ONLY0:       cond.end256:
+// SIMD-ONLY0-NEXT:    [[COND257:%.*]] = phi i32 [ [[CONV253]], [[COND_TRUE252]] ], [ [[CONV255]], [[COND_FALSE254]] ]
+// SIMD-ONLY0-NEXT:    [[CONV258:%.*]] = trunc i32 [[COND257]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV258]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP119:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP119]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP120:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP120]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP121:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV259:%.*]] = zext i16 [[TMP121]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP122:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV260:%.*]] = zext i16 [[TMP122]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP261:%.*]] = icmp sgt i32 [[CONV259]], [[CONV260]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP261]], label [[COND_TRUE263:%.*]], label [[COND_FALSE265:%.*]]
+// SIMD-ONLY0:       cond.true263:
+// SIMD-ONLY0-NEXT:    [[TMP123:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV264:%.*]] = zext i16 [[TMP123]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END267:%.*]]
+// SIMD-ONLY0:       cond.false265:
+// SIMD-ONLY0-NEXT:    [[TMP124:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV266:%.*]] = zext i16 [[TMP124]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END267]]
+// SIMD-ONLY0:       cond.end267:
+// SIMD-ONLY0-NEXT:    [[COND268:%.*]] = phi i32 [ [[CONV264]], [[COND_TRUE263]] ], [ [[CONV266]], [[COND_FALSE265]] ]
+// SIMD-ONLY0-NEXT:    [[CONV269:%.*]] = trunc i32 [[COND268]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV269]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP125:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP125]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP126:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV270:%.*]] = zext i16 [[TMP126]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP127:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV271:%.*]] = zext i16 [[TMP127]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP272:%.*]] = icmp slt i32 [[CONV270]], [[CONV271]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP272]], label [[COND_TRUE274:%.*]], label [[COND_FALSE276:%.*]]
+// SIMD-ONLY0:       cond.true274:
+// SIMD-ONLY0-NEXT:    [[TMP128:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV275:%.*]] = zext i16 [[TMP128]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END278:%.*]]
+// SIMD-ONLY0:       cond.false276:
+// SIMD-ONLY0-NEXT:    [[TMP129:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV277:%.*]] = zext i16 [[TMP129]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END278]]
+// SIMD-ONLY0:       cond.end278:
+// SIMD-ONLY0-NEXT:    [[COND279:%.*]] = phi i32 [ [[CONV275]], [[COND_TRUE274]] ], [ [[CONV277]], [[COND_FALSE276]] ]
+// SIMD-ONLY0-NEXT:    [[CONV280:%.*]] = trunc i32 [[COND279]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV280]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP130:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP130]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP131:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV281:%.*]] = zext i16 [[TMP131]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP132:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV282:%.*]] = zext i16 [[TMP132]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP283:%.*]] = icmp eq i32 [[CONV281]], [[CONV282]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP283]], label [[COND_TRUE285:%.*]], label [[COND_FALSE287:%.*]]
+// SIMD-ONLY0:       cond.true285:
+// SIMD-ONLY0-NEXT:    [[TMP133:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV286:%.*]] = zext i16 [[TMP133]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END289:%.*]]
+// SIMD-ONLY0:       cond.false287:
+// SIMD-ONLY0-NEXT:    [[TMP134:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV288:%.*]] = zext i16 [[TMP134]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END289]]
+// SIMD-ONLY0:       cond.end289:
+// SIMD-ONLY0-NEXT:    [[COND290:%.*]] = phi i32 [ [[CONV286]], [[COND_TRUE285]] ], [ [[CONV288]], [[COND_FALSE287]] ]
+// SIMD-ONLY0-NEXT:    [[CONV291:%.*]] = trunc i32 [[COND290]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV291]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP135:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV292:%.*]] = zext i16 [[TMP135]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP136:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV293:%.*]] = zext i16 [[TMP136]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP294:%.*]] = icmp sgt i32 [[CONV292]], [[CONV293]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP294]], label [[COND_TRUE296:%.*]], label [[COND_FALSE298:%.*]]
+// SIMD-ONLY0:       cond.true296:
+// SIMD-ONLY0-NEXT:    [[TMP137:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV297:%.*]] = zext i16 [[TMP137]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END300:%.*]]
+// SIMD-ONLY0:       cond.false298:
+// SIMD-ONLY0-NEXT:    [[TMP138:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV299:%.*]] = zext i16 [[TMP138]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END300]]
+// SIMD-ONLY0:       cond.end300:
+// SIMD-ONLY0-NEXT:    [[COND301:%.*]] = phi i32 [ [[CONV297]], [[COND_TRUE296]] ], [ [[CONV299]], [[COND_FALSE298]] ]
+// SIMD-ONLY0-NEXT:    [[CONV302:%.*]] = trunc i32 [[COND301]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV302]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP139:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP139]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP140:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV303:%.*]] = zext i16 [[TMP140]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP141:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV304:%.*]] = zext i16 [[TMP141]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP305:%.*]] = icmp slt i32 [[CONV303]], [[CONV304]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP305]], label [[COND_TRUE307:%.*]], label [[COND_FALSE309:%.*]]
+// SIMD-ONLY0:       cond.true307:
+// SIMD-ONLY0-NEXT:    [[TMP142:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV308:%.*]] = zext i16 [[TMP142]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END311:%.*]]
+// SIMD-ONLY0:       cond.false309:
+// SIMD-ONLY0-NEXT:    [[TMP143:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV310:%.*]] = zext i16 [[TMP143]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END311]]
+// SIMD-ONLY0:       cond.end311:
+// SIMD-ONLY0-NEXT:    [[COND312:%.*]] = phi i32 [ [[CONV308]], [[COND_TRUE307]] ], [ [[CONV310]], [[COND_FALSE309]] ]
+// SIMD-ONLY0-NEXT:    [[CONV313:%.*]] = trunc i32 [[COND312]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV313]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP144:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP144]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP145:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV314:%.*]] = zext i16 [[TMP145]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP146:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV315:%.*]] = zext i16 [[TMP146]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP316:%.*]] = icmp eq i32 [[CONV314]], [[CONV315]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP316]], label [[COND_TRUE318:%.*]], label [[COND_FALSE320:%.*]]
+// SIMD-ONLY0:       cond.true318:
+// SIMD-ONLY0-NEXT:    [[TMP147:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV319:%.*]] = zext i16 [[TMP147]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END322:%.*]]
+// SIMD-ONLY0:       cond.false320:
+// SIMD-ONLY0-NEXT:    [[TMP148:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV321:%.*]] = zext i16 [[TMP148]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END322]]
+// SIMD-ONLY0:       cond.end322:
+// SIMD-ONLY0-NEXT:    [[COND323:%.*]] = phi i32 [ [[CONV319]], [[COND_TRUE318]] ], [ [[CONV321]], [[COND_FALSE320]] ]
+// SIMD-ONLY0-NEXT:    [[CONV324:%.*]] = trunc i32 [[COND323]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV324]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP149:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP149]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP150:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP150]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP151:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV325:%.*]] = zext i16 [[TMP151]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP152:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV326:%.*]] = zext i16 [[TMP152]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP327:%.*]] = icmp sgt i32 [[CONV325]], [[CONV326]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP327]], label [[COND_TRUE329:%.*]], label [[COND_FALSE331:%.*]]
+// SIMD-ONLY0:       cond.true329:
+// SIMD-ONLY0-NEXT:    [[TMP153:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV330:%.*]] = zext i16 [[TMP153]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END333:%.*]]
+// SIMD-ONLY0:       cond.false331:
+// SIMD-ONLY0-NEXT:    [[TMP154:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV332:%.*]] = zext i16 [[TMP154]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END333]]
+// SIMD-ONLY0:       cond.end333:
+// SIMD-ONLY0-NEXT:    [[COND334:%.*]] = phi i32 [ [[CONV330]], [[COND_TRUE329]] ], [ [[CONV332]], [[COND_FALSE331]] ]
+// SIMD-ONLY0-NEXT:    [[CONV335:%.*]] = trunc i32 [[COND334]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV335]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP155:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP155]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP156:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV336:%.*]] = zext i16 [[TMP156]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP157:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV337:%.*]] = zext i16 [[TMP157]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP338:%.*]] = icmp slt i32 [[CONV336]], [[CONV337]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP338]], label [[COND_TRUE340:%.*]], label [[COND_FALSE342:%.*]]
+// SIMD-ONLY0:       cond.true340:
+// SIMD-ONLY0-NEXT:    [[TMP158:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV341:%.*]] = zext i16 [[TMP158]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END344:%.*]]
+// SIMD-ONLY0:       cond.false342:
+// SIMD-ONLY0-NEXT:    [[TMP159:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV343:%.*]] = zext i16 [[TMP159]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END344]]
+// SIMD-ONLY0:       cond.end344:
+// SIMD-ONLY0-NEXT:    [[COND345:%.*]] = phi i32 [ [[CONV341]], [[COND_TRUE340]] ], [ [[CONV343]], [[COND_FALSE342]] ]
+// SIMD-ONLY0-NEXT:    [[CONV346:%.*]] = trunc i32 [[COND345]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV346]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP160:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP160]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP161:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV347:%.*]] = zext i16 [[TMP161]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP162:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV348:%.*]] = zext i16 [[TMP162]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP349:%.*]] = icmp eq i32 [[CONV347]], [[CONV348]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP349]], label [[COND_TRUE351:%.*]], label [[COND_FALSE353:%.*]]
+// SIMD-ONLY0:       cond.true351:
+// SIMD-ONLY0-NEXT:    [[TMP163:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV352:%.*]] = zext i16 [[TMP163]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END355:%.*]]
+// SIMD-ONLY0:       cond.false353:
+// SIMD-ONLY0-NEXT:    [[TMP164:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV354:%.*]] = zext i16 [[TMP164]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END355]]
+// SIMD-ONLY0:       cond.end355:
+// SIMD-ONLY0-NEXT:    [[COND356:%.*]] = phi i32 [ [[CONV352]], [[COND_TRUE351]] ], [ [[CONV354]], [[COND_FALSE353]] ]
+// SIMD-ONLY0-NEXT:    [[CONV357:%.*]] = trunc i32 [[COND356]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV357]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP165:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV358:%.*]] = zext i16 [[TMP165]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP166:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV359:%.*]] = zext i16 [[TMP166]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP360:%.*]] = icmp sgt i32 [[CONV358]], [[CONV359]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP360]], label [[COND_TRUE362:%.*]], label [[COND_FALSE364:%.*]]
+// SIMD-ONLY0:       cond.true362:
+// SIMD-ONLY0-NEXT:    [[TMP167:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV363:%.*]] = zext i16 [[TMP167]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END366:%.*]]
+// SIMD-ONLY0:       cond.false364:
+// SIMD-ONLY0-NEXT:    [[TMP168:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV365:%.*]] = zext i16 [[TMP168]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END366]]
+// SIMD-ONLY0:       cond.end366:
+// SIMD-ONLY0-NEXT:    [[COND367:%.*]] = phi i32 [ [[CONV363]], [[COND_TRUE362]] ], [ [[CONV365]], [[COND_FALSE364]] ]
+// SIMD-ONLY0-NEXT:    [[CONV368:%.*]] = trunc i32 [[COND367]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV368]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP169:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP169]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP170:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV369:%.*]] = zext i16 [[TMP170]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP171:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV370:%.*]] = zext i16 [[TMP171]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP371:%.*]] = icmp slt i32 [[CONV369]], [[CONV370]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP371]], label [[COND_TRUE373:%.*]], label [[COND_FALSE375:%.*]]
+// SIMD-ONLY0:       cond.true373:
+// SIMD-ONLY0-NEXT:    [[TMP172:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV374:%.*]] = zext i16 [[TMP172]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END377:%.*]]
+// SIMD-ONLY0:       cond.false375:
+// SIMD-ONLY0-NEXT:    [[TMP173:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV376:%.*]] = zext i16 [[TMP173]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END377]]
+// SIMD-ONLY0:       cond.end377:
+// SIMD-ONLY0-NEXT:    [[COND378:%.*]] = phi i32 [ [[CONV374]], [[COND_TRUE373]] ], [ [[CONV376]], [[COND_FALSE375]] ]
+// SIMD-ONLY0-NEXT:    [[CONV379:%.*]] = trunc i32 [[COND378]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV379]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP174:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP174]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP175:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV380:%.*]] = zext i16 [[TMP175]] to i32
+// SIMD-ONLY0-NEXT:    [[TMP176:%.*]] = load i16, ptr [[USE]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV381:%.*]] = zext i16 [[TMP176]] to i32
+// SIMD-ONLY0-NEXT:    [[CMP382:%.*]] = icmp eq i32 [[CONV380]], [[CONV381]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP382]], label [[COND_TRUE384:%.*]], label [[COND_FALSE386:%.*]]
+// SIMD-ONLY0:       cond.true384:
+// SIMD-ONLY0-NEXT:    [[TMP177:%.*]] = load i16, ptr [[USD]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV385:%.*]] = zext i16 [[TMP177]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END388:%.*]]
+// SIMD-ONLY0:       cond.false386:
+// SIMD-ONLY0-NEXT:    [[TMP178:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[CONV387:%.*]] = zext i16 [[TMP178]] to i32
+// SIMD-ONLY0-NEXT:    br label [[COND_END388]]
+// SIMD-ONLY0:       cond.end388:
+// SIMD-ONLY0-NEXT:    [[COND389:%.*]] = phi i32 [ [[CONV385]], [[COND_TRUE384]] ], [ [[CONV387]], [[COND_FALSE386]] ]
+// SIMD-ONLY0-NEXT:    [[CONV390:%.*]] = trunc i32 [[COND389]] to i16
+// SIMD-ONLY0-NEXT:    store i16 [[CONV390]], ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP179:%.*]] = load i16, ptr [[USX]], align 2
+// SIMD-ONLY0-NEXT:    store i16 [[TMP179]], ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    [[TMP180:%.*]] = load i16, ptr [[USV]], align 2
+// SIMD-ONLY0-NEXT:    ret i16 [[TMP180]]
+//
+//
+// SIMD-ONLY0-LABEL: @ixevd(
+// SIMD-ONLY0-NEXT:  entry:
+// SIMD-ONLY0-NEXT:    [[IX:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT:    [[IV:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT:    [[IE:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT:    [[ID:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT:    [[TMP0:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP0]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// SIMD-ONLY0:       cond.true:
+// SIMD-ONLY0-NEXT:    [[TMP3:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END:%.*]]
+// SIMD-ONLY0:       cond.false:
+// SIMD-ONLY0-NEXT:    [[TMP4:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END]]
+// SIMD-ONLY0:       cond.end:
+// SIMD-ONLY0-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP3]], [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP5:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP5]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP6:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP7:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1]], label [[COND_TRUE2:%.*]], label [[COND_FALSE3:%.*]]
+// SIMD-ONLY0:       cond.true2:
+// SIMD-ONLY0-NEXT:    [[TMP8:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4:%.*]]
+// SIMD-ONLY0:       cond.false3:
+// SIMD-ONLY0-NEXT:    [[TMP9:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4]]
+// SIMD-ONLY0:       cond.end4:
+// SIMD-ONLY0-NEXT:    [[COND5:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE2]] ], [ [[TMP9]], [[COND_FALSE3]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND5]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP10:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP10]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP11:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP12:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6:%.*]] = icmp eq i32 [[TMP11]], [[TMP12]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]]
+// SIMD-ONLY0:       cond.true7:
+// SIMD-ONLY0-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END9:%.*]]
+// SIMD-ONLY0:       cond.false8:
+// SIMD-ONLY0-NEXT:    [[TMP14:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END9]]
+// SIMD-ONLY0:       cond.end9:
+// SIMD-ONLY0-NEXT:    [[COND10:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE7]] ], [ [[TMP14]], [[COND_FALSE8]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND10]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP15:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP16:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP11:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]]
+// SIMD-ONLY0:       cond.true12:
+// SIMD-ONLY0-NEXT:    [[TMP17:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END14:%.*]]
+// SIMD-ONLY0:       cond.false13:
+// SIMD-ONLY0-NEXT:    [[TMP18:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END14]]
+// SIMD-ONLY0:       cond.end14:
+// SIMD-ONLY0-NEXT:    [[COND15:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE12]] ], [ [[TMP18]], [[COND_FALSE13]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND15]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP19:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP19]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP20:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP21:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP16:%.*]] = icmp slt i32 [[TMP20]], [[TMP21]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP16]], label [[COND_TRUE17:%.*]], label [[COND_FALSE18:%.*]]
+// SIMD-ONLY0:       cond.true17:
+// SIMD-ONLY0-NEXT:    [[TMP22:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END19:%.*]]
+// SIMD-ONLY0:       cond.false18:
+// SIMD-ONLY0-NEXT:    [[TMP23:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END19]]
+// SIMD-ONLY0:       cond.end19:
+// SIMD-ONLY0-NEXT:    [[COND20:%.*]] = phi i32 [ [[TMP22]], [[COND_TRUE17]] ], [ [[TMP23]], [[COND_FALSE18]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND20]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP24:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP24]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP25:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP26:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP21:%.*]] = icmp eq i32 [[TMP25]], [[TMP26]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP21]], label [[COND_TRUE22:%.*]], label [[COND_FALSE23:%.*]]
+// SIMD-ONLY0:       cond.true22:
+// SIMD-ONLY0-NEXT:    [[TMP27:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END24:%.*]]
+// SIMD-ONLY0:       cond.false23:
+// SIMD-ONLY0-NEXT:    [[TMP28:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END24]]
+// SIMD-ONLY0:       cond.end24:
+// SIMD-ONLY0-NEXT:    [[COND25:%.*]] = phi i32 [ [[TMP27]], [[COND_TRUE22]] ], [ [[TMP28]], [[COND_FALSE23]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND25]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP29:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP29]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP30:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP30]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP31:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP32:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP26:%.*]] = icmp sgt i32 [[TMP31]], [[TMP32]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP26]], label [[COND_TRUE27:%.*]], label [[COND_FALSE28:%.*]]
+// SIMD-ONLY0:       cond.true27:
+// SIMD-ONLY0-NEXT:    [[TMP33:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END29:%.*]]
+// SIMD-ONLY0:       cond.false28:
+// SIMD-ONLY0-NEXT:    [[TMP34:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END29]]
+// SIMD-ONLY0:       cond.end29:
+// SIMD-ONLY0-NEXT:    [[COND30:%.*]] = phi i32 [ [[TMP33]], [[COND_TRUE27]] ], [ [[TMP34]], [[COND_FALSE28]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND30]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP35:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP35]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP36:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP37:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP31:%.*]] = icmp slt i32 [[TMP36]], [[TMP37]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP31]], label [[COND_TRUE32:%.*]], label [[COND_FALSE33:%.*]]
+// SIMD-ONLY0:       cond.true32:
+// SIMD-ONLY0-NEXT:    [[TMP38:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END34:%.*]]
+// SIMD-ONLY0:       cond.false33:
+// SIMD-ONLY0-NEXT:    [[TMP39:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END34]]
+// SIMD-ONLY0:       cond.end34:
+// SIMD-ONLY0-NEXT:    [[COND35:%.*]] = phi i32 [ [[TMP38]], [[COND_TRUE32]] ], [ [[TMP39]], [[COND_FALSE33]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND35]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP40:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP40]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP41:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP42:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP36:%.*]] = icmp eq i32 [[TMP41]], [[TMP42]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP36]], label [[COND_TRUE37:%.*]], label [[COND_FALSE38:%.*]]
+// SIMD-ONLY0:       cond.true37:
+// SIMD-ONLY0-NEXT:    [[TMP43:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END39:%.*]]
+// SIMD-ONLY0:       cond.false38:
+// SIMD-ONLY0-NEXT:    [[TMP44:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END39]]
+// SIMD-ONLY0:       cond.end39:
+// SIMD-ONLY0-NEXT:    [[COND40:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE37]] ], [ [[TMP44]], [[COND_FALSE38]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND40]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP45:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP46:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP41:%.*]] = icmp sgt i32 [[TMP45]], [[TMP46]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP41]], label [[COND_TRUE42:%.*]], label [[COND_FALSE43:%.*]]
+// SIMD-ONLY0:       cond.true42:
+// SIMD-ONLY0-NEXT:    [[TMP47:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END44:%.*]]
+// SIMD-ONLY0:       cond.false43:
+// SIMD-ONLY0-NEXT:    [[TMP48:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END44]]
+// SIMD-ONLY0:       cond.end44:
+// SIMD-ONLY0-NEXT:    [[COND45:%.*]] = phi i32 [ [[TMP47]], [[COND_TRUE42]] ], [ [[TMP48]], [[COND_FALSE43]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND45]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP49:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP49]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP50:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP51:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP46:%.*]] = icmp slt i32 [[TMP50]], [[TMP51]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP46]], label [[COND_TRUE47:%.*]], label [[COND_FALSE48:%.*]]
+// SIMD-ONLY0:       cond.true47:
+// SIMD-ONLY0-NEXT:    [[TMP52:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END49:%.*]]
+// SIMD-ONLY0:       cond.false48:
+// SIMD-ONLY0-NEXT:    [[TMP53:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END49]]
+// SIMD-ONLY0:       cond.end49:
+// SIMD-ONLY0-NEXT:    [[COND50:%.*]] = phi i32 [ [[TMP52]], [[COND_TRUE47]] ], [ [[TMP53]], [[COND_FALSE48]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND50]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP54:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP54]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP55:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP56:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP51:%.*]] = icmp eq i32 [[TMP55]], [[TMP56]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP51]], label [[COND_TRUE52:%.*]], label [[COND_FALSE53:%.*]]
+// SIMD-ONLY0:       cond.true52:
+// SIMD-ONLY0-NEXT:    [[TMP57:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END54:%.*]]
+// SIMD-ONLY0:       cond.false53:
+// SIMD-ONLY0-NEXT:    [[TMP58:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END54]]
+// SIMD-ONLY0:       cond.end54:
+// SIMD-ONLY0-NEXT:    [[COND55:%.*]] = phi i32 [ [[TMP57]], [[COND_TRUE52]] ], [ [[TMP58]], [[COND_FALSE53]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND55]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP59:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP59]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP60:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP60]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP61:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP62:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP56:%.*]] = icmp sgt i32 [[TMP61]], [[TMP62]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP56]], label [[COND_TRUE57:%.*]], label [[COND_FALSE58:%.*]]
+// SIMD-ONLY0:       cond.true57:
+// SIMD-ONLY0-NEXT:    [[TMP63:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END59:%.*]]
+// SIMD-ONLY0:       cond.false58:
+// SIMD-ONLY0-NEXT:    [[TMP64:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END59]]
+// SIMD-ONLY0:       cond.end59:
+// SIMD-ONLY0-NEXT:    [[COND60:%.*]] = phi i32 [ [[TMP63]], [[COND_TRUE57]] ], [ [[TMP64]], [[COND_FALSE58]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND60]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP65:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP65]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP66:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP67:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP61:%.*]] = icmp slt i32 [[TMP66]], [[TMP67]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP61]], label [[COND_TRUE62:%.*]], label [[COND_FALSE63:%.*]]
+// SIMD-ONLY0:       cond.true62:
+// SIMD-ONLY0-NEXT:    [[TMP68:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END64:%.*]]
+// SIMD-ONLY0:       cond.false63:
+// SIMD-ONLY0-NEXT:    [[TMP69:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END64]]
+// SIMD-ONLY0:       cond.end64:
+// SIMD-ONLY0-NEXT:    [[COND65:%.*]] = phi i32 [ [[TMP68]], [[COND_TRUE62]] ], [ [[TMP69]], [[COND_FALSE63]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND65]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP70:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP70]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP71:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP72:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP66:%.*]] = icmp eq i32 [[TMP71]], [[TMP72]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP66]], label [[COND_TRUE67:%.*]], label [[COND_FALSE68:%.*]]
+// SIMD-ONLY0:       cond.true67:
+// SIMD-ONLY0-NEXT:    [[TMP73:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END69:%.*]]
+// SIMD-ONLY0:       cond.false68:
+// SIMD-ONLY0-NEXT:    [[TMP74:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END69]]
+// SIMD-ONLY0:       cond.end69:
+// SIMD-ONLY0-NEXT:    [[COND70:%.*]] = phi i32 [ [[TMP73]], [[COND_TRUE67]] ], [ [[TMP74]], [[COND_FALSE68]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND70]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP75:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP76:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP71:%.*]] = icmp sgt i32 [[TMP75]], [[TMP76]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP71]], label [[COND_TRUE72:%.*]], label [[COND_FALSE73:%.*]]
+// SIMD-ONLY0:       cond.true72:
+// SIMD-ONLY0-NEXT:    [[TMP77:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END74:%.*]]
+// SIMD-ONLY0:       cond.false73:
+// SIMD-ONLY0-NEXT:    [[TMP78:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END74]]
+// SIMD-ONLY0:       cond.end74:
+// SIMD-ONLY0-NEXT:    [[COND75:%.*]] = phi i32 [ [[TMP77]], [[COND_TRUE72]] ], [ [[TMP78]], [[COND_FALSE73]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND75]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP79:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP79]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP80:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP81:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP76:%.*]] = icmp slt i32 [[TMP80]], [[TMP81]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP76]], label [[COND_TRUE77:%.*]], label [[COND_FALSE78:%.*]]
+// SIMD-ONLY0:       cond.true77:
+// SIMD-ONLY0-NEXT:    [[TMP82:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END79:%.*]]
+// SIMD-ONLY0:       cond.false78:
+// SIMD-ONLY0-NEXT:    [[TMP83:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END79]]
+// SIMD-ONLY0:       cond.end79:
+// SIMD-ONLY0-NEXT:    [[COND80:%.*]] = phi i32 [ [[TMP82]], [[COND_TRUE77]] ], [ [[TMP83]], [[COND_FALSE78]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND80]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP84:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP84]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP85:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP86:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP81:%.*]] = icmp eq i32 [[TMP85]], [[TMP86]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP81]], label [[COND_TRUE82:%.*]], label [[COND_FALSE83:%.*]]
+// SIMD-ONLY0:       cond.true82:
+// SIMD-ONLY0-NEXT:    [[TMP87:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END84:%.*]]
+// SIMD-ONLY0:       cond.false83:
+// SIMD-ONLY0-NEXT:    [[TMP88:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END84]]
+// SIMD-ONLY0:       cond.end84:
+// SIMD-ONLY0-NEXT:    [[COND85:%.*]] = phi i32 [ [[TMP87]], [[COND_TRUE82]] ], [ [[TMP88]], [[COND_FALSE83]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND85]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP89:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP89]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP90:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP90]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP91:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP92:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP86:%.*]] = icmp sgt i32 [[TMP91]], [[TMP92]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP86]], label [[COND_TRUE87:%.*]], label [[COND_FALSE88:%.*]]
+// SIMD-ONLY0:       cond.true87:
+// SIMD-ONLY0-NEXT:    [[TMP93:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END89:%.*]]
+// SIMD-ONLY0:       cond.false88:
+// SIMD-ONLY0-NEXT:    [[TMP94:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END89]]
+// SIMD-ONLY0:       cond.end89:
+// SIMD-ONLY0-NEXT:    [[COND90:%.*]] = phi i32 [ [[TMP93]], [[COND_TRUE87]] ], [ [[TMP94]], [[COND_FALSE88]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND90]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP95:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP95]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP96:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP97:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP91:%.*]] = icmp slt i32 [[TMP96]], [[TMP97]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP91]], label [[COND_TRUE92:%.*]], label [[COND_FALSE93:%.*]]
+// SIMD-ONLY0:       cond.true92:
+// SIMD-ONLY0-NEXT:    [[TMP98:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END94:%.*]]
+// SIMD-ONLY0:       cond.false93:
+// SIMD-ONLY0-NEXT:    [[TMP99:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END94]]
+// SIMD-ONLY0:       cond.end94:
+// SIMD-ONLY0-NEXT:    [[COND95:%.*]] = phi i32 [ [[TMP98]], [[COND_TRUE92]] ], [ [[TMP99]], [[COND_FALSE93]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND95]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP100:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP100]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP101:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP102:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP96:%.*]] = icmp eq i32 [[TMP101]], [[TMP102]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP96]], label [[COND_TRUE97:%.*]], label [[COND_FALSE98:%.*]]
+// SIMD-ONLY0:       cond.true97:
+// SIMD-ONLY0-NEXT:    [[TMP103:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END99:%.*]]
+// SIMD-ONLY0:       cond.false98:
+// SIMD-ONLY0-NEXT:    [[TMP104:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END99]]
+// SIMD-ONLY0:       cond.end99:
+// SIMD-ONLY0-NEXT:    [[COND100:%.*]] = phi i32 [ [[TMP103]], [[COND_TRUE97]] ], [ [[TMP104]], [[COND_FALSE98]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND100]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP105:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP106:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP101:%.*]] = icmp sgt i32 [[TMP105]], [[TMP106]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP101]], label [[COND_TRUE102:%.*]], label [[COND_FALSE103:%.*]]
+// SIMD-ONLY0:       cond.true102:
+// SIMD-ONLY0-NEXT:    [[TMP107:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END104:%.*]]
+// SIMD-ONLY0:       cond.false103:
+// SIMD-ONLY0-NEXT:    [[TMP108:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END104]]
+// SIMD-ONLY0:       cond.end104:
+// SIMD-ONLY0-NEXT:    [[COND105:%.*]] = phi i32 [ [[TMP107]], [[COND_TRUE102]] ], [ [[TMP108]], [[COND_FALSE103]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND105]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP109:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP109]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP110:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP111:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP106:%.*]] = icmp slt i32 [[TMP110]], [[TMP111]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP106]], label [[COND_TRUE107:%.*]], label [[COND_FALSE108:%.*]]
+// SIMD-ONLY0:       cond.true107:
+// SIMD-ONLY0-NEXT:    [[TMP112:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END109:%.*]]
+// SIMD-ONLY0:       cond.false108:
+// SIMD-ONLY0-NEXT:    [[TMP113:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END109]]
+// SIMD-ONLY0:       cond.end109:
+// SIMD-ONLY0-NEXT:    [[COND110:%.*]] = phi i32 [ [[TMP112]], [[COND_TRUE107]] ], [ [[TMP113]], [[COND_FALSE108]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND110]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP114:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP114]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP115:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP116:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP111:%.*]] = icmp eq i32 [[TMP115]], [[TMP116]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP111]], label [[COND_TRUE112:%.*]], label [[COND_FALSE113:%.*]]
+// SIMD-ONLY0:       cond.true112:
+// SIMD-ONLY0-NEXT:    [[TMP117:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END114:%.*]]
+// SIMD-ONLY0:       cond.false113:
+// SIMD-ONLY0-NEXT:    [[TMP118:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END114]]
+// SIMD-ONLY0:       cond.end114:
+// SIMD-ONLY0-NEXT:    [[COND115:%.*]] = phi i32 [ [[TMP117]], [[COND_TRUE112]] ], [ [[TMP118]], [[COND_FALSE113]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND115]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP119:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP119]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP120:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP120]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP121:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP122:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP116:%.*]] = icmp sgt i32 [[TMP121]], [[TMP122]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP116]], label [[COND_TRUE117:%.*]], label [[COND_FALSE118:%.*]]
+// SIMD-ONLY0:       cond.true117:
+// SIMD-ONLY0-NEXT:    [[TMP123:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END119:%.*]]
+// SIMD-ONLY0:       cond.false118:
+// SIMD-ONLY0-NEXT:    [[TMP124:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END119]]
+// SIMD-ONLY0:       cond.end119:
+// SIMD-ONLY0-NEXT:    [[COND120:%.*]] = phi i32 [ [[TMP123]], [[COND_TRUE117]] ], [ [[TMP124]], [[COND_FALSE118]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND120]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP125:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP125]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP126:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP127:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP121:%.*]] = icmp slt i32 [[TMP126]], [[TMP127]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP121]], label [[COND_TRUE122:%.*]], label [[COND_FALSE123:%.*]]
+// SIMD-ONLY0:       cond.true122:
+// SIMD-ONLY0-NEXT:    [[TMP128:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END124:%.*]]
+// SIMD-ONLY0:       cond.false123:
+// SIMD-ONLY0-NEXT:    [[TMP129:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END124]]
+// SIMD-ONLY0:       cond.end124:
+// SIMD-ONLY0-NEXT:    [[COND125:%.*]] = phi i32 [ [[TMP128]], [[COND_TRUE122]] ], [ [[TMP129]], [[COND_FALSE123]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND125]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP130:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP130]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP131:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP132:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP126:%.*]] = icmp eq i32 [[TMP131]], [[TMP132]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP126]], label [[COND_TRUE127:%.*]], label [[COND_FALSE128:%.*]]
+// SIMD-ONLY0:       cond.true127:
+// SIMD-ONLY0-NEXT:    [[TMP133:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END129:%.*]]
+// SIMD-ONLY0:       cond.false128:
+// SIMD-ONLY0-NEXT:    [[TMP134:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END129]]
+// SIMD-ONLY0:       cond.end129:
+// SIMD-ONLY0-NEXT:    [[COND130:%.*]] = phi i32 [ [[TMP133]], [[COND_TRUE127]] ], [ [[TMP134]], [[COND_FALSE128]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND130]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP135:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP136:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP131:%.*]] = icmp sgt i32 [[TMP135]], [[TMP136]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP131]], label [[COND_TRUE132:%.*]], label [[COND_FALSE133:%.*]]
+// SIMD-ONLY0:       cond.true132:
+// SIMD-ONLY0-NEXT:    [[TMP137:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END134:%.*]]
+// SIMD-ONLY0:       cond.false133:
+// SIMD-ONLY0-NEXT:    [[TMP138:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END134]]
+// SIMD-ONLY0:       cond.end134:
+// SIMD-ONLY0-NEXT:    [[COND135:%.*]] = phi i32 [ [[TMP137]], [[COND_TRUE132]] ], [ [[TMP138]], [[COND_FALSE133]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND135]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP139:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP139]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP140:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP141:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP136:%.*]] = icmp slt i32 [[TMP140]], [[TMP141]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP136]], label [[COND_TRUE137:%.*]], label [[COND_FALSE138:%.*]]
+// SIMD-ONLY0:       cond.true137:
+// SIMD-ONLY0-NEXT:    [[TMP142:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END139:%.*]]
+// SIMD-ONLY0:       cond.false138:
+// SIMD-ONLY0-NEXT:    [[TMP143:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END139]]
+// SIMD-ONLY0:       cond.end139:
+// SIMD-ONLY0-NEXT:    [[COND140:%.*]] = phi i32 [ [[TMP142]], [[COND_TRUE137]] ], [ [[TMP143]], [[COND_FALSE138]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND140]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP144:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP144]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP145:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP146:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP141:%.*]] = icmp eq i32 [[TMP145]], [[TMP146]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP141]], label [[COND_TRUE142:%.*]], label [[COND_FALSE143:%.*]]
+// SIMD-ONLY0:       cond.true142:
+// SIMD-ONLY0-NEXT:    [[TMP147:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END144:%.*]]
+// SIMD-ONLY0:       cond.false143:
+// SIMD-ONLY0-NEXT:    [[TMP148:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END144]]
+// SIMD-ONLY0:       cond.end144:
+// SIMD-ONLY0-NEXT:    [[COND145:%.*]] = phi i32 [ [[TMP147]], [[COND_TRUE142]] ], [ [[TMP148]], [[COND_FALSE143]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND145]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP149:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP149]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP150:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP150]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP151:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP152:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP146:%.*]] = icmp sgt i32 [[TMP151]], [[TMP152]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP146]], label [[COND_TRUE147:%.*]], label [[COND_FALSE148:%.*]]
+// SIMD-ONLY0:       cond.true147:
+// SIMD-ONLY0-NEXT:    [[TMP153:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END149:%.*]]
+// SIMD-ONLY0:       cond.false148:
+// SIMD-ONLY0-NEXT:    [[TMP154:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END149]]
+// SIMD-ONLY0:       cond.end149:
+// SIMD-ONLY0-NEXT:    [[COND150:%.*]] = phi i32 [ [[TMP153]], [[COND_TRUE147]] ], [ [[TMP154]], [[COND_FALSE148]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND150]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP155:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP155]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP156:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP157:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP151:%.*]] = icmp slt i32 [[TMP156]], [[TMP157]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP151]], label [[COND_TRUE152:%.*]], label [[COND_FALSE153:%.*]]
+// SIMD-ONLY0:       cond.true152:
+// SIMD-ONLY0-NEXT:    [[TMP158:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END154:%.*]]
+// SIMD-ONLY0:       cond.false153:
+// SIMD-ONLY0-NEXT:    [[TMP159:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END154]]
+// SIMD-ONLY0:       cond.end154:
+// SIMD-ONLY0-NEXT:    [[COND155:%.*]] = phi i32 [ [[TMP158]], [[COND_TRUE152]] ], [ [[TMP159]], [[COND_FALSE153]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND155]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP160:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP160]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP161:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP162:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP156:%.*]] = icmp eq i32 [[TMP161]], [[TMP162]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP156]], label [[COND_TRUE157:%.*]], label [[COND_FALSE158:%.*]]
+// SIMD-ONLY0:       cond.true157:
+// SIMD-ONLY0-NEXT:    [[TMP163:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END159:%.*]]
+// SIMD-ONLY0:       cond.false158:
+// SIMD-ONLY0-NEXT:    [[TMP164:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END159]]
+// SIMD-ONLY0:       cond.end159:
+// SIMD-ONLY0-NEXT:    [[COND160:%.*]] = phi i32 [ [[TMP163]], [[COND_TRUE157]] ], [ [[TMP164]], [[COND_FALSE158]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND160]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP165:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP166:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP161:%.*]] = icmp sgt i32 [[TMP165]], [[TMP166]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP161]], label [[COND_TRUE162:%.*]], label [[COND_FALSE163:%.*]]
+// SIMD-ONLY0:       cond.true162:
+// SIMD-ONLY0-NEXT:    [[TMP167:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END164:%.*]]
+// SIMD-ONLY0:       cond.false163:
+// SIMD-ONLY0-NEXT:    [[TMP168:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END164]]
+// SIMD-ONLY0:       cond.end164:
+// SIMD-ONLY0-NEXT:    [[COND165:%.*]] = phi i32 [ [[TMP167]], [[COND_TRUE162]] ], [ [[TMP168]], [[COND_FALSE163]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND165]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP169:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP169]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP170:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP171:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP166:%.*]] = icmp slt i32 [[TMP170]], [[TMP171]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP166]], label [[COND_TRUE167:%.*]], label [[COND_FALSE168:%.*]]
+// SIMD-ONLY0:       cond.true167:
+// SIMD-ONLY0-NEXT:    [[TMP172:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END169:%.*]]
+// SIMD-ONLY0:       cond.false168:
+// SIMD-ONLY0-NEXT:    [[TMP173:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END169]]
+// SIMD-ONLY0:       cond.end169:
+// SIMD-ONLY0-NEXT:    [[COND170:%.*]] = phi i32 [ [[TMP172]], [[COND_TRUE167]] ], [ [[TMP173]], [[COND_FALSE168]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND170]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP174:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP174]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP175:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP176:%.*]] = load i32, ptr [[IE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP171:%.*]] = icmp eq i32 [[TMP175]], [[TMP176]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP171]], label [[COND_TRUE172:%.*]], label [[COND_FALSE173:%.*]]
+// SIMD-ONLY0:       cond.true172:
+// SIMD-ONLY0-NEXT:    [[TMP177:%.*]] = load i32, ptr [[ID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END174:%.*]]
+// SIMD-ONLY0:       cond.false173:
+// SIMD-ONLY0-NEXT:    [[TMP178:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END174]]
+// SIMD-ONLY0:       cond.end174:
+// SIMD-ONLY0-NEXT:    [[COND175:%.*]] = phi i32 [ [[TMP177]], [[COND_TRUE172]] ], [ [[TMP178]], [[COND_FALSE173]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND175]], ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP179:%.*]] = load i32, ptr [[IX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP179]], ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP180:%.*]] = load i32, ptr [[IV]], align 4
+// SIMD-ONLY0-NEXT:    ret i32 [[TMP180]]
+//
+//
+// SIMD-ONLY0-LABEL: @uixevd(
+// SIMD-ONLY0-NEXT:  entry:
+// SIMD-ONLY0-NEXT:    [[UIX:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT:    [[UIV:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT:    [[UIE:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT:    [[UID:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT:    [[TMP0:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP0]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[TMP1]], [[TMP2]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// SIMD-ONLY0:       cond.true:
+// SIMD-ONLY0-NEXT:    [[TMP3:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END:%.*]]
+// SIMD-ONLY0:       cond.false:
+// SIMD-ONLY0-NEXT:    [[TMP4:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END]]
+// SIMD-ONLY0:       cond.end:
+// SIMD-ONLY0-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP3]], [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP5:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP5]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP6:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP7:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP1:%.*]] = icmp ult i32 [[TMP6]], [[TMP7]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1]], label [[COND_TRUE2:%.*]], label [[COND_FALSE3:%.*]]
+// SIMD-ONLY0:       cond.true2:
+// SIMD-ONLY0-NEXT:    [[TMP8:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4:%.*]]
+// SIMD-ONLY0:       cond.false3:
+// SIMD-ONLY0-NEXT:    [[TMP9:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4]]
+// SIMD-ONLY0:       cond.end4:
+// SIMD-ONLY0-NEXT:    [[COND5:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE2]] ], [ [[TMP9]], [[COND_FALSE3]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND5]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP10:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP10]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP11:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP12:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6:%.*]] = icmp eq i32 [[TMP11]], [[TMP12]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]]
+// SIMD-ONLY0:       cond.true7:
+// SIMD-ONLY0-NEXT:    [[TMP13:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END9:%.*]]
+// SIMD-ONLY0:       cond.false8:
+// SIMD-ONLY0-NEXT:    [[TMP14:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END9]]
+// SIMD-ONLY0:       cond.end9:
+// SIMD-ONLY0-NEXT:    [[COND10:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE7]] ], [ [[TMP14]], [[COND_FALSE8]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND10]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP15:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP16:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP11:%.*]] = icmp ugt i32 [[TMP15]], [[TMP16]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]]
+// SIMD-ONLY0:       cond.true12:
+// SIMD-ONLY0-NEXT:    [[TMP17:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END14:%.*]]
+// SIMD-ONLY0:       cond.false13:
+// SIMD-ONLY0-NEXT:    [[TMP18:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END14]]
+// SIMD-ONLY0:       cond.end14:
+// SIMD-ONLY0-NEXT:    [[COND15:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE12]] ], [ [[TMP18]], [[COND_FALSE13]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND15]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP19:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP19]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP20:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP21:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP16:%.*]] = icmp ult i32 [[TMP20]], [[TMP21]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP16]], label [[COND_TRUE17:%.*]], label [[COND_FALSE18:%.*]]
+// SIMD-ONLY0:       cond.true17:
+// SIMD-ONLY0-NEXT:    [[TMP22:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END19:%.*]]
+// SIMD-ONLY0:       cond.false18:
+// SIMD-ONLY0-NEXT:    [[TMP23:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END19]]
+// SIMD-ONLY0:       cond.end19:
+// SIMD-ONLY0-NEXT:    [[COND20:%.*]] = phi i32 [ [[TMP22]], [[COND_TRUE17]] ], [ [[TMP23]], [[COND_FALSE18]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND20]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP24:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP24]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP25:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP26:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP21:%.*]] = icmp eq i32 [[TMP25]], [[TMP26]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP21]], label [[COND_TRUE22:%.*]], label [[COND_FALSE23:%.*]]
+// SIMD-ONLY0:       cond.true22:
+// SIMD-ONLY0-NEXT:    [[TMP27:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END24:%.*]]
+// SIMD-ONLY0:       cond.false23:
+// SIMD-ONLY0-NEXT:    [[TMP28:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END24]]
+// SIMD-ONLY0:       cond.end24:
+// SIMD-ONLY0-NEXT:    [[COND25:%.*]] = phi i32 [ [[TMP27]], [[COND_TRUE22]] ], [ [[TMP28]], [[COND_FALSE23]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND25]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP29:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP29]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP30:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP30]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP31:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP32:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP26:%.*]] = icmp ugt i32 [[TMP31]], [[TMP32]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP26]], label [[COND_TRUE27:%.*]], label [[COND_FALSE28:%.*]]
+// SIMD-ONLY0:       cond.true27:
+// SIMD-ONLY0-NEXT:    [[TMP33:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END29:%.*]]
+// SIMD-ONLY0:       cond.false28:
+// SIMD-ONLY0-NEXT:    [[TMP34:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END29]]
+// SIMD-ONLY0:       cond.end29:
+// SIMD-ONLY0-NEXT:    [[COND30:%.*]] = phi i32 [ [[TMP33]], [[COND_TRUE27]] ], [ [[TMP34]], [[COND_FALSE28]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND30]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP35:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP35]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP36:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP37:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP31:%.*]] = icmp ult i32 [[TMP36]], [[TMP37]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP31]], label [[COND_TRUE32:%.*]], label [[COND_FALSE33:%.*]]
+// SIMD-ONLY0:       cond.true32:
+// SIMD-ONLY0-NEXT:    [[TMP38:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END34:%.*]]
+// SIMD-ONLY0:       cond.false33:
+// SIMD-ONLY0-NEXT:    [[TMP39:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END34]]
+// SIMD-ONLY0:       cond.end34:
+// SIMD-ONLY0-NEXT:    [[COND35:%.*]] = phi i32 [ [[TMP38]], [[COND_TRUE32]] ], [ [[TMP39]], [[COND_FALSE33]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND35]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP40:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP40]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP41:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP42:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP36:%.*]] = icmp eq i32 [[TMP41]], [[TMP42]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP36]], label [[COND_TRUE37:%.*]], label [[COND_FALSE38:%.*]]
+// SIMD-ONLY0:       cond.true37:
+// SIMD-ONLY0-NEXT:    [[TMP43:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END39:%.*]]
+// SIMD-ONLY0:       cond.false38:
+// SIMD-ONLY0-NEXT:    [[TMP44:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END39]]
+// SIMD-ONLY0:       cond.end39:
+// SIMD-ONLY0-NEXT:    [[COND40:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE37]] ], [ [[TMP44]], [[COND_FALSE38]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND40]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP45:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP46:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP41:%.*]] = icmp ugt i32 [[TMP45]], [[TMP46]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP41]], label [[COND_TRUE42:%.*]], label [[COND_FALSE43:%.*]]
+// SIMD-ONLY0:       cond.true42:
+// SIMD-ONLY0-NEXT:    [[TMP47:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END44:%.*]]
+// SIMD-ONLY0:       cond.false43:
+// SIMD-ONLY0-NEXT:    [[TMP48:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END44]]
+// SIMD-ONLY0:       cond.end44:
+// SIMD-ONLY0-NEXT:    [[COND45:%.*]] = phi i32 [ [[TMP47]], [[COND_TRUE42]] ], [ [[TMP48]], [[COND_FALSE43]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND45]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP49:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP49]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP50:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP51:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP46:%.*]] = icmp ult i32 [[TMP50]], [[TMP51]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP46]], label [[COND_TRUE47:%.*]], label [[COND_FALSE48:%.*]]
+// SIMD-ONLY0:       cond.true47:
+// SIMD-ONLY0-NEXT:    [[TMP52:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END49:%.*]]
+// SIMD-ONLY0:       cond.false48:
+// SIMD-ONLY0-NEXT:    [[TMP53:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END49]]
+// SIMD-ONLY0:       cond.end49:
+// SIMD-ONLY0-NEXT:    [[COND50:%.*]] = phi i32 [ [[TMP52]], [[COND_TRUE47]] ], [ [[TMP53]], [[COND_FALSE48]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND50]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP54:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP54]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP55:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP56:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP51:%.*]] = icmp eq i32 [[TMP55]], [[TMP56]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP51]], label [[COND_TRUE52:%.*]], label [[COND_FALSE53:%.*]]
+// SIMD-ONLY0:       cond.true52:
+// SIMD-ONLY0-NEXT:    [[TMP57:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END54:%.*]]
+// SIMD-ONLY0:       cond.false53:
+// SIMD-ONLY0-NEXT:    [[TMP58:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END54]]
+// SIMD-ONLY0:       cond.end54:
+// SIMD-ONLY0-NEXT:    [[COND55:%.*]] = phi i32 [ [[TMP57]], [[COND_TRUE52]] ], [ [[TMP58]], [[COND_FALSE53]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND55]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP59:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP59]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP60:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP60]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP61:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP62:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP56:%.*]] = icmp ugt i32 [[TMP61]], [[TMP62]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP56]], label [[COND_TRUE57:%.*]], label [[COND_FALSE58:%.*]]
+// SIMD-ONLY0:       cond.true57:
+// SIMD-ONLY0-NEXT:    [[TMP63:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END59:%.*]]
+// SIMD-ONLY0:       cond.false58:
+// SIMD-ONLY0-NEXT:    [[TMP64:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END59]]
+// SIMD-ONLY0:       cond.end59:
+// SIMD-ONLY0-NEXT:    [[COND60:%.*]] = phi i32 [ [[TMP63]], [[COND_TRUE57]] ], [ [[TMP64]], [[COND_FALSE58]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND60]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP65:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP65]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP66:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP67:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP61:%.*]] = icmp ult i32 [[TMP66]], [[TMP67]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP61]], label [[COND_TRUE62:%.*]], label [[COND_FALSE63:%.*]]
+// SIMD-ONLY0:       cond.true62:
+// SIMD-ONLY0-NEXT:    [[TMP68:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END64:%.*]]
+// SIMD-ONLY0:       cond.false63:
+// SIMD-ONLY0-NEXT:    [[TMP69:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END64]]
+// SIMD-ONLY0:       cond.end64:
+// SIMD-ONLY0-NEXT:    [[COND65:%.*]] = phi i32 [ [[TMP68]], [[COND_TRUE62]] ], [ [[TMP69]], [[COND_FALSE63]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND65]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP70:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP70]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP71:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP72:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP66:%.*]] = icmp eq i32 [[TMP71]], [[TMP72]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP66]], label [[COND_TRUE67:%.*]], label [[COND_FALSE68:%.*]]
+// SIMD-ONLY0:       cond.true67:
+// SIMD-ONLY0-NEXT:    [[TMP73:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END69:%.*]]
+// SIMD-ONLY0:       cond.false68:
+// SIMD-ONLY0-NEXT:    [[TMP74:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END69]]
+// SIMD-ONLY0:       cond.end69:
+// SIMD-ONLY0-NEXT:    [[COND70:%.*]] = phi i32 [ [[TMP73]], [[COND_TRUE67]] ], [ [[TMP74]], [[COND_FALSE68]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND70]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP75:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP76:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP71:%.*]] = icmp ugt i32 [[TMP75]], [[TMP76]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP71]], label [[COND_TRUE72:%.*]], label [[COND_FALSE73:%.*]]
+// SIMD-ONLY0:       cond.true72:
+// SIMD-ONLY0-NEXT:    [[TMP77:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END74:%.*]]
+// SIMD-ONLY0:       cond.false73:
+// SIMD-ONLY0-NEXT:    [[TMP78:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END74]]
+// SIMD-ONLY0:       cond.end74:
+// SIMD-ONLY0-NEXT:    [[COND75:%.*]] = phi i32 [ [[TMP77]], [[COND_TRUE72]] ], [ [[TMP78]], [[COND_FALSE73]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND75]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP79:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP79]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP80:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP81:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP76:%.*]] = icmp ult i32 [[TMP80]], [[TMP81]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP76]], label [[COND_TRUE77:%.*]], label [[COND_FALSE78:%.*]]
+// SIMD-ONLY0:       cond.true77:
+// SIMD-ONLY0-NEXT:    [[TMP82:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END79:%.*]]
+// SIMD-ONLY0:       cond.false78:
+// SIMD-ONLY0-NEXT:    [[TMP83:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END79]]
+// SIMD-ONLY0:       cond.end79:
+// SIMD-ONLY0-NEXT:    [[COND80:%.*]] = phi i32 [ [[TMP82]], [[COND_TRUE77]] ], [ [[TMP83]], [[COND_FALSE78]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND80]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP84:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP84]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP85:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP86:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP81:%.*]] = icmp eq i32 [[TMP85]], [[TMP86]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP81]], label [[COND_TRUE82:%.*]], label [[COND_FALSE83:%.*]]
+// SIMD-ONLY0:       cond.true82:
+// SIMD-ONLY0-NEXT:    [[TMP87:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END84:%.*]]
+// SIMD-ONLY0:       cond.false83:
+// SIMD-ONLY0-NEXT:    [[TMP88:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END84]]
+// SIMD-ONLY0:       cond.end84:
+// SIMD-ONLY0-NEXT:    [[COND85:%.*]] = phi i32 [ [[TMP87]], [[COND_TRUE82]] ], [ [[TMP88]], [[COND_FALSE83]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND85]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP89:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP89]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP90:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP90]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP91:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP92:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP86:%.*]] = icmp ugt i32 [[TMP91]], [[TMP92]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP86]], label [[COND_TRUE87:%.*]], label [[COND_FALSE88:%.*]]
+// SIMD-ONLY0:       cond.true87:
+// SIMD-ONLY0-NEXT:    [[TMP93:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END89:%.*]]
+// SIMD-ONLY0:       cond.false88:
+// SIMD-ONLY0-NEXT:    [[TMP94:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END89]]
+// SIMD-ONLY0:       cond.end89:
+// SIMD-ONLY0-NEXT:    [[COND90:%.*]] = phi i32 [ [[TMP93]], [[COND_TRUE87]] ], [ [[TMP94]], [[COND_FALSE88]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND90]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP95:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP95]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP96:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP97:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP91:%.*]] = icmp ult i32 [[TMP96]], [[TMP97]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP91]], label [[COND_TRUE92:%.*]], label [[COND_FALSE93:%.*]]
+// SIMD-ONLY0:       cond.true92:
+// SIMD-ONLY0-NEXT:    [[TMP98:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END94:%.*]]
+// SIMD-ONLY0:       cond.false93:
+// SIMD-ONLY0-NEXT:    [[TMP99:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END94]]
+// SIMD-ONLY0:       cond.end94:
+// SIMD-ONLY0-NEXT:    [[COND95:%.*]] = phi i32 [ [[TMP98]], [[COND_TRUE92]] ], [ [[TMP99]], [[COND_FALSE93]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND95]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP100:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP100]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP101:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP102:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP96:%.*]] = icmp eq i32 [[TMP101]], [[TMP102]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP96]], label [[COND_TRUE97:%.*]], label [[COND_FALSE98:%.*]]
+// SIMD-ONLY0:       cond.true97:
+// SIMD-ONLY0-NEXT:    [[TMP103:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END99:%.*]]
+// SIMD-ONLY0:       cond.false98:
+// SIMD-ONLY0-NEXT:    [[TMP104:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END99]]
+// SIMD-ONLY0:       cond.end99:
+// SIMD-ONLY0-NEXT:    [[COND100:%.*]] = phi i32 [ [[TMP103]], [[COND_TRUE97]] ], [ [[TMP104]], [[COND_FALSE98]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND100]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP105:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP106:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP101:%.*]] = icmp ugt i32 [[TMP105]], [[TMP106]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP101]], label [[COND_TRUE102:%.*]], label [[COND_FALSE103:%.*]]
+// SIMD-ONLY0:       cond.true102:
+// SIMD-ONLY0-NEXT:    [[TMP107:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END104:%.*]]
+// SIMD-ONLY0:       cond.false103:
+// SIMD-ONLY0-NEXT:    [[TMP108:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END104]]
+// SIMD-ONLY0:       cond.end104:
+// SIMD-ONLY0-NEXT:    [[COND105:%.*]] = phi i32 [ [[TMP107]], [[COND_TRUE102]] ], [ [[TMP108]], [[COND_FALSE103]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND105]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP109:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP109]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP110:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP111:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP106:%.*]] = icmp ult i32 [[TMP110]], [[TMP111]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP106]], label [[COND_TRUE107:%.*]], label [[COND_FALSE108:%.*]]
+// SIMD-ONLY0:       cond.true107:
+// SIMD-ONLY0-NEXT:    [[TMP112:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END109:%.*]]
+// SIMD-ONLY0:       cond.false108:
+// SIMD-ONLY0-NEXT:    [[TMP113:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END109]]
+// SIMD-ONLY0:       cond.end109:
+// SIMD-ONLY0-NEXT:    [[COND110:%.*]] = phi i32 [ [[TMP112]], [[COND_TRUE107]] ], [ [[TMP113]], [[COND_FALSE108]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND110]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP114:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP114]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP115:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP116:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP111:%.*]] = icmp eq i32 [[TMP115]], [[TMP116]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP111]], label [[COND_TRUE112:%.*]], label [[COND_FALSE113:%.*]]
+// SIMD-ONLY0:       cond.true112:
+// SIMD-ONLY0-NEXT:    [[TMP117:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END114:%.*]]
+// SIMD-ONLY0:       cond.false113:
+// SIMD-ONLY0-NEXT:    [[TMP118:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END114]]
+// SIMD-ONLY0:       cond.end114:
+// SIMD-ONLY0-NEXT:    [[COND115:%.*]] = phi i32 [ [[TMP117]], [[COND_TRUE112]] ], [ [[TMP118]], [[COND_FALSE113]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND115]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP119:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP119]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP120:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP120]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP121:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP122:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP116:%.*]] = icmp ugt i32 [[TMP121]], [[TMP122]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP116]], label [[COND_TRUE117:%.*]], label [[COND_FALSE118:%.*]]
+// SIMD-ONLY0:       cond.true117:
+// SIMD-ONLY0-NEXT:    [[TMP123:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END119:%.*]]
+// SIMD-ONLY0:       cond.false118:
+// SIMD-ONLY0-NEXT:    [[TMP124:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END119]]
+// SIMD-ONLY0:       cond.end119:
+// SIMD-ONLY0-NEXT:    [[COND120:%.*]] = phi i32 [ [[TMP123]], [[COND_TRUE117]] ], [ [[TMP124]], [[COND_FALSE118]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND120]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP125:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP125]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP126:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP127:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP121:%.*]] = icmp ult i32 [[TMP126]], [[TMP127]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP121]], label [[COND_TRUE122:%.*]], label [[COND_FALSE123:%.*]]
+// SIMD-ONLY0:       cond.true122:
+// SIMD-ONLY0-NEXT:    [[TMP128:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END124:%.*]]
+// SIMD-ONLY0:       cond.false123:
+// SIMD-ONLY0-NEXT:    [[TMP129:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END124]]
+// SIMD-ONLY0:       cond.end124:
+// SIMD-ONLY0-NEXT:    [[COND125:%.*]] = phi i32 [ [[TMP128]], [[COND_TRUE122]] ], [ [[TMP129]], [[COND_FALSE123]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND125]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP130:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP130]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP131:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP132:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP126:%.*]] = icmp eq i32 [[TMP131]], [[TMP132]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP126]], label [[COND_TRUE127:%.*]], label [[COND_FALSE128:%.*]]
+// SIMD-ONLY0:       cond.true127:
+// SIMD-ONLY0-NEXT:    [[TMP133:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END129:%.*]]
+// SIMD-ONLY0:       cond.false128:
+// SIMD-ONLY0-NEXT:    [[TMP134:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END129]]
+// SIMD-ONLY0:       cond.end129:
+// SIMD-ONLY0-NEXT:    [[COND130:%.*]] = phi i32 [ [[TMP133]], [[COND_TRUE127]] ], [ [[TMP134]], [[COND_FALSE128]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND130]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP135:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP136:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP131:%.*]] = icmp ugt i32 [[TMP135]], [[TMP136]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP131]], label [[COND_TRUE132:%.*]], label [[COND_FALSE133:%.*]]
+// SIMD-ONLY0:       cond.true132:
+// SIMD-ONLY0-NEXT:    [[TMP137:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END134:%.*]]
+// SIMD-ONLY0:       cond.false133:
+// SIMD-ONLY0-NEXT:    [[TMP138:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END134]]
+// SIMD-ONLY0:       cond.end134:
+// SIMD-ONLY0-NEXT:    [[COND135:%.*]] = phi i32 [ [[TMP137]], [[COND_TRUE132]] ], [ [[TMP138]], [[COND_FALSE133]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND135]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP139:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP139]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP140:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP141:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP136:%.*]] = icmp ult i32 [[TMP140]], [[TMP141]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP136]], label [[COND_TRUE137:%.*]], label [[COND_FALSE138:%.*]]
+// SIMD-ONLY0:       cond.true137:
+// SIMD-ONLY0-NEXT:    [[TMP142:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END139:%.*]]
+// SIMD-ONLY0:       cond.false138:
+// SIMD-ONLY0-NEXT:    [[TMP143:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END139]]
+// SIMD-ONLY0:       cond.end139:
+// SIMD-ONLY0-NEXT:    [[COND140:%.*]] = phi i32 [ [[TMP142]], [[COND_TRUE137]] ], [ [[TMP143]], [[COND_FALSE138]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND140]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP144:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP144]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP145:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP146:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP141:%.*]] = icmp eq i32 [[TMP145]], [[TMP146]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP141]], label [[COND_TRUE142:%.*]], label [[COND_FALSE143:%.*]]
+// SIMD-ONLY0:       cond.true142:
+// SIMD-ONLY0-NEXT:    [[TMP147:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END144:%.*]]
+// SIMD-ONLY0:       cond.false143:
+// SIMD-ONLY0-NEXT:    [[TMP148:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END144]]
+// SIMD-ONLY0:       cond.end144:
+// SIMD-ONLY0-NEXT:    [[COND145:%.*]] = phi i32 [ [[TMP147]], [[COND_TRUE142]] ], [ [[TMP148]], [[COND_FALSE143]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND145]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP149:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP149]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP150:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP150]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP151:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP152:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP146:%.*]] = icmp ugt i32 [[TMP151]], [[TMP152]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP146]], label [[COND_TRUE147:%.*]], label [[COND_FALSE148:%.*]]
+// SIMD-ONLY0:       cond.true147:
+// SIMD-ONLY0-NEXT:    [[TMP153:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END149:%.*]]
+// SIMD-ONLY0:       cond.false148:
+// SIMD-ONLY0-NEXT:    [[TMP154:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END149]]
+// SIMD-ONLY0:       cond.end149:
+// SIMD-ONLY0-NEXT:    [[COND150:%.*]] = phi i32 [ [[TMP153]], [[COND_TRUE147]] ], [ [[TMP154]], [[COND_FALSE148]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND150]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP155:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP155]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP156:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP157:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP151:%.*]] = icmp ult i32 [[TMP156]], [[TMP157]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP151]], label [[COND_TRUE152:%.*]], label [[COND_FALSE153:%.*]]
+// SIMD-ONLY0:       cond.true152:
+// SIMD-ONLY0-NEXT:    [[TMP158:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END154:%.*]]
+// SIMD-ONLY0:       cond.false153:
+// SIMD-ONLY0-NEXT:    [[TMP159:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END154]]
+// SIMD-ONLY0:       cond.end154:
+// SIMD-ONLY0-NEXT:    [[COND155:%.*]] = phi i32 [ [[TMP158]], [[COND_TRUE152]] ], [ [[TMP159]], [[COND_FALSE153]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND155]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP160:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP160]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP161:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP162:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP156:%.*]] = icmp eq i32 [[TMP161]], [[TMP162]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP156]], label [[COND_TRUE157:%.*]], label [[COND_FALSE158:%.*]]
+// SIMD-ONLY0:       cond.true157:
+// SIMD-ONLY0-NEXT:    [[TMP163:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END159:%.*]]
+// SIMD-ONLY0:       cond.false158:
+// SIMD-ONLY0-NEXT:    [[TMP164:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END159]]
+// SIMD-ONLY0:       cond.end159:
+// SIMD-ONLY0-NEXT:    [[COND160:%.*]] = phi i32 [ [[TMP163]], [[COND_TRUE157]] ], [ [[TMP164]], [[COND_FALSE158]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND160]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP165:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP166:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP161:%.*]] = icmp ugt i32 [[TMP165]], [[TMP166]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP161]], label [[COND_TRUE162:%.*]], label [[COND_FALSE163:%.*]]
+// SIMD-ONLY0:       cond.true162:
+// SIMD-ONLY0-NEXT:    [[TMP167:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END164:%.*]]
+// SIMD-ONLY0:       cond.false163:
+// SIMD-ONLY0-NEXT:    [[TMP168:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END164]]
+// SIMD-ONLY0:       cond.end164:
+// SIMD-ONLY0-NEXT:    [[COND165:%.*]] = phi i32 [ [[TMP167]], [[COND_TRUE162]] ], [ [[TMP168]], [[COND_FALSE163]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND165]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP169:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP169]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP170:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP171:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP166:%.*]] = icmp ult i32 [[TMP170]], [[TMP171]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP166]], label [[COND_TRUE167:%.*]], label [[COND_FALSE168:%.*]]
+// SIMD-ONLY0:       cond.true167:
+// SIMD-ONLY0-NEXT:    [[TMP172:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END169:%.*]]
+// SIMD-ONLY0:       cond.false168:
+// SIMD-ONLY0-NEXT:    [[TMP173:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END169]]
+// SIMD-ONLY0:       cond.end169:
+// SIMD-ONLY0-NEXT:    [[COND170:%.*]] = phi i32 [ [[TMP172]], [[COND_TRUE167]] ], [ [[TMP173]], [[COND_FALSE168]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND170]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP174:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP174]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP175:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP176:%.*]] = load i32, ptr [[UIE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP171:%.*]] = icmp eq i32 [[TMP175]], [[TMP176]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP171]], label [[COND_TRUE172:%.*]], label [[COND_FALSE173:%.*]]
+// SIMD-ONLY0:       cond.true172:
+// SIMD-ONLY0-NEXT:    [[TMP177:%.*]] = load i32, ptr [[UID]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END174:%.*]]
+// SIMD-ONLY0:       cond.false173:
+// SIMD-ONLY0-NEXT:    [[TMP178:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END174]]
+// SIMD-ONLY0:       cond.end174:
+// SIMD-ONLY0-NEXT:    [[COND175:%.*]] = phi i32 [ [[TMP177]], [[COND_TRUE172]] ], [ [[TMP178]], [[COND_FALSE173]] ]
+// SIMD-ONLY0-NEXT:    store i32 [[COND175]], ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP179:%.*]] = load i32, ptr [[UIX]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP179]], ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP180:%.*]] = load i32, ptr [[UIV]], align 4
+// SIMD-ONLY0-NEXT:    ret i32 [[TMP180]]
+//
+//
+// SIMD-ONLY0-LABEL: @lxevd(
+// SIMD-ONLY0-NEXT:  entry:
+// SIMD-ONLY0-NEXT:    [[LX:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[LV:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[LE:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[LD:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[TMP0:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP0]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[TMP1]], [[TMP2]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// SIMD-ONLY0:       cond.true:
+// SIMD-ONLY0-NEXT:    [[TMP3:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END:%.*]]
+// SIMD-ONLY0:       cond.false:
+// SIMD-ONLY0-NEXT:    [[TMP4:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END]]
+// SIMD-ONLY0:       cond.end:
+// SIMD-ONLY0-NEXT:    [[COND:%.*]] = phi i64 [ [[TMP3]], [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP5]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP6:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP7:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP1:%.*]] = icmp slt i64 [[TMP6]], [[TMP7]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1]], label [[COND_TRUE2:%.*]], label [[COND_FALSE3:%.*]]
+// SIMD-ONLY0:       cond.true2:
+// SIMD-ONLY0-NEXT:    [[TMP8:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4:%.*]]
+// SIMD-ONLY0:       cond.false3:
+// SIMD-ONLY0-NEXT:    [[TMP9:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4]]
+// SIMD-ONLY0:       cond.end4:
+// SIMD-ONLY0-NEXT:    [[COND5:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE2]] ], [ [[TMP9]], [[COND_FALSE3]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND5]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP10:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP10]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP11:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP12:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6:%.*]] = icmp eq i64 [[TMP11]], [[TMP12]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]]
+// SIMD-ONLY0:       cond.true7:
+// SIMD-ONLY0-NEXT:    [[TMP13:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END9:%.*]]
+// SIMD-ONLY0:       cond.false8:
+// SIMD-ONLY0-NEXT:    [[TMP14:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END9]]
+// SIMD-ONLY0:       cond.end9:
+// SIMD-ONLY0-NEXT:    [[COND10:%.*]] = phi i64 [ [[TMP13]], [[COND_TRUE7]] ], [ [[TMP14]], [[COND_FALSE8]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND10]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP15:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP16:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP11:%.*]] = icmp sgt i64 [[TMP15]], [[TMP16]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]]
+// SIMD-ONLY0:       cond.true12:
+// SIMD-ONLY0-NEXT:    [[TMP17:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END14:%.*]]
+// SIMD-ONLY0:       cond.false13:
+// SIMD-ONLY0-NEXT:    [[TMP18:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END14]]
+// SIMD-ONLY0:       cond.end14:
+// SIMD-ONLY0-NEXT:    [[COND15:%.*]] = phi i64 [ [[TMP17]], [[COND_TRUE12]] ], [ [[TMP18]], [[COND_FALSE13]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND15]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP19:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP19]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP20:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP21:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP16:%.*]] = icmp slt i64 [[TMP20]], [[TMP21]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP16]], label [[COND_TRUE17:%.*]], label [[COND_FALSE18:%.*]]
+// SIMD-ONLY0:       cond.true17:
+// SIMD-ONLY0-NEXT:    [[TMP22:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END19:%.*]]
+// SIMD-ONLY0:       cond.false18:
+// SIMD-ONLY0-NEXT:    [[TMP23:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END19]]
+// SIMD-ONLY0:       cond.end19:
+// SIMD-ONLY0-NEXT:    [[COND20:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE17]] ], [ [[TMP23]], [[COND_FALSE18]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND20]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP24:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP24]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP25:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP26:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP21:%.*]] = icmp eq i64 [[TMP25]], [[TMP26]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP21]], label [[COND_TRUE22:%.*]], label [[COND_FALSE23:%.*]]
+// SIMD-ONLY0:       cond.true22:
+// SIMD-ONLY0-NEXT:    [[TMP27:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END24:%.*]]
+// SIMD-ONLY0:       cond.false23:
+// SIMD-ONLY0-NEXT:    [[TMP28:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END24]]
+// SIMD-ONLY0:       cond.end24:
+// SIMD-ONLY0-NEXT:    [[COND25:%.*]] = phi i64 [ [[TMP27]], [[COND_TRUE22]] ], [ [[TMP28]], [[COND_FALSE23]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND25]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP29:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP29]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP30:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP30]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP31:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP32:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP26:%.*]] = icmp sgt i64 [[TMP31]], [[TMP32]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP26]], label [[COND_TRUE27:%.*]], label [[COND_FALSE28:%.*]]
+// SIMD-ONLY0:       cond.true27:
+// SIMD-ONLY0-NEXT:    [[TMP33:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END29:%.*]]
+// SIMD-ONLY0:       cond.false28:
+// SIMD-ONLY0-NEXT:    [[TMP34:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END29]]
+// SIMD-ONLY0:       cond.end29:
+// SIMD-ONLY0-NEXT:    [[COND30:%.*]] = phi i64 [ [[TMP33]], [[COND_TRUE27]] ], [ [[TMP34]], [[COND_FALSE28]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND30]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP35:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP35]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP36:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP37:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP31:%.*]] = icmp slt i64 [[TMP36]], [[TMP37]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP31]], label [[COND_TRUE32:%.*]], label [[COND_FALSE33:%.*]]
+// SIMD-ONLY0:       cond.true32:
+// SIMD-ONLY0-NEXT:    [[TMP38:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END34:%.*]]
+// SIMD-ONLY0:       cond.false33:
+// SIMD-ONLY0-NEXT:    [[TMP39:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END34]]
+// SIMD-ONLY0:       cond.end34:
+// SIMD-ONLY0-NEXT:    [[COND35:%.*]] = phi i64 [ [[TMP38]], [[COND_TRUE32]] ], [ [[TMP39]], [[COND_FALSE33]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND35]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP40:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP40]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP41:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP42:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP36:%.*]] = icmp eq i64 [[TMP41]], [[TMP42]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP36]], label [[COND_TRUE37:%.*]], label [[COND_FALSE38:%.*]]
+// SIMD-ONLY0:       cond.true37:
+// SIMD-ONLY0-NEXT:    [[TMP43:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END39:%.*]]
+// SIMD-ONLY0:       cond.false38:
+// SIMD-ONLY0-NEXT:    [[TMP44:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END39]]
+// SIMD-ONLY0:       cond.end39:
+// SIMD-ONLY0-NEXT:    [[COND40:%.*]] = phi i64 [ [[TMP43]], [[COND_TRUE37]] ], [ [[TMP44]], [[COND_FALSE38]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND40]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP45:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP46:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP41:%.*]] = icmp sgt i64 [[TMP45]], [[TMP46]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP41]], label [[COND_TRUE42:%.*]], label [[COND_FALSE43:%.*]]
+// SIMD-ONLY0:       cond.true42:
+// SIMD-ONLY0-NEXT:    [[TMP47:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END44:%.*]]
+// SIMD-ONLY0:       cond.false43:
+// SIMD-ONLY0-NEXT:    [[TMP48:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END44]]
+// SIMD-ONLY0:       cond.end44:
+// SIMD-ONLY0-NEXT:    [[COND45:%.*]] = phi i64 [ [[TMP47]], [[COND_TRUE42]] ], [ [[TMP48]], [[COND_FALSE43]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND45]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP49:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP49]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP50:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP51:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP46:%.*]] = icmp slt i64 [[TMP50]], [[TMP51]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP46]], label [[COND_TRUE47:%.*]], label [[COND_FALSE48:%.*]]
+// SIMD-ONLY0:       cond.true47:
+// SIMD-ONLY0-NEXT:    [[TMP52:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END49:%.*]]
+// SIMD-ONLY0:       cond.false48:
+// SIMD-ONLY0-NEXT:    [[TMP53:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END49]]
+// SIMD-ONLY0:       cond.end49:
+// SIMD-ONLY0-NEXT:    [[COND50:%.*]] = phi i64 [ [[TMP52]], [[COND_TRUE47]] ], [ [[TMP53]], [[COND_FALSE48]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND50]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP54:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP54]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP55:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP56:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP51:%.*]] = icmp eq i64 [[TMP55]], [[TMP56]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP51]], label [[COND_TRUE52:%.*]], label [[COND_FALSE53:%.*]]
+// SIMD-ONLY0:       cond.true52:
+// SIMD-ONLY0-NEXT:    [[TMP57:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END54:%.*]]
+// SIMD-ONLY0:       cond.false53:
+// SIMD-ONLY0-NEXT:    [[TMP58:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END54]]
+// SIMD-ONLY0:       cond.end54:
+// SIMD-ONLY0-NEXT:    [[COND55:%.*]] = phi i64 [ [[TMP57]], [[COND_TRUE52]] ], [ [[TMP58]], [[COND_FALSE53]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND55]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP59:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP59]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP60:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP60]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP61:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP62:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP56:%.*]] = icmp sgt i64 [[TMP61]], [[TMP62]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP56]], label [[COND_TRUE57:%.*]], label [[COND_FALSE58:%.*]]
+// SIMD-ONLY0:       cond.true57:
+// SIMD-ONLY0-NEXT:    [[TMP63:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END59:%.*]]
+// SIMD-ONLY0:       cond.false58:
+// SIMD-ONLY0-NEXT:    [[TMP64:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END59]]
+// SIMD-ONLY0:       cond.end59:
+// SIMD-ONLY0-NEXT:    [[COND60:%.*]] = phi i64 [ [[TMP63]], [[COND_TRUE57]] ], [ [[TMP64]], [[COND_FALSE58]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND60]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP65:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP65]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP66:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP67:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP61:%.*]] = icmp slt i64 [[TMP66]], [[TMP67]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP61]], label [[COND_TRUE62:%.*]], label [[COND_FALSE63:%.*]]
+// SIMD-ONLY0:       cond.true62:
+// SIMD-ONLY0-NEXT:    [[TMP68:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END64:%.*]]
+// SIMD-ONLY0:       cond.false63:
+// SIMD-ONLY0-NEXT:    [[TMP69:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END64]]
+// SIMD-ONLY0:       cond.end64:
+// SIMD-ONLY0-NEXT:    [[COND65:%.*]] = phi i64 [ [[TMP68]], [[COND_TRUE62]] ], [ [[TMP69]], [[COND_FALSE63]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND65]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP70:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP70]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP71:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP72:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP66:%.*]] = icmp eq i64 [[TMP71]], [[TMP72]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP66]], label [[COND_TRUE67:%.*]], label [[COND_FALSE68:%.*]]
+// SIMD-ONLY0:       cond.true67:
+// SIMD-ONLY0-NEXT:    [[TMP73:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END69:%.*]]
+// SIMD-ONLY0:       cond.false68:
+// SIMD-ONLY0-NEXT:    [[TMP74:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END69]]
+// SIMD-ONLY0:       cond.end69:
+// SIMD-ONLY0-NEXT:    [[COND70:%.*]] = phi i64 [ [[TMP73]], [[COND_TRUE67]] ], [ [[TMP74]], [[COND_FALSE68]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND70]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP75:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP76:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP71:%.*]] = icmp sgt i64 [[TMP75]], [[TMP76]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP71]], label [[COND_TRUE72:%.*]], label [[COND_FALSE73:%.*]]
+// SIMD-ONLY0:       cond.true72:
+// SIMD-ONLY0-NEXT:    [[TMP77:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END74:%.*]]
+// SIMD-ONLY0:       cond.false73:
+// SIMD-ONLY0-NEXT:    [[TMP78:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END74]]
+// SIMD-ONLY0:       cond.end74:
+// SIMD-ONLY0-NEXT:    [[COND75:%.*]] = phi i64 [ [[TMP77]], [[COND_TRUE72]] ], [ [[TMP78]], [[COND_FALSE73]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND75]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP79:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP79]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP80:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP81:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP76:%.*]] = icmp slt i64 [[TMP80]], [[TMP81]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP76]], label [[COND_TRUE77:%.*]], label [[COND_FALSE78:%.*]]
+// SIMD-ONLY0:       cond.true77:
+// SIMD-ONLY0-NEXT:    [[TMP82:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END79:%.*]]
+// SIMD-ONLY0:       cond.false78:
+// SIMD-ONLY0-NEXT:    [[TMP83:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END79]]
+// SIMD-ONLY0:       cond.end79:
+// SIMD-ONLY0-NEXT:    [[COND80:%.*]] = phi i64 [ [[TMP82]], [[COND_TRUE77]] ], [ [[TMP83]], [[COND_FALSE78]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND80]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP84:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP84]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP85:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP86:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP81:%.*]] = icmp eq i64 [[TMP85]], [[TMP86]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP81]], label [[COND_TRUE82:%.*]], label [[COND_FALSE83:%.*]]
+// SIMD-ONLY0:       cond.true82:
+// SIMD-ONLY0-NEXT:    [[TMP87:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END84:%.*]]
+// SIMD-ONLY0:       cond.false83:
+// SIMD-ONLY0-NEXT:    [[TMP88:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END84]]
+// SIMD-ONLY0:       cond.end84:
+// SIMD-ONLY0-NEXT:    [[COND85:%.*]] = phi i64 [ [[TMP87]], [[COND_TRUE82]] ], [ [[TMP88]], [[COND_FALSE83]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND85]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP89:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP89]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP90:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP90]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP91:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP92:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP86:%.*]] = icmp sgt i64 [[TMP91]], [[TMP92]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP86]], label [[COND_TRUE87:%.*]], label [[COND_FALSE88:%.*]]
+// SIMD-ONLY0:       cond.true87:
+// SIMD-ONLY0-NEXT:    [[TMP93:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END89:%.*]]
+// SIMD-ONLY0:       cond.false88:
+// SIMD-ONLY0-NEXT:    [[TMP94:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END89]]
+// SIMD-ONLY0:       cond.end89:
+// SIMD-ONLY0-NEXT:    [[COND90:%.*]] = phi i64 [ [[TMP93]], [[COND_TRUE87]] ], [ [[TMP94]], [[COND_FALSE88]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND90]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP95:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP95]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP96:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP97:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP91:%.*]] = icmp slt i64 [[TMP96]], [[TMP97]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP91]], label [[COND_TRUE92:%.*]], label [[COND_FALSE93:%.*]]
+// SIMD-ONLY0:       cond.true92:
+// SIMD-ONLY0-NEXT:    [[TMP98:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END94:%.*]]
+// SIMD-ONLY0:       cond.false93:
+// SIMD-ONLY0-NEXT:    [[TMP99:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END94]]
+// SIMD-ONLY0:       cond.end94:
+// SIMD-ONLY0-NEXT:    [[COND95:%.*]] = phi i64 [ [[TMP98]], [[COND_TRUE92]] ], [ [[TMP99]], [[COND_FALSE93]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND95]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP100:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP100]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP101:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP102:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP96:%.*]] = icmp eq i64 [[TMP101]], [[TMP102]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP96]], label [[COND_TRUE97:%.*]], label [[COND_FALSE98:%.*]]
+// SIMD-ONLY0:       cond.true97:
+// SIMD-ONLY0-NEXT:    [[TMP103:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END99:%.*]]
+// SIMD-ONLY0:       cond.false98:
+// SIMD-ONLY0-NEXT:    [[TMP104:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END99]]
+// SIMD-ONLY0:       cond.end99:
+// SIMD-ONLY0-NEXT:    [[COND100:%.*]] = phi i64 [ [[TMP103]], [[COND_TRUE97]] ], [ [[TMP104]], [[COND_FALSE98]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND100]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP105:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP106:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP101:%.*]] = icmp sgt i64 [[TMP105]], [[TMP106]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP101]], label [[COND_TRUE102:%.*]], label [[COND_FALSE103:%.*]]
+// SIMD-ONLY0:       cond.true102:
+// SIMD-ONLY0-NEXT:    [[TMP107:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END104:%.*]]
+// SIMD-ONLY0:       cond.false103:
+// SIMD-ONLY0-NEXT:    [[TMP108:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END104]]
+// SIMD-ONLY0:       cond.end104:
+// SIMD-ONLY0-NEXT:    [[COND105:%.*]] = phi i64 [ [[TMP107]], [[COND_TRUE102]] ], [ [[TMP108]], [[COND_FALSE103]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND105]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP109:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP109]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP110:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP111:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP106:%.*]] = icmp slt i64 [[TMP110]], [[TMP111]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP106]], label [[COND_TRUE107:%.*]], label [[COND_FALSE108:%.*]]
+// SIMD-ONLY0:       cond.true107:
+// SIMD-ONLY0-NEXT:    [[TMP112:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END109:%.*]]
+// SIMD-ONLY0:       cond.false108:
+// SIMD-ONLY0-NEXT:    [[TMP113:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END109]]
+// SIMD-ONLY0:       cond.end109:
+// SIMD-ONLY0-NEXT:    [[COND110:%.*]] = phi i64 [ [[TMP112]], [[COND_TRUE107]] ], [ [[TMP113]], [[COND_FALSE108]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND110]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP114:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP114]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP115:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP116:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP111:%.*]] = icmp eq i64 [[TMP115]], [[TMP116]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP111]], label [[COND_TRUE112:%.*]], label [[COND_FALSE113:%.*]]
+// SIMD-ONLY0:       cond.true112:
+// SIMD-ONLY0-NEXT:    [[TMP117:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END114:%.*]]
+// SIMD-ONLY0:       cond.false113:
+// SIMD-ONLY0-NEXT:    [[TMP118:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END114]]
+// SIMD-ONLY0:       cond.end114:
+// SIMD-ONLY0-NEXT:    [[COND115:%.*]] = phi i64 [ [[TMP117]], [[COND_TRUE112]] ], [ [[TMP118]], [[COND_FALSE113]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND115]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP119:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP119]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP120:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP120]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP121:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP122:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP116:%.*]] = icmp sgt i64 [[TMP121]], [[TMP122]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP116]], label [[COND_TRUE117:%.*]], label [[COND_FALSE118:%.*]]
+// SIMD-ONLY0:       cond.true117:
+// SIMD-ONLY0-NEXT:    [[TMP123:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END119:%.*]]
+// SIMD-ONLY0:       cond.false118:
+// SIMD-ONLY0-NEXT:    [[TMP124:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END119]]
+// SIMD-ONLY0:       cond.end119:
+// SIMD-ONLY0-NEXT:    [[COND120:%.*]] = phi i64 [ [[TMP123]], [[COND_TRUE117]] ], [ [[TMP124]], [[COND_FALSE118]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND120]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP125:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP125]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP126:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP127:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP121:%.*]] = icmp slt i64 [[TMP126]], [[TMP127]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP121]], label [[COND_TRUE122:%.*]], label [[COND_FALSE123:%.*]]
+// SIMD-ONLY0:       cond.true122:
+// SIMD-ONLY0-NEXT:    [[TMP128:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END124:%.*]]
+// SIMD-ONLY0:       cond.false123:
+// SIMD-ONLY0-NEXT:    [[TMP129:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END124]]
+// SIMD-ONLY0:       cond.end124:
+// SIMD-ONLY0-NEXT:    [[COND125:%.*]] = phi i64 [ [[TMP128]], [[COND_TRUE122]] ], [ [[TMP129]], [[COND_FALSE123]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND125]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP130:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP130]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP131:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP132:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP126:%.*]] = icmp eq i64 [[TMP131]], [[TMP132]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP126]], label [[COND_TRUE127:%.*]], label [[COND_FALSE128:%.*]]
+// SIMD-ONLY0:       cond.true127:
+// SIMD-ONLY0-NEXT:    [[TMP133:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END129:%.*]]
+// SIMD-ONLY0:       cond.false128:
+// SIMD-ONLY0-NEXT:    [[TMP134:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END129]]
+// SIMD-ONLY0:       cond.end129:
+// SIMD-ONLY0-NEXT:    [[COND130:%.*]] = phi i64 [ [[TMP133]], [[COND_TRUE127]] ], [ [[TMP134]], [[COND_FALSE128]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND130]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP135:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP136:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP131:%.*]] = icmp sgt i64 [[TMP135]], [[TMP136]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP131]], label [[COND_TRUE132:%.*]], label [[COND_FALSE133:%.*]]
+// SIMD-ONLY0:       cond.true132:
+// SIMD-ONLY0-NEXT:    [[TMP137:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END134:%.*]]
+// SIMD-ONLY0:       cond.false133:
+// SIMD-ONLY0-NEXT:    [[TMP138:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END134]]
+// SIMD-ONLY0:       cond.end134:
+// SIMD-ONLY0-NEXT:    [[COND135:%.*]] = phi i64 [ [[TMP137]], [[COND_TRUE132]] ], [ [[TMP138]], [[COND_FALSE133]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND135]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP139:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP139]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP140:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP141:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP136:%.*]] = icmp slt i64 [[TMP140]], [[TMP141]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP136]], label [[COND_TRUE137:%.*]], label [[COND_FALSE138:%.*]]
+// SIMD-ONLY0:       cond.true137:
+// SIMD-ONLY0-NEXT:    [[TMP142:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END139:%.*]]
+// SIMD-ONLY0:       cond.false138:
+// SIMD-ONLY0-NEXT:    [[TMP143:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END139]]
+// SIMD-ONLY0:       cond.end139:
+// SIMD-ONLY0-NEXT:    [[COND140:%.*]] = phi i64 [ [[TMP142]], [[COND_TRUE137]] ], [ [[TMP143]], [[COND_FALSE138]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND140]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP144:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP144]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP145:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP146:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP141:%.*]] = icmp eq i64 [[TMP145]], [[TMP146]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP141]], label [[COND_TRUE142:%.*]], label [[COND_FALSE143:%.*]]
+// SIMD-ONLY0:       cond.true142:
+// SIMD-ONLY0-NEXT:    [[TMP147:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END144:%.*]]
+// SIMD-ONLY0:       cond.false143:
+// SIMD-ONLY0-NEXT:    [[TMP148:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END144]]
+// SIMD-ONLY0:       cond.end144:
+// SIMD-ONLY0-NEXT:    [[COND145:%.*]] = phi i64 [ [[TMP147]], [[COND_TRUE142]] ], [ [[TMP148]], [[COND_FALSE143]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND145]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP149:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP149]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP150:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP150]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP151:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP152:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP146:%.*]] = icmp sgt i64 [[TMP151]], [[TMP152]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP146]], label [[COND_TRUE147:%.*]], label [[COND_FALSE148:%.*]]
+// SIMD-ONLY0:       cond.true147:
+// SIMD-ONLY0-NEXT:    [[TMP153:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END149:%.*]]
+// SIMD-ONLY0:       cond.false148:
+// SIMD-ONLY0-NEXT:    [[TMP154:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END149]]
+// SIMD-ONLY0:       cond.end149:
+// SIMD-ONLY0-NEXT:    [[COND150:%.*]] = phi i64 [ [[TMP153]], [[COND_TRUE147]] ], [ [[TMP154]], [[COND_FALSE148]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND150]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP155:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP155]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP156:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP157:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP151:%.*]] = icmp slt i64 [[TMP156]], [[TMP157]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP151]], label [[COND_TRUE152:%.*]], label [[COND_FALSE153:%.*]]
+// SIMD-ONLY0:       cond.true152:
+// SIMD-ONLY0-NEXT:    [[TMP158:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END154:%.*]]
+// SIMD-ONLY0:       cond.false153:
+// SIMD-ONLY0-NEXT:    [[TMP159:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END154]]
+// SIMD-ONLY0:       cond.end154:
+// SIMD-ONLY0-NEXT:    [[COND155:%.*]] = phi i64 [ [[TMP158]], [[COND_TRUE152]] ], [ [[TMP159]], [[COND_FALSE153]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND155]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP160:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP160]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP161:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP162:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP156:%.*]] = icmp eq i64 [[TMP161]], [[TMP162]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP156]], label [[COND_TRUE157:%.*]], label [[COND_FALSE158:%.*]]
+// SIMD-ONLY0:       cond.true157:
+// SIMD-ONLY0-NEXT:    [[TMP163:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END159:%.*]]
+// SIMD-ONLY0:       cond.false158:
+// SIMD-ONLY0-NEXT:    [[TMP164:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END159]]
+// SIMD-ONLY0:       cond.end159:
+// SIMD-ONLY0-NEXT:    [[COND160:%.*]] = phi i64 [ [[TMP163]], [[COND_TRUE157]] ], [ [[TMP164]], [[COND_FALSE158]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND160]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP165:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP166:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP161:%.*]] = icmp sgt i64 [[TMP165]], [[TMP166]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP161]], label [[COND_TRUE162:%.*]], label [[COND_FALSE163:%.*]]
+// SIMD-ONLY0:       cond.true162:
+// SIMD-ONLY0-NEXT:    [[TMP167:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END164:%.*]]
+// SIMD-ONLY0:       cond.false163:
+// SIMD-ONLY0-NEXT:    [[TMP168:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END164]]
+// SIMD-ONLY0:       cond.end164:
+// SIMD-ONLY0-NEXT:    [[COND165:%.*]] = phi i64 [ [[TMP167]], [[COND_TRUE162]] ], [ [[TMP168]], [[COND_FALSE163]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND165]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP169:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP169]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP170:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP171:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP166:%.*]] = icmp slt i64 [[TMP170]], [[TMP171]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP166]], label [[COND_TRUE167:%.*]], label [[COND_FALSE168:%.*]]
+// SIMD-ONLY0:       cond.true167:
+// SIMD-ONLY0-NEXT:    [[TMP172:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END169:%.*]]
+// SIMD-ONLY0:       cond.false168:
+// SIMD-ONLY0-NEXT:    [[TMP173:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END169]]
+// SIMD-ONLY0:       cond.end169:
+// SIMD-ONLY0-NEXT:    [[COND170:%.*]] = phi i64 [ [[TMP172]], [[COND_TRUE167]] ], [ [[TMP173]], [[COND_FALSE168]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND170]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP174:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP174]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP175:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP176:%.*]] = load i64, ptr [[LE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP171:%.*]] = icmp eq i64 [[TMP175]], [[TMP176]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP171]], label [[COND_TRUE172:%.*]], label [[COND_FALSE173:%.*]]
+// SIMD-ONLY0:       cond.true172:
+// SIMD-ONLY0-NEXT:    [[TMP177:%.*]] = load i64, ptr [[LD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END174:%.*]]
+// SIMD-ONLY0:       cond.false173:
+// SIMD-ONLY0-NEXT:    [[TMP178:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END174]]
+// SIMD-ONLY0:       cond.end174:
+// SIMD-ONLY0-NEXT:    [[COND175:%.*]] = phi i64 [ [[TMP177]], [[COND_TRUE172]] ], [ [[TMP178]], [[COND_FALSE173]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND175]], ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP179:%.*]] = load i64, ptr [[LX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP179]], ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP180:%.*]] = load i64, ptr [[LV]], align 8
+// SIMD-ONLY0-NEXT:    ret i64 [[TMP180]]
+//
+//
+// SIMD-ONLY0-LABEL: @ulxevd(
+// SIMD-ONLY0-NEXT:  entry:
+// SIMD-ONLY0-NEXT:    [[ULX:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[ULV:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[ULE:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[ULD:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[TMP0:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP0]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[TMP1]], [[TMP2]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// SIMD-ONLY0:       cond.true:
+// SIMD-ONLY0-NEXT:    [[TMP3:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END:%.*]]
+// SIMD-ONLY0:       cond.false:
+// SIMD-ONLY0-NEXT:    [[TMP4:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END]]
+// SIMD-ONLY0:       cond.end:
+// SIMD-ONLY0-NEXT:    [[COND:%.*]] = phi i64 [ [[TMP3]], [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP5]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP6:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP7:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP1:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1]], label [[COND_TRUE2:%.*]], label [[COND_FALSE3:%.*]]
+// SIMD-ONLY0:       cond.true2:
+// SIMD-ONLY0-NEXT:    [[TMP8:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4:%.*]]
+// SIMD-ONLY0:       cond.false3:
+// SIMD-ONLY0-NEXT:    [[TMP9:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4]]
+// SIMD-ONLY0:       cond.end4:
+// SIMD-ONLY0-NEXT:    [[COND5:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE2]] ], [ [[TMP9]], [[COND_FALSE3]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND5]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP10:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP10]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP11:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP12:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6:%.*]] = icmp eq i64 [[TMP11]], [[TMP12]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]]
+// SIMD-ONLY0:       cond.true7:
+// SIMD-ONLY0-NEXT:    [[TMP13:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END9:%.*]]
+// SIMD-ONLY0:       cond.false8:
+// SIMD-ONLY0-NEXT:    [[TMP14:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END9]]
+// SIMD-ONLY0:       cond.end9:
+// SIMD-ONLY0-NEXT:    [[COND10:%.*]] = phi i64 [ [[TMP13]], [[COND_TRUE7]] ], [ [[TMP14]], [[COND_FALSE8]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND10]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP15:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP16:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP11:%.*]] = icmp ugt i64 [[TMP15]], [[TMP16]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]]
+// SIMD-ONLY0:       cond.true12:
+// SIMD-ONLY0-NEXT:    [[TMP17:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END14:%.*]]
+// SIMD-ONLY0:       cond.false13:
+// SIMD-ONLY0-NEXT:    [[TMP18:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END14]]
+// SIMD-ONLY0:       cond.end14:
+// SIMD-ONLY0-NEXT:    [[COND15:%.*]] = phi i64 [ [[TMP17]], [[COND_TRUE12]] ], [ [[TMP18]], [[COND_FALSE13]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND15]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP19:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP19]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP20:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP21:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP16:%.*]] = icmp ult i64 [[TMP20]], [[TMP21]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP16]], label [[COND_TRUE17:%.*]], label [[COND_FALSE18:%.*]]
+// SIMD-ONLY0:       cond.true17:
+// SIMD-ONLY0-NEXT:    [[TMP22:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END19:%.*]]
+// SIMD-ONLY0:       cond.false18:
+// SIMD-ONLY0-NEXT:    [[TMP23:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END19]]
+// SIMD-ONLY0:       cond.end19:
+// SIMD-ONLY0-NEXT:    [[COND20:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE17]] ], [ [[TMP23]], [[COND_FALSE18]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND20]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP24:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP24]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP25:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP26:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP21:%.*]] = icmp eq i64 [[TMP25]], [[TMP26]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP21]], label [[COND_TRUE22:%.*]], label [[COND_FALSE23:%.*]]
+// SIMD-ONLY0:       cond.true22:
+// SIMD-ONLY0-NEXT:    [[TMP27:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END24:%.*]]
+// SIMD-ONLY0:       cond.false23:
+// SIMD-ONLY0-NEXT:    [[TMP28:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END24]]
+// SIMD-ONLY0:       cond.end24:
+// SIMD-ONLY0-NEXT:    [[COND25:%.*]] = phi i64 [ [[TMP27]], [[COND_TRUE22]] ], [ [[TMP28]], [[COND_FALSE23]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND25]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP29:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP29]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP30:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP30]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP31:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP32:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP26:%.*]] = icmp ugt i64 [[TMP31]], [[TMP32]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP26]], label [[COND_TRUE27:%.*]], label [[COND_FALSE28:%.*]]
+// SIMD-ONLY0:       cond.true27:
+// SIMD-ONLY0-NEXT:    [[TMP33:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END29:%.*]]
+// SIMD-ONLY0:       cond.false28:
+// SIMD-ONLY0-NEXT:    [[TMP34:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END29]]
+// SIMD-ONLY0:       cond.end29:
+// SIMD-ONLY0-NEXT:    [[COND30:%.*]] = phi i64 [ [[TMP33]], [[COND_TRUE27]] ], [ [[TMP34]], [[COND_FALSE28]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND30]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP35:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP35]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP36:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP37:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP31:%.*]] = icmp ult i64 [[TMP36]], [[TMP37]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP31]], label [[COND_TRUE32:%.*]], label [[COND_FALSE33:%.*]]
+// SIMD-ONLY0:       cond.true32:
+// SIMD-ONLY0-NEXT:    [[TMP38:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END34:%.*]]
+// SIMD-ONLY0:       cond.false33:
+// SIMD-ONLY0-NEXT:    [[TMP39:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END34]]
+// SIMD-ONLY0:       cond.end34:
+// SIMD-ONLY0-NEXT:    [[COND35:%.*]] = phi i64 [ [[TMP38]], [[COND_TRUE32]] ], [ [[TMP39]], [[COND_FALSE33]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND35]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP40:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP40]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP41:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP42:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP36:%.*]] = icmp eq i64 [[TMP41]], [[TMP42]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP36]], label [[COND_TRUE37:%.*]], label [[COND_FALSE38:%.*]]
+// SIMD-ONLY0:       cond.true37:
+// SIMD-ONLY0-NEXT:    [[TMP43:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END39:%.*]]
+// SIMD-ONLY0:       cond.false38:
+// SIMD-ONLY0-NEXT:    [[TMP44:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END39]]
+// SIMD-ONLY0:       cond.end39:
+// SIMD-ONLY0-NEXT:    [[COND40:%.*]] = phi i64 [ [[TMP43]], [[COND_TRUE37]] ], [ [[TMP44]], [[COND_FALSE38]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND40]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP45:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP46:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP41:%.*]] = icmp ugt i64 [[TMP45]], [[TMP46]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP41]], label [[COND_TRUE42:%.*]], label [[COND_FALSE43:%.*]]
+// SIMD-ONLY0:       cond.true42:
+// SIMD-ONLY0-NEXT:    [[TMP47:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END44:%.*]]
+// SIMD-ONLY0:       cond.false43:
+// SIMD-ONLY0-NEXT:    [[TMP48:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END44]]
+// SIMD-ONLY0:       cond.end44:
+// SIMD-ONLY0-NEXT:    [[COND45:%.*]] = phi i64 [ [[TMP47]], [[COND_TRUE42]] ], [ [[TMP48]], [[COND_FALSE43]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND45]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP49:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP49]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP50:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP51:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP46:%.*]] = icmp ult i64 [[TMP50]], [[TMP51]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP46]], label [[COND_TRUE47:%.*]], label [[COND_FALSE48:%.*]]
+// SIMD-ONLY0:       cond.true47:
+// SIMD-ONLY0-NEXT:    [[TMP52:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END49:%.*]]
+// SIMD-ONLY0:       cond.false48:
+// SIMD-ONLY0-NEXT:    [[TMP53:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END49]]
+// SIMD-ONLY0:       cond.end49:
+// SIMD-ONLY0-NEXT:    [[COND50:%.*]] = phi i64 [ [[TMP52]], [[COND_TRUE47]] ], [ [[TMP53]], [[COND_FALSE48]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND50]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP54:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP54]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP55:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP56:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP51:%.*]] = icmp eq i64 [[TMP55]], [[TMP56]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP51]], label [[COND_TRUE52:%.*]], label [[COND_FALSE53:%.*]]
+// SIMD-ONLY0:       cond.true52:
+// SIMD-ONLY0-NEXT:    [[TMP57:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END54:%.*]]
+// SIMD-ONLY0:       cond.false53:
+// SIMD-ONLY0-NEXT:    [[TMP58:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END54]]
+// SIMD-ONLY0:       cond.end54:
+// SIMD-ONLY0-NEXT:    [[COND55:%.*]] = phi i64 [ [[TMP57]], [[COND_TRUE52]] ], [ [[TMP58]], [[COND_FALSE53]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND55]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP59:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP59]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP60:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP60]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP61:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP62:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP56:%.*]] = icmp ugt i64 [[TMP61]], [[TMP62]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP56]], label [[COND_TRUE57:%.*]], label [[COND_FALSE58:%.*]]
+// SIMD-ONLY0:       cond.true57:
+// SIMD-ONLY0-NEXT:    [[TMP63:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END59:%.*]]
+// SIMD-ONLY0:       cond.false58:
+// SIMD-ONLY0-NEXT:    [[TMP64:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END59]]
+// SIMD-ONLY0:       cond.end59:
+// SIMD-ONLY0-NEXT:    [[COND60:%.*]] = phi i64 [ [[TMP63]], [[COND_TRUE57]] ], [ [[TMP64]], [[COND_FALSE58]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND60]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP65:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP65]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP66:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP67:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP61:%.*]] = icmp ult i64 [[TMP66]], [[TMP67]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP61]], label [[COND_TRUE62:%.*]], label [[COND_FALSE63:%.*]]
+// SIMD-ONLY0:       cond.true62:
+// SIMD-ONLY0-NEXT:    [[TMP68:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END64:%.*]]
+// SIMD-ONLY0:       cond.false63:
+// SIMD-ONLY0-NEXT:    [[TMP69:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END64]]
+// SIMD-ONLY0:       cond.end64:
+// SIMD-ONLY0-NEXT:    [[COND65:%.*]] = phi i64 [ [[TMP68]], [[COND_TRUE62]] ], [ [[TMP69]], [[COND_FALSE63]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND65]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP70:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP70]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP71:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP72:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP66:%.*]] = icmp eq i64 [[TMP71]], [[TMP72]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP66]], label [[COND_TRUE67:%.*]], label [[COND_FALSE68:%.*]]
+// SIMD-ONLY0:       cond.true67:
+// SIMD-ONLY0-NEXT:    [[TMP73:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END69:%.*]]
+// SIMD-ONLY0:       cond.false68:
+// SIMD-ONLY0-NEXT:    [[TMP74:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END69]]
+// SIMD-ONLY0:       cond.end69:
+// SIMD-ONLY0-NEXT:    [[COND70:%.*]] = phi i64 [ [[TMP73]], [[COND_TRUE67]] ], [ [[TMP74]], [[COND_FALSE68]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND70]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP75:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP76:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP71:%.*]] = icmp ugt i64 [[TMP75]], [[TMP76]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP71]], label [[COND_TRUE72:%.*]], label [[COND_FALSE73:%.*]]
+// SIMD-ONLY0:       cond.true72:
+// SIMD-ONLY0-NEXT:    [[TMP77:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END74:%.*]]
+// SIMD-ONLY0:       cond.false73:
+// SIMD-ONLY0-NEXT:    [[TMP78:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END74]]
+// SIMD-ONLY0:       cond.end74:
+// SIMD-ONLY0-NEXT:    [[COND75:%.*]] = phi i64 [ [[TMP77]], [[COND_TRUE72]] ], [ [[TMP78]], [[COND_FALSE73]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND75]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP79:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP79]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP80:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP81:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP76:%.*]] = icmp ult i64 [[TMP80]], [[TMP81]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP76]], label [[COND_TRUE77:%.*]], label [[COND_FALSE78:%.*]]
+// SIMD-ONLY0:       cond.true77:
+// SIMD-ONLY0-NEXT:    [[TMP82:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END79:%.*]]
+// SIMD-ONLY0:       cond.false78:
+// SIMD-ONLY0-NEXT:    [[TMP83:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END79]]
+// SIMD-ONLY0:       cond.end79:
+// SIMD-ONLY0-NEXT:    [[COND80:%.*]] = phi i64 [ [[TMP82]], [[COND_TRUE77]] ], [ [[TMP83]], [[COND_FALSE78]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND80]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP84:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP84]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP85:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP86:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP81:%.*]] = icmp eq i64 [[TMP85]], [[TMP86]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP81]], label [[COND_TRUE82:%.*]], label [[COND_FALSE83:%.*]]
+// SIMD-ONLY0:       cond.true82:
+// SIMD-ONLY0-NEXT:    [[TMP87:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END84:%.*]]
+// SIMD-ONLY0:       cond.false83:
+// SIMD-ONLY0-NEXT:    [[TMP88:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END84]]
+// SIMD-ONLY0:       cond.end84:
+// SIMD-ONLY0-NEXT:    [[COND85:%.*]] = phi i64 [ [[TMP87]], [[COND_TRUE82]] ], [ [[TMP88]], [[COND_FALSE83]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND85]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP89:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP89]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP90:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP90]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP91:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP92:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP86:%.*]] = icmp ugt i64 [[TMP91]], [[TMP92]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP86]], label [[COND_TRUE87:%.*]], label [[COND_FALSE88:%.*]]
+// SIMD-ONLY0:       cond.true87:
+// SIMD-ONLY0-NEXT:    [[TMP93:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END89:%.*]]
+// SIMD-ONLY0:       cond.false88:
+// SIMD-ONLY0-NEXT:    [[TMP94:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END89]]
+// SIMD-ONLY0:       cond.end89:
+// SIMD-ONLY0-NEXT:    [[COND90:%.*]] = phi i64 [ [[TMP93]], [[COND_TRUE87]] ], [ [[TMP94]], [[COND_FALSE88]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND90]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP95:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP95]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP96:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP97:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP91:%.*]] = icmp ult i64 [[TMP96]], [[TMP97]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP91]], label [[COND_TRUE92:%.*]], label [[COND_FALSE93:%.*]]
+// SIMD-ONLY0:       cond.true92:
+// SIMD-ONLY0-NEXT:    [[TMP98:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END94:%.*]]
+// SIMD-ONLY0:       cond.false93:
+// SIMD-ONLY0-NEXT:    [[TMP99:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END94]]
+// SIMD-ONLY0:       cond.end94:
+// SIMD-ONLY0-NEXT:    [[COND95:%.*]] = phi i64 [ [[TMP98]], [[COND_TRUE92]] ], [ [[TMP99]], [[COND_FALSE93]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND95]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP100:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP100]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP101:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP102:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP96:%.*]] = icmp eq i64 [[TMP101]], [[TMP102]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP96]], label [[COND_TRUE97:%.*]], label [[COND_FALSE98:%.*]]
+// SIMD-ONLY0:       cond.true97:
+// SIMD-ONLY0-NEXT:    [[TMP103:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END99:%.*]]
+// SIMD-ONLY0:       cond.false98:
+// SIMD-ONLY0-NEXT:    [[TMP104:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END99]]
+// SIMD-ONLY0:       cond.end99:
+// SIMD-ONLY0-NEXT:    [[COND100:%.*]] = phi i64 [ [[TMP103]], [[COND_TRUE97]] ], [ [[TMP104]], [[COND_FALSE98]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND100]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP105:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP106:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP101:%.*]] = icmp ugt i64 [[TMP105]], [[TMP106]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP101]], label [[COND_TRUE102:%.*]], label [[COND_FALSE103:%.*]]
+// SIMD-ONLY0:       cond.true102:
+// SIMD-ONLY0-NEXT:    [[TMP107:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END104:%.*]]
+// SIMD-ONLY0:       cond.false103:
+// SIMD-ONLY0-NEXT:    [[TMP108:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END104]]
+// SIMD-ONLY0:       cond.end104:
+// SIMD-ONLY0-NEXT:    [[COND105:%.*]] = phi i64 [ [[TMP107]], [[COND_TRUE102]] ], [ [[TMP108]], [[COND_FALSE103]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND105]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP109:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP109]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP110:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP111:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP106:%.*]] = icmp ult i64 [[TMP110]], [[TMP111]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP106]], label [[COND_TRUE107:%.*]], label [[COND_FALSE108:%.*]]
+// SIMD-ONLY0:       cond.true107:
+// SIMD-ONLY0-NEXT:    [[TMP112:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END109:%.*]]
+// SIMD-ONLY0:       cond.false108:
+// SIMD-ONLY0-NEXT:    [[TMP113:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END109]]
+// SIMD-ONLY0:       cond.end109:
+// SIMD-ONLY0-NEXT:    [[COND110:%.*]] = phi i64 [ [[TMP112]], [[COND_TRUE107]] ], [ [[TMP113]], [[COND_FALSE108]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND110]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP114:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP114]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP115:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP116:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP111:%.*]] = icmp eq i64 [[TMP115]], [[TMP116]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP111]], label [[COND_TRUE112:%.*]], label [[COND_FALSE113:%.*]]
+// SIMD-ONLY0:       cond.true112:
+// SIMD-ONLY0-NEXT:    [[TMP117:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END114:%.*]]
+// SIMD-ONLY0:       cond.false113:
+// SIMD-ONLY0-NEXT:    [[TMP118:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END114]]
+// SIMD-ONLY0:       cond.end114:
+// SIMD-ONLY0-NEXT:    [[COND115:%.*]] = phi i64 [ [[TMP117]], [[COND_TRUE112]] ], [ [[TMP118]], [[COND_FALSE113]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND115]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP119:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP119]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP120:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP120]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP121:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP122:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP116:%.*]] = icmp ugt i64 [[TMP121]], [[TMP122]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP116]], label [[COND_TRUE117:%.*]], label [[COND_FALSE118:%.*]]
+// SIMD-ONLY0:       cond.true117:
+// SIMD-ONLY0-NEXT:    [[TMP123:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END119:%.*]]
+// SIMD-ONLY0:       cond.false118:
+// SIMD-ONLY0-NEXT:    [[TMP124:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END119]]
+// SIMD-ONLY0:       cond.end119:
+// SIMD-ONLY0-NEXT:    [[COND120:%.*]] = phi i64 [ [[TMP123]], [[COND_TRUE117]] ], [ [[TMP124]], [[COND_FALSE118]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND120]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP125:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP125]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP126:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP127:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP121:%.*]] = icmp ult i64 [[TMP126]], [[TMP127]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP121]], label [[COND_TRUE122:%.*]], label [[COND_FALSE123:%.*]]
+// SIMD-ONLY0:       cond.true122:
+// SIMD-ONLY0-NEXT:    [[TMP128:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END124:%.*]]
+// SIMD-ONLY0:       cond.false123:
+// SIMD-ONLY0-NEXT:    [[TMP129:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END124]]
+// SIMD-ONLY0:       cond.end124:
+// SIMD-ONLY0-NEXT:    [[COND125:%.*]] = phi i64 [ [[TMP128]], [[COND_TRUE122]] ], [ [[TMP129]], [[COND_FALSE123]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND125]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP130:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP130]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP131:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP132:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP126:%.*]] = icmp eq i64 [[TMP131]], [[TMP132]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP126]], label [[COND_TRUE127:%.*]], label [[COND_FALSE128:%.*]]
+// SIMD-ONLY0:       cond.true127:
+// SIMD-ONLY0-NEXT:    [[TMP133:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END129:%.*]]
+// SIMD-ONLY0:       cond.false128:
+// SIMD-ONLY0-NEXT:    [[TMP134:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END129]]
+// SIMD-ONLY0:       cond.end129:
+// SIMD-ONLY0-NEXT:    [[COND130:%.*]] = phi i64 [ [[TMP133]], [[COND_TRUE127]] ], [ [[TMP134]], [[COND_FALSE128]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND130]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP135:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP136:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP131:%.*]] = icmp ugt i64 [[TMP135]], [[TMP136]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP131]], label [[COND_TRUE132:%.*]], label [[COND_FALSE133:%.*]]
+// SIMD-ONLY0:       cond.true132:
+// SIMD-ONLY0-NEXT:    [[TMP137:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END134:%.*]]
+// SIMD-ONLY0:       cond.false133:
+// SIMD-ONLY0-NEXT:    [[TMP138:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END134]]
+// SIMD-ONLY0:       cond.end134:
+// SIMD-ONLY0-NEXT:    [[COND135:%.*]] = phi i64 [ [[TMP137]], [[COND_TRUE132]] ], [ [[TMP138]], [[COND_FALSE133]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND135]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP139:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP139]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP140:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP141:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP136:%.*]] = icmp ult i64 [[TMP140]], [[TMP141]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP136]], label [[COND_TRUE137:%.*]], label [[COND_FALSE138:%.*]]
+// SIMD-ONLY0:       cond.true137:
+// SIMD-ONLY0-NEXT:    [[TMP142:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END139:%.*]]
+// SIMD-ONLY0:       cond.false138:
+// SIMD-ONLY0-NEXT:    [[TMP143:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END139]]
+// SIMD-ONLY0:       cond.end139:
+// SIMD-ONLY0-NEXT:    [[COND140:%.*]] = phi i64 [ [[TMP142]], [[COND_TRUE137]] ], [ [[TMP143]], [[COND_FALSE138]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND140]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP144:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP144]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP145:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP146:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP141:%.*]] = icmp eq i64 [[TMP145]], [[TMP146]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP141]], label [[COND_TRUE142:%.*]], label [[COND_FALSE143:%.*]]
+// SIMD-ONLY0:       cond.true142:
+// SIMD-ONLY0-NEXT:    [[TMP147:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END144:%.*]]
+// SIMD-ONLY0:       cond.false143:
+// SIMD-ONLY0-NEXT:    [[TMP148:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END144]]
+// SIMD-ONLY0:       cond.end144:
+// SIMD-ONLY0-NEXT:    [[COND145:%.*]] = phi i64 [ [[TMP147]], [[COND_TRUE142]] ], [ [[TMP148]], [[COND_FALSE143]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND145]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP149:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP149]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP150:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP150]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP151:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP152:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP146:%.*]] = icmp ugt i64 [[TMP151]], [[TMP152]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP146]], label [[COND_TRUE147:%.*]], label [[COND_FALSE148:%.*]]
+// SIMD-ONLY0:       cond.true147:
+// SIMD-ONLY0-NEXT:    [[TMP153:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END149:%.*]]
+// SIMD-ONLY0:       cond.false148:
+// SIMD-ONLY0-NEXT:    [[TMP154:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END149]]
+// SIMD-ONLY0:       cond.end149:
+// SIMD-ONLY0-NEXT:    [[COND150:%.*]] = phi i64 [ [[TMP153]], [[COND_TRUE147]] ], [ [[TMP154]], [[COND_FALSE148]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND150]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP155:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP155]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP156:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP157:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP151:%.*]] = icmp ult i64 [[TMP156]], [[TMP157]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP151]], label [[COND_TRUE152:%.*]], label [[COND_FALSE153:%.*]]
+// SIMD-ONLY0:       cond.true152:
+// SIMD-ONLY0-NEXT:    [[TMP158:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END154:%.*]]
+// SIMD-ONLY0:       cond.false153:
+// SIMD-ONLY0-NEXT:    [[TMP159:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END154]]
+// SIMD-ONLY0:       cond.end154:
+// SIMD-ONLY0-NEXT:    [[COND155:%.*]] = phi i64 [ [[TMP158]], [[COND_TRUE152]] ], [ [[TMP159]], [[COND_FALSE153]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND155]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP160:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP160]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP161:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP162:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP156:%.*]] = icmp eq i64 [[TMP161]], [[TMP162]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP156]], label [[COND_TRUE157:%.*]], label [[COND_FALSE158:%.*]]
+// SIMD-ONLY0:       cond.true157:
+// SIMD-ONLY0-NEXT:    [[TMP163:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END159:%.*]]
+// SIMD-ONLY0:       cond.false158:
+// SIMD-ONLY0-NEXT:    [[TMP164:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END159]]
+// SIMD-ONLY0:       cond.end159:
+// SIMD-ONLY0-NEXT:    [[COND160:%.*]] = phi i64 [ [[TMP163]], [[COND_TRUE157]] ], [ [[TMP164]], [[COND_FALSE158]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND160]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP165:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP166:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP161:%.*]] = icmp ugt i64 [[TMP165]], [[TMP166]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP161]], label [[COND_TRUE162:%.*]], label [[COND_FALSE163:%.*]]
+// SIMD-ONLY0:       cond.true162:
+// SIMD-ONLY0-NEXT:    [[TMP167:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END164:%.*]]
+// SIMD-ONLY0:       cond.false163:
+// SIMD-ONLY0-NEXT:    [[TMP168:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END164]]
+// SIMD-ONLY0:       cond.end164:
+// SIMD-ONLY0-NEXT:    [[COND165:%.*]] = phi i64 [ [[TMP167]], [[COND_TRUE162]] ], [ [[TMP168]], [[COND_FALSE163]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND165]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP169:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP169]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP170:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP171:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP166:%.*]] = icmp ult i64 [[TMP170]], [[TMP171]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP166]], label [[COND_TRUE167:%.*]], label [[COND_FALSE168:%.*]]
+// SIMD-ONLY0:       cond.true167:
+// SIMD-ONLY0-NEXT:    [[TMP172:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END169:%.*]]
+// SIMD-ONLY0:       cond.false168:
+// SIMD-ONLY0-NEXT:    [[TMP173:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END169]]
+// SIMD-ONLY0:       cond.end169:
+// SIMD-ONLY0-NEXT:    [[COND170:%.*]] = phi i64 [ [[TMP172]], [[COND_TRUE167]] ], [ [[TMP173]], [[COND_FALSE168]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND170]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP174:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP174]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP175:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP176:%.*]] = load i64, ptr [[ULE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP171:%.*]] = icmp eq i64 [[TMP175]], [[TMP176]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP171]], label [[COND_TRUE172:%.*]], label [[COND_FALSE173:%.*]]
+// SIMD-ONLY0:       cond.true172:
+// SIMD-ONLY0-NEXT:    [[TMP177:%.*]] = load i64, ptr [[ULD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END174:%.*]]
+// SIMD-ONLY0:       cond.false173:
+// SIMD-ONLY0-NEXT:    [[TMP178:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END174]]
+// SIMD-ONLY0:       cond.end174:
+// SIMD-ONLY0-NEXT:    [[COND175:%.*]] = phi i64 [ [[TMP177]], [[COND_TRUE172]] ], [ [[TMP178]], [[COND_FALSE173]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND175]], ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP179:%.*]] = load i64, ptr [[ULX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP179]], ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP180:%.*]] = load i64, ptr [[ULV]], align 8
+// SIMD-ONLY0-NEXT:    ret i64 [[TMP180]]
+//
+//
+// SIMD-ONLY0-LABEL: @llxevd(
+// SIMD-ONLY0-NEXT:  entry:
+// SIMD-ONLY0-NEXT:    [[LLX:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[LLV:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[LLE:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[LLD:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[TMP0:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP0]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[TMP1]], [[TMP2]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// SIMD-ONLY0:       cond.true:
+// SIMD-ONLY0-NEXT:    [[TMP3:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END:%.*]]
+// SIMD-ONLY0:       cond.false:
+// SIMD-ONLY0-NEXT:    [[TMP4:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END]]
+// SIMD-ONLY0:       cond.end:
+// SIMD-ONLY0-NEXT:    [[COND:%.*]] = phi i64 [ [[TMP3]], [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP5]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP6:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP7:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP1:%.*]] = icmp slt i64 [[TMP6]], [[TMP7]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1]], label [[COND_TRUE2:%.*]], label [[COND_FALSE3:%.*]]
+// SIMD-ONLY0:       cond.true2:
+// SIMD-ONLY0-NEXT:    [[TMP8:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4:%.*]]
+// SIMD-ONLY0:       cond.false3:
+// SIMD-ONLY0-NEXT:    [[TMP9:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4]]
+// SIMD-ONLY0:       cond.end4:
+// SIMD-ONLY0-NEXT:    [[COND5:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE2]] ], [ [[TMP9]], [[COND_FALSE3]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND5]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP10:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP10]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP11:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP12:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6:%.*]] = icmp eq i64 [[TMP11]], [[TMP12]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]]
+// SIMD-ONLY0:       cond.true7:
+// SIMD-ONLY0-NEXT:    [[TMP13:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END9:%.*]]
+// SIMD-ONLY0:       cond.false8:
+// SIMD-ONLY0-NEXT:    [[TMP14:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END9]]
+// SIMD-ONLY0:       cond.end9:
+// SIMD-ONLY0-NEXT:    [[COND10:%.*]] = phi i64 [ [[TMP13]], [[COND_TRUE7]] ], [ [[TMP14]], [[COND_FALSE8]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND10]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP15:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP16:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP11:%.*]] = icmp sgt i64 [[TMP15]], [[TMP16]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]]
+// SIMD-ONLY0:       cond.true12:
+// SIMD-ONLY0-NEXT:    [[TMP17:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END14:%.*]]
+// SIMD-ONLY0:       cond.false13:
+// SIMD-ONLY0-NEXT:    [[TMP18:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END14]]
+// SIMD-ONLY0:       cond.end14:
+// SIMD-ONLY0-NEXT:    [[COND15:%.*]] = phi i64 [ [[TMP17]], [[COND_TRUE12]] ], [ [[TMP18]], [[COND_FALSE13]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND15]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP19:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP19]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP20:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP21:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP16:%.*]] = icmp slt i64 [[TMP20]], [[TMP21]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP16]], label [[COND_TRUE17:%.*]], label [[COND_FALSE18:%.*]]
+// SIMD-ONLY0:       cond.true17:
+// SIMD-ONLY0-NEXT:    [[TMP22:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END19:%.*]]
+// SIMD-ONLY0:       cond.false18:
+// SIMD-ONLY0-NEXT:    [[TMP23:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END19]]
+// SIMD-ONLY0:       cond.end19:
+// SIMD-ONLY0-NEXT:    [[COND20:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE17]] ], [ [[TMP23]], [[COND_FALSE18]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND20]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP24:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP24]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP25:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP26:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP21:%.*]] = icmp eq i64 [[TMP25]], [[TMP26]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP21]], label [[COND_TRUE22:%.*]], label [[COND_FALSE23:%.*]]
+// SIMD-ONLY0:       cond.true22:
+// SIMD-ONLY0-NEXT:    [[TMP27:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END24:%.*]]
+// SIMD-ONLY0:       cond.false23:
+// SIMD-ONLY0-NEXT:    [[TMP28:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END24]]
+// SIMD-ONLY0:       cond.end24:
+// SIMD-ONLY0-NEXT:    [[COND25:%.*]] = phi i64 [ [[TMP27]], [[COND_TRUE22]] ], [ [[TMP28]], [[COND_FALSE23]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND25]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP29:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP29]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP30:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP30]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP31:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP32:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP26:%.*]] = icmp sgt i64 [[TMP31]], [[TMP32]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP26]], label [[COND_TRUE27:%.*]], label [[COND_FALSE28:%.*]]
+// SIMD-ONLY0:       cond.true27:
+// SIMD-ONLY0-NEXT:    [[TMP33:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END29:%.*]]
+// SIMD-ONLY0:       cond.false28:
+// SIMD-ONLY0-NEXT:    [[TMP34:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END29]]
+// SIMD-ONLY0:       cond.end29:
+// SIMD-ONLY0-NEXT:    [[COND30:%.*]] = phi i64 [ [[TMP33]], [[COND_TRUE27]] ], [ [[TMP34]], [[COND_FALSE28]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND30]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP35:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP35]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP36:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP37:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP31:%.*]] = icmp slt i64 [[TMP36]], [[TMP37]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP31]], label [[COND_TRUE32:%.*]], label [[COND_FALSE33:%.*]]
+// SIMD-ONLY0:       cond.true32:
+// SIMD-ONLY0-NEXT:    [[TMP38:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END34:%.*]]
+// SIMD-ONLY0:       cond.false33:
+// SIMD-ONLY0-NEXT:    [[TMP39:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END34]]
+// SIMD-ONLY0:       cond.end34:
+// SIMD-ONLY0-NEXT:    [[COND35:%.*]] = phi i64 [ [[TMP38]], [[COND_TRUE32]] ], [ [[TMP39]], [[COND_FALSE33]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND35]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP40:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP40]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP41:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP42:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP36:%.*]] = icmp eq i64 [[TMP41]], [[TMP42]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP36]], label [[COND_TRUE37:%.*]], label [[COND_FALSE38:%.*]]
+// SIMD-ONLY0:       cond.true37:
+// SIMD-ONLY0-NEXT:    [[TMP43:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END39:%.*]]
+// SIMD-ONLY0:       cond.false38:
+// SIMD-ONLY0-NEXT:    [[TMP44:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END39]]
+// SIMD-ONLY0:       cond.end39:
+// SIMD-ONLY0-NEXT:    [[COND40:%.*]] = phi i64 [ [[TMP43]], [[COND_TRUE37]] ], [ [[TMP44]], [[COND_FALSE38]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND40]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP45:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP46:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP41:%.*]] = icmp sgt i64 [[TMP45]], [[TMP46]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP41]], label [[COND_TRUE42:%.*]], label [[COND_FALSE43:%.*]]
+// SIMD-ONLY0:       cond.true42:
+// SIMD-ONLY0-NEXT:    [[TMP47:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END44:%.*]]
+// SIMD-ONLY0:       cond.false43:
+// SIMD-ONLY0-NEXT:    [[TMP48:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END44]]
+// SIMD-ONLY0:       cond.end44:
+// SIMD-ONLY0-NEXT:    [[COND45:%.*]] = phi i64 [ [[TMP47]], [[COND_TRUE42]] ], [ [[TMP48]], [[COND_FALSE43]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND45]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP49:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP49]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP50:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP51:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP46:%.*]] = icmp slt i64 [[TMP50]], [[TMP51]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP46]], label [[COND_TRUE47:%.*]], label [[COND_FALSE48:%.*]]
+// SIMD-ONLY0:       cond.true47:
+// SIMD-ONLY0-NEXT:    [[TMP52:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END49:%.*]]
+// SIMD-ONLY0:       cond.false48:
+// SIMD-ONLY0-NEXT:    [[TMP53:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END49]]
+// SIMD-ONLY0:       cond.end49:
+// SIMD-ONLY0-NEXT:    [[COND50:%.*]] = phi i64 [ [[TMP52]], [[COND_TRUE47]] ], [ [[TMP53]], [[COND_FALSE48]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND50]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP54:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP54]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP55:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP56:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP51:%.*]] = icmp eq i64 [[TMP55]], [[TMP56]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP51]], label [[COND_TRUE52:%.*]], label [[COND_FALSE53:%.*]]
+// SIMD-ONLY0:       cond.true52:
+// SIMD-ONLY0-NEXT:    [[TMP57:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END54:%.*]]
+// SIMD-ONLY0:       cond.false53:
+// SIMD-ONLY0-NEXT:    [[TMP58:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END54]]
+// SIMD-ONLY0:       cond.end54:
+// SIMD-ONLY0-NEXT:    [[COND55:%.*]] = phi i64 [ [[TMP57]], [[COND_TRUE52]] ], [ [[TMP58]], [[COND_FALSE53]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND55]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP59:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP59]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP60:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP60]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP61:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP62:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP56:%.*]] = icmp sgt i64 [[TMP61]], [[TMP62]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP56]], label [[COND_TRUE57:%.*]], label [[COND_FALSE58:%.*]]
+// SIMD-ONLY0:       cond.true57:
+// SIMD-ONLY0-NEXT:    [[TMP63:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END59:%.*]]
+// SIMD-ONLY0:       cond.false58:
+// SIMD-ONLY0-NEXT:    [[TMP64:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END59]]
+// SIMD-ONLY0:       cond.end59:
+// SIMD-ONLY0-NEXT:    [[COND60:%.*]] = phi i64 [ [[TMP63]], [[COND_TRUE57]] ], [ [[TMP64]], [[COND_FALSE58]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND60]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP65:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP65]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP66:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP67:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP61:%.*]] = icmp slt i64 [[TMP66]], [[TMP67]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP61]], label [[COND_TRUE62:%.*]], label [[COND_FALSE63:%.*]]
+// SIMD-ONLY0:       cond.true62:
+// SIMD-ONLY0-NEXT:    [[TMP68:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END64:%.*]]
+// SIMD-ONLY0:       cond.false63:
+// SIMD-ONLY0-NEXT:    [[TMP69:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END64]]
+// SIMD-ONLY0:       cond.end64:
+// SIMD-ONLY0-NEXT:    [[COND65:%.*]] = phi i64 [ [[TMP68]], [[COND_TRUE62]] ], [ [[TMP69]], [[COND_FALSE63]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND65]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP70:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP70]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP71:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP72:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP66:%.*]] = icmp eq i64 [[TMP71]], [[TMP72]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP66]], label [[COND_TRUE67:%.*]], label [[COND_FALSE68:%.*]]
+// SIMD-ONLY0:       cond.true67:
+// SIMD-ONLY0-NEXT:    [[TMP73:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END69:%.*]]
+// SIMD-ONLY0:       cond.false68:
+// SIMD-ONLY0-NEXT:    [[TMP74:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END69]]
+// SIMD-ONLY0:       cond.end69:
+// SIMD-ONLY0-NEXT:    [[COND70:%.*]] = phi i64 [ [[TMP73]], [[COND_TRUE67]] ], [ [[TMP74]], [[COND_FALSE68]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND70]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP75:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP76:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP71:%.*]] = icmp sgt i64 [[TMP75]], [[TMP76]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP71]], label [[COND_TRUE72:%.*]], label [[COND_FALSE73:%.*]]
+// SIMD-ONLY0:       cond.true72:
+// SIMD-ONLY0-NEXT:    [[TMP77:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END74:%.*]]
+// SIMD-ONLY0:       cond.false73:
+// SIMD-ONLY0-NEXT:    [[TMP78:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END74]]
+// SIMD-ONLY0:       cond.end74:
+// SIMD-ONLY0-NEXT:    [[COND75:%.*]] = phi i64 [ [[TMP77]], [[COND_TRUE72]] ], [ [[TMP78]], [[COND_FALSE73]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND75]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP79:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP79]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP80:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP81:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP76:%.*]] = icmp slt i64 [[TMP80]], [[TMP81]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP76]], label [[COND_TRUE77:%.*]], label [[COND_FALSE78:%.*]]
+// SIMD-ONLY0:       cond.true77:
+// SIMD-ONLY0-NEXT:    [[TMP82:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END79:%.*]]
+// SIMD-ONLY0:       cond.false78:
+// SIMD-ONLY0-NEXT:    [[TMP83:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END79]]
+// SIMD-ONLY0:       cond.end79:
+// SIMD-ONLY0-NEXT:    [[COND80:%.*]] = phi i64 [ [[TMP82]], [[COND_TRUE77]] ], [ [[TMP83]], [[COND_FALSE78]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND80]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP84:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP84]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP85:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP86:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP81:%.*]] = icmp eq i64 [[TMP85]], [[TMP86]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP81]], label [[COND_TRUE82:%.*]], label [[COND_FALSE83:%.*]]
+// SIMD-ONLY0:       cond.true82:
+// SIMD-ONLY0-NEXT:    [[TMP87:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END84:%.*]]
+// SIMD-ONLY0:       cond.false83:
+// SIMD-ONLY0-NEXT:    [[TMP88:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END84]]
+// SIMD-ONLY0:       cond.end84:
+// SIMD-ONLY0-NEXT:    [[COND85:%.*]] = phi i64 [ [[TMP87]], [[COND_TRUE82]] ], [ [[TMP88]], [[COND_FALSE83]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND85]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP89:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP89]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP90:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP90]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP91:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP92:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP86:%.*]] = icmp sgt i64 [[TMP91]], [[TMP92]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP86]], label [[COND_TRUE87:%.*]], label [[COND_FALSE88:%.*]]
+// SIMD-ONLY0:       cond.true87:
+// SIMD-ONLY0-NEXT:    [[TMP93:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END89:%.*]]
+// SIMD-ONLY0:       cond.false88:
+// SIMD-ONLY0-NEXT:    [[TMP94:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END89]]
+// SIMD-ONLY0:       cond.end89:
+// SIMD-ONLY0-NEXT:    [[COND90:%.*]] = phi i64 [ [[TMP93]], [[COND_TRUE87]] ], [ [[TMP94]], [[COND_FALSE88]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND90]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP95:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP95]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP96:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP97:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP91:%.*]] = icmp slt i64 [[TMP96]], [[TMP97]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP91]], label [[COND_TRUE92:%.*]], label [[COND_FALSE93:%.*]]
+// SIMD-ONLY0:       cond.true92:
+// SIMD-ONLY0-NEXT:    [[TMP98:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END94:%.*]]
+// SIMD-ONLY0:       cond.false93:
+// SIMD-ONLY0-NEXT:    [[TMP99:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END94]]
+// SIMD-ONLY0:       cond.end94:
+// SIMD-ONLY0-NEXT:    [[COND95:%.*]] = phi i64 [ [[TMP98]], [[COND_TRUE92]] ], [ [[TMP99]], [[COND_FALSE93]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND95]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP100:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP100]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP101:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP102:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP96:%.*]] = icmp eq i64 [[TMP101]], [[TMP102]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP96]], label [[COND_TRUE97:%.*]], label [[COND_FALSE98:%.*]]
+// SIMD-ONLY0:       cond.true97:
+// SIMD-ONLY0-NEXT:    [[TMP103:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END99:%.*]]
+// SIMD-ONLY0:       cond.false98:
+// SIMD-ONLY0-NEXT:    [[TMP104:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END99]]
+// SIMD-ONLY0:       cond.end99:
+// SIMD-ONLY0-NEXT:    [[COND100:%.*]] = phi i64 [ [[TMP103]], [[COND_TRUE97]] ], [ [[TMP104]], [[COND_FALSE98]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND100]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP105:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP106:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP101:%.*]] = icmp sgt i64 [[TMP105]], [[TMP106]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP101]], label [[COND_TRUE102:%.*]], label [[COND_FALSE103:%.*]]
+// SIMD-ONLY0:       cond.true102:
+// SIMD-ONLY0-NEXT:    [[TMP107:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END104:%.*]]
+// SIMD-ONLY0:       cond.false103:
+// SIMD-ONLY0-NEXT:    [[TMP108:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END104]]
+// SIMD-ONLY0:       cond.end104:
+// SIMD-ONLY0-NEXT:    [[COND105:%.*]] = phi i64 [ [[TMP107]], [[COND_TRUE102]] ], [ [[TMP108]], [[COND_FALSE103]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND105]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP109:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP109]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP110:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP111:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP106:%.*]] = icmp slt i64 [[TMP110]], [[TMP111]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP106]], label [[COND_TRUE107:%.*]], label [[COND_FALSE108:%.*]]
+// SIMD-ONLY0:       cond.true107:
+// SIMD-ONLY0-NEXT:    [[TMP112:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END109:%.*]]
+// SIMD-ONLY0:       cond.false108:
+// SIMD-ONLY0-NEXT:    [[TMP113:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END109]]
+// SIMD-ONLY0:       cond.end109:
+// SIMD-ONLY0-NEXT:    [[COND110:%.*]] = phi i64 [ [[TMP112]], [[COND_TRUE107]] ], [ [[TMP113]], [[COND_FALSE108]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND110]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP114:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP114]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP115:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP116:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP111:%.*]] = icmp eq i64 [[TMP115]], [[TMP116]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP111]], label [[COND_TRUE112:%.*]], label [[COND_FALSE113:%.*]]
+// SIMD-ONLY0:       cond.true112:
+// SIMD-ONLY0-NEXT:    [[TMP117:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END114:%.*]]
+// SIMD-ONLY0:       cond.false113:
+// SIMD-ONLY0-NEXT:    [[TMP118:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END114]]
+// SIMD-ONLY0:       cond.end114:
+// SIMD-ONLY0-NEXT:    [[COND115:%.*]] = phi i64 [ [[TMP117]], [[COND_TRUE112]] ], [ [[TMP118]], [[COND_FALSE113]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND115]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP119:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP119]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP120:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP120]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP121:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP122:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP116:%.*]] = icmp sgt i64 [[TMP121]], [[TMP122]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP116]], label [[COND_TRUE117:%.*]], label [[COND_FALSE118:%.*]]
+// SIMD-ONLY0:       cond.true117:
+// SIMD-ONLY0-NEXT:    [[TMP123:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END119:%.*]]
+// SIMD-ONLY0:       cond.false118:
+// SIMD-ONLY0-NEXT:    [[TMP124:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END119]]
+// SIMD-ONLY0:       cond.end119:
+// SIMD-ONLY0-NEXT:    [[COND120:%.*]] = phi i64 [ [[TMP123]], [[COND_TRUE117]] ], [ [[TMP124]], [[COND_FALSE118]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND120]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP125:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP125]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP126:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP127:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP121:%.*]] = icmp slt i64 [[TMP126]], [[TMP127]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP121]], label [[COND_TRUE122:%.*]], label [[COND_FALSE123:%.*]]
+// SIMD-ONLY0:       cond.true122:
+// SIMD-ONLY0-NEXT:    [[TMP128:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END124:%.*]]
+// SIMD-ONLY0:       cond.false123:
+// SIMD-ONLY0-NEXT:    [[TMP129:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END124]]
+// SIMD-ONLY0:       cond.end124:
+// SIMD-ONLY0-NEXT:    [[COND125:%.*]] = phi i64 [ [[TMP128]], [[COND_TRUE122]] ], [ [[TMP129]], [[COND_FALSE123]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND125]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP130:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP130]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP131:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP132:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP126:%.*]] = icmp eq i64 [[TMP131]], [[TMP132]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP126]], label [[COND_TRUE127:%.*]], label [[COND_FALSE128:%.*]]
+// SIMD-ONLY0:       cond.true127:
+// SIMD-ONLY0-NEXT:    [[TMP133:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END129:%.*]]
+// SIMD-ONLY0:       cond.false128:
+// SIMD-ONLY0-NEXT:    [[TMP134:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END129]]
+// SIMD-ONLY0:       cond.end129:
+// SIMD-ONLY0-NEXT:    [[COND130:%.*]] = phi i64 [ [[TMP133]], [[COND_TRUE127]] ], [ [[TMP134]], [[COND_FALSE128]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND130]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP135:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP136:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP131:%.*]] = icmp sgt i64 [[TMP135]], [[TMP136]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP131]], label [[COND_TRUE132:%.*]], label [[COND_FALSE133:%.*]]
+// SIMD-ONLY0:       cond.true132:
+// SIMD-ONLY0-NEXT:    [[TMP137:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END134:%.*]]
+// SIMD-ONLY0:       cond.false133:
+// SIMD-ONLY0-NEXT:    [[TMP138:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END134]]
+// SIMD-ONLY0:       cond.end134:
+// SIMD-ONLY0-NEXT:    [[COND135:%.*]] = phi i64 [ [[TMP137]], [[COND_TRUE132]] ], [ [[TMP138]], [[COND_FALSE133]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND135]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP139:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP139]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP140:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP141:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP136:%.*]] = icmp slt i64 [[TMP140]], [[TMP141]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP136]], label [[COND_TRUE137:%.*]], label [[COND_FALSE138:%.*]]
+// SIMD-ONLY0:       cond.true137:
+// SIMD-ONLY0-NEXT:    [[TMP142:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END139:%.*]]
+// SIMD-ONLY0:       cond.false138:
+// SIMD-ONLY0-NEXT:    [[TMP143:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END139]]
+// SIMD-ONLY0:       cond.end139:
+// SIMD-ONLY0-NEXT:    [[COND140:%.*]] = phi i64 [ [[TMP142]], [[COND_TRUE137]] ], [ [[TMP143]], [[COND_FALSE138]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND140]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP144:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP144]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP145:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP146:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP141:%.*]] = icmp eq i64 [[TMP145]], [[TMP146]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP141]], label [[COND_TRUE142:%.*]], label [[COND_FALSE143:%.*]]
+// SIMD-ONLY0:       cond.true142:
+// SIMD-ONLY0-NEXT:    [[TMP147:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END144:%.*]]
+// SIMD-ONLY0:       cond.false143:
+// SIMD-ONLY0-NEXT:    [[TMP148:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END144]]
+// SIMD-ONLY0:       cond.end144:
+// SIMD-ONLY0-NEXT:    [[COND145:%.*]] = phi i64 [ [[TMP147]], [[COND_TRUE142]] ], [ [[TMP148]], [[COND_FALSE143]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND145]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP149:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP149]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP150:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP150]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP151:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP152:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP146:%.*]] = icmp sgt i64 [[TMP151]], [[TMP152]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP146]], label [[COND_TRUE147:%.*]], label [[COND_FALSE148:%.*]]
+// SIMD-ONLY0:       cond.true147:
+// SIMD-ONLY0-NEXT:    [[TMP153:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END149:%.*]]
+// SIMD-ONLY0:       cond.false148:
+// SIMD-ONLY0-NEXT:    [[TMP154:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END149]]
+// SIMD-ONLY0:       cond.end149:
+// SIMD-ONLY0-NEXT:    [[COND150:%.*]] = phi i64 [ [[TMP153]], [[COND_TRUE147]] ], [ [[TMP154]], [[COND_FALSE148]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND150]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP155:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP155]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP156:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP157:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP151:%.*]] = icmp slt i64 [[TMP156]], [[TMP157]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP151]], label [[COND_TRUE152:%.*]], label [[COND_FALSE153:%.*]]
+// SIMD-ONLY0:       cond.true152:
+// SIMD-ONLY0-NEXT:    [[TMP158:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END154:%.*]]
+// SIMD-ONLY0:       cond.false153:
+// SIMD-ONLY0-NEXT:    [[TMP159:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END154]]
+// SIMD-ONLY0:       cond.end154:
+// SIMD-ONLY0-NEXT:    [[COND155:%.*]] = phi i64 [ [[TMP158]], [[COND_TRUE152]] ], [ [[TMP159]], [[COND_FALSE153]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND155]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP160:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP160]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP161:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP162:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP156:%.*]] = icmp eq i64 [[TMP161]], [[TMP162]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP156]], label [[COND_TRUE157:%.*]], label [[COND_FALSE158:%.*]]
+// SIMD-ONLY0:       cond.true157:
+// SIMD-ONLY0-NEXT:    [[TMP163:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END159:%.*]]
+// SIMD-ONLY0:       cond.false158:
+// SIMD-ONLY0-NEXT:    [[TMP164:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END159]]
+// SIMD-ONLY0:       cond.end159:
+// SIMD-ONLY0-NEXT:    [[COND160:%.*]] = phi i64 [ [[TMP163]], [[COND_TRUE157]] ], [ [[TMP164]], [[COND_FALSE158]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND160]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP165:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP166:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP161:%.*]] = icmp sgt i64 [[TMP165]], [[TMP166]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP161]], label [[COND_TRUE162:%.*]], label [[COND_FALSE163:%.*]]
+// SIMD-ONLY0:       cond.true162:
+// SIMD-ONLY0-NEXT:    [[TMP167:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END164:%.*]]
+// SIMD-ONLY0:       cond.false163:
+// SIMD-ONLY0-NEXT:    [[TMP168:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END164]]
+// SIMD-ONLY0:       cond.end164:
+// SIMD-ONLY0-NEXT:    [[COND165:%.*]] = phi i64 [ [[TMP167]], [[COND_TRUE162]] ], [ [[TMP168]], [[COND_FALSE163]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND165]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP169:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP169]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP170:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP171:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP166:%.*]] = icmp slt i64 [[TMP170]], [[TMP171]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP166]], label [[COND_TRUE167:%.*]], label [[COND_FALSE168:%.*]]
+// SIMD-ONLY0:       cond.true167:
+// SIMD-ONLY0-NEXT:    [[TMP172:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END169:%.*]]
+// SIMD-ONLY0:       cond.false168:
+// SIMD-ONLY0-NEXT:    [[TMP173:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END169]]
+// SIMD-ONLY0:       cond.end169:
+// SIMD-ONLY0-NEXT:    [[COND170:%.*]] = phi i64 [ [[TMP172]], [[COND_TRUE167]] ], [ [[TMP173]], [[COND_FALSE168]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND170]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP174:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP174]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP175:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP176:%.*]] = load i64, ptr [[LLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP171:%.*]] = icmp eq i64 [[TMP175]], [[TMP176]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP171]], label [[COND_TRUE172:%.*]], label [[COND_FALSE173:%.*]]
+// SIMD-ONLY0:       cond.true172:
+// SIMD-ONLY0-NEXT:    [[TMP177:%.*]] = load i64, ptr [[LLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END174:%.*]]
+// SIMD-ONLY0:       cond.false173:
+// SIMD-ONLY0-NEXT:    [[TMP178:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END174]]
+// SIMD-ONLY0:       cond.end174:
+// SIMD-ONLY0-NEXT:    [[COND175:%.*]] = phi i64 [ [[TMP177]], [[COND_TRUE172]] ], [ [[TMP178]], [[COND_FALSE173]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND175]], ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP179:%.*]] = load i64, ptr [[LLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP179]], ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP180:%.*]] = load i64, ptr [[LLV]], align 8
+// SIMD-ONLY0-NEXT:    ret i64 [[TMP180]]
+//
+//
+// SIMD-ONLY0-LABEL: @ullxevd(
+// SIMD-ONLY0-NEXT:  entry:
+// SIMD-ONLY0-NEXT:    [[ULLX:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[ULLV:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[ULLE:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[ULLD:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT:    [[TMP0:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP0]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[TMP1]], [[TMP2]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// SIMD-ONLY0:       cond.true:
+// SIMD-ONLY0-NEXT:    [[TMP3:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END:%.*]]
+// SIMD-ONLY0:       cond.false:
+// SIMD-ONLY0-NEXT:    [[TMP4:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END]]
+// SIMD-ONLY0:       cond.end:
+// SIMD-ONLY0-NEXT:    [[COND:%.*]] = phi i64 [ [[TMP3]], [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP5]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP6:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP7:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP1:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1]], label [[COND_TRUE2:%.*]], label [[COND_FALSE3:%.*]]
+// SIMD-ONLY0:       cond.true2:
+// SIMD-ONLY0-NEXT:    [[TMP8:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4:%.*]]
+// SIMD-ONLY0:       cond.false3:
+// SIMD-ONLY0-NEXT:    [[TMP9:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4]]
+// SIMD-ONLY0:       cond.end4:
+// SIMD-ONLY0-NEXT:    [[COND5:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE2]] ], [ [[TMP9]], [[COND_FALSE3]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND5]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP10:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP10]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP11:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP12:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6:%.*]] = icmp eq i64 [[TMP11]], [[TMP12]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]]
+// SIMD-ONLY0:       cond.true7:
+// SIMD-ONLY0-NEXT:    [[TMP13:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END9:%.*]]
+// SIMD-ONLY0:       cond.false8:
+// SIMD-ONLY0-NEXT:    [[TMP14:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END9]]
+// SIMD-ONLY0:       cond.end9:
+// SIMD-ONLY0-NEXT:    [[COND10:%.*]] = phi i64 [ [[TMP13]], [[COND_TRUE7]] ], [ [[TMP14]], [[COND_FALSE8]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND10]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP15:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP16:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP11:%.*]] = icmp ugt i64 [[TMP15]], [[TMP16]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]]
+// SIMD-ONLY0:       cond.true12:
+// SIMD-ONLY0-NEXT:    [[TMP17:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END14:%.*]]
+// SIMD-ONLY0:       cond.false13:
+// SIMD-ONLY0-NEXT:    [[TMP18:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END14]]
+// SIMD-ONLY0:       cond.end14:
+// SIMD-ONLY0-NEXT:    [[COND15:%.*]] = phi i64 [ [[TMP17]], [[COND_TRUE12]] ], [ [[TMP18]], [[COND_FALSE13]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND15]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP19:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP19]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP20:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP21:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP16:%.*]] = icmp ult i64 [[TMP20]], [[TMP21]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP16]], label [[COND_TRUE17:%.*]], label [[COND_FALSE18:%.*]]
+// SIMD-ONLY0:       cond.true17:
+// SIMD-ONLY0-NEXT:    [[TMP22:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END19:%.*]]
+// SIMD-ONLY0:       cond.false18:
+// SIMD-ONLY0-NEXT:    [[TMP23:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END19]]
+// SIMD-ONLY0:       cond.end19:
+// SIMD-ONLY0-NEXT:    [[COND20:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE17]] ], [ [[TMP23]], [[COND_FALSE18]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND20]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP24:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP24]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP25:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP26:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP21:%.*]] = icmp eq i64 [[TMP25]], [[TMP26]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP21]], label [[COND_TRUE22:%.*]], label [[COND_FALSE23:%.*]]
+// SIMD-ONLY0:       cond.true22:
+// SIMD-ONLY0-NEXT:    [[TMP27:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END24:%.*]]
+// SIMD-ONLY0:       cond.false23:
+// SIMD-ONLY0-NEXT:    [[TMP28:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END24]]
+// SIMD-ONLY0:       cond.end24:
+// SIMD-ONLY0-NEXT:    [[COND25:%.*]] = phi i64 [ [[TMP27]], [[COND_TRUE22]] ], [ [[TMP28]], [[COND_FALSE23]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND25]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP29:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP29]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP30:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP30]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP31:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP32:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP26:%.*]] = icmp ugt i64 [[TMP31]], [[TMP32]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP26]], label [[COND_TRUE27:%.*]], label [[COND_FALSE28:%.*]]
+// SIMD-ONLY0:       cond.true27:
+// SIMD-ONLY0-NEXT:    [[TMP33:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END29:%.*]]
+// SIMD-ONLY0:       cond.false28:
+// SIMD-ONLY0-NEXT:    [[TMP34:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END29]]
+// SIMD-ONLY0:       cond.end29:
+// SIMD-ONLY0-NEXT:    [[COND30:%.*]] = phi i64 [ [[TMP33]], [[COND_TRUE27]] ], [ [[TMP34]], [[COND_FALSE28]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND30]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP35:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP35]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP36:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP37:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP31:%.*]] = icmp ult i64 [[TMP36]], [[TMP37]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP31]], label [[COND_TRUE32:%.*]], label [[COND_FALSE33:%.*]]
+// SIMD-ONLY0:       cond.true32:
+// SIMD-ONLY0-NEXT:    [[TMP38:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END34:%.*]]
+// SIMD-ONLY0:       cond.false33:
+// SIMD-ONLY0-NEXT:    [[TMP39:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END34]]
+// SIMD-ONLY0:       cond.end34:
+// SIMD-ONLY0-NEXT:    [[COND35:%.*]] = phi i64 [ [[TMP38]], [[COND_TRUE32]] ], [ [[TMP39]], [[COND_FALSE33]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND35]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP40:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP40]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP41:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP42:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP36:%.*]] = icmp eq i64 [[TMP41]], [[TMP42]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP36]], label [[COND_TRUE37:%.*]], label [[COND_FALSE38:%.*]]
+// SIMD-ONLY0:       cond.true37:
+// SIMD-ONLY0-NEXT:    [[TMP43:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END39:%.*]]
+// SIMD-ONLY0:       cond.false38:
+// SIMD-ONLY0-NEXT:    [[TMP44:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END39]]
+// SIMD-ONLY0:       cond.end39:
+// SIMD-ONLY0-NEXT:    [[COND40:%.*]] = phi i64 [ [[TMP43]], [[COND_TRUE37]] ], [ [[TMP44]], [[COND_FALSE38]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND40]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP45:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP46:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP41:%.*]] = icmp ugt i64 [[TMP45]], [[TMP46]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP41]], label [[COND_TRUE42:%.*]], label [[COND_FALSE43:%.*]]
+// SIMD-ONLY0:       cond.true42:
+// SIMD-ONLY0-NEXT:    [[TMP47:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END44:%.*]]
+// SIMD-ONLY0:       cond.false43:
+// SIMD-ONLY0-NEXT:    [[TMP48:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END44]]
+// SIMD-ONLY0:       cond.end44:
+// SIMD-ONLY0-NEXT:    [[COND45:%.*]] = phi i64 [ [[TMP47]], [[COND_TRUE42]] ], [ [[TMP48]], [[COND_FALSE43]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND45]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP49:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP49]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP50:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP51:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP46:%.*]] = icmp ult i64 [[TMP50]], [[TMP51]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP46]], label [[COND_TRUE47:%.*]], label [[COND_FALSE48:%.*]]
+// SIMD-ONLY0:       cond.true47:
+// SIMD-ONLY0-NEXT:    [[TMP52:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END49:%.*]]
+// SIMD-ONLY0:       cond.false48:
+// SIMD-ONLY0-NEXT:    [[TMP53:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END49]]
+// SIMD-ONLY0:       cond.end49:
+// SIMD-ONLY0-NEXT:    [[COND50:%.*]] = phi i64 [ [[TMP52]], [[COND_TRUE47]] ], [ [[TMP53]], [[COND_FALSE48]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND50]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP54:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP54]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP55:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP56:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP51:%.*]] = icmp eq i64 [[TMP55]], [[TMP56]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP51]], label [[COND_TRUE52:%.*]], label [[COND_FALSE53:%.*]]
+// SIMD-ONLY0:       cond.true52:
+// SIMD-ONLY0-NEXT:    [[TMP57:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END54:%.*]]
+// SIMD-ONLY0:       cond.false53:
+// SIMD-ONLY0-NEXT:    [[TMP58:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END54]]
+// SIMD-ONLY0:       cond.end54:
+// SIMD-ONLY0-NEXT:    [[COND55:%.*]] = phi i64 [ [[TMP57]], [[COND_TRUE52]] ], [ [[TMP58]], [[COND_FALSE53]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND55]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP59:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP59]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP60:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP60]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP61:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP62:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP56:%.*]] = icmp ugt i64 [[TMP61]], [[TMP62]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP56]], label [[COND_TRUE57:%.*]], label [[COND_FALSE58:%.*]]
+// SIMD-ONLY0:       cond.true57:
+// SIMD-ONLY0-NEXT:    [[TMP63:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END59:%.*]]
+// SIMD-ONLY0:       cond.false58:
+// SIMD-ONLY0-NEXT:    [[TMP64:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END59]]
+// SIMD-ONLY0:       cond.end59:
+// SIMD-ONLY0-NEXT:    [[COND60:%.*]] = phi i64 [ [[TMP63]], [[COND_TRUE57]] ], [ [[TMP64]], [[COND_FALSE58]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND60]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP65:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP65]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP66:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP67:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP61:%.*]] = icmp ult i64 [[TMP66]], [[TMP67]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP61]], label [[COND_TRUE62:%.*]], label [[COND_FALSE63:%.*]]
+// SIMD-ONLY0:       cond.true62:
+// SIMD-ONLY0-NEXT:    [[TMP68:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END64:%.*]]
+// SIMD-ONLY0:       cond.false63:
+// SIMD-ONLY0-NEXT:    [[TMP69:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END64]]
+// SIMD-ONLY0:       cond.end64:
+// SIMD-ONLY0-NEXT:    [[COND65:%.*]] = phi i64 [ [[TMP68]], [[COND_TRUE62]] ], [ [[TMP69]], [[COND_FALSE63]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND65]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP70:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP70]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP71:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP72:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP66:%.*]] = icmp eq i64 [[TMP71]], [[TMP72]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP66]], label [[COND_TRUE67:%.*]], label [[COND_FALSE68:%.*]]
+// SIMD-ONLY0:       cond.true67:
+// SIMD-ONLY0-NEXT:    [[TMP73:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END69:%.*]]
+// SIMD-ONLY0:       cond.false68:
+// SIMD-ONLY0-NEXT:    [[TMP74:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END69]]
+// SIMD-ONLY0:       cond.end69:
+// SIMD-ONLY0-NEXT:    [[COND70:%.*]] = phi i64 [ [[TMP73]], [[COND_TRUE67]] ], [ [[TMP74]], [[COND_FALSE68]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND70]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP75:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP76:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP71:%.*]] = icmp ugt i64 [[TMP75]], [[TMP76]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP71]], label [[COND_TRUE72:%.*]], label [[COND_FALSE73:%.*]]
+// SIMD-ONLY0:       cond.true72:
+// SIMD-ONLY0-NEXT:    [[TMP77:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END74:%.*]]
+// SIMD-ONLY0:       cond.false73:
+// SIMD-ONLY0-NEXT:    [[TMP78:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END74]]
+// SIMD-ONLY0:       cond.end74:
+// SIMD-ONLY0-NEXT:    [[COND75:%.*]] = phi i64 [ [[TMP77]], [[COND_TRUE72]] ], [ [[TMP78]], [[COND_FALSE73]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND75]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP79:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP79]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP80:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP81:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP76:%.*]] = icmp ult i64 [[TMP80]], [[TMP81]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP76]], label [[COND_TRUE77:%.*]], label [[COND_FALSE78:%.*]]
+// SIMD-ONLY0:       cond.true77:
+// SIMD-ONLY0-NEXT:    [[TMP82:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END79:%.*]]
+// SIMD-ONLY0:       cond.false78:
+// SIMD-ONLY0-NEXT:    [[TMP83:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END79]]
+// SIMD-ONLY0:       cond.end79:
+// SIMD-ONLY0-NEXT:    [[COND80:%.*]] = phi i64 [ [[TMP82]], [[COND_TRUE77]] ], [ [[TMP83]], [[COND_FALSE78]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND80]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP84:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP84]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP85:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP86:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP81:%.*]] = icmp eq i64 [[TMP85]], [[TMP86]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP81]], label [[COND_TRUE82:%.*]], label [[COND_FALSE83:%.*]]
+// SIMD-ONLY0:       cond.true82:
+// SIMD-ONLY0-NEXT:    [[TMP87:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END84:%.*]]
+// SIMD-ONLY0:       cond.false83:
+// SIMD-ONLY0-NEXT:    [[TMP88:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END84]]
+// SIMD-ONLY0:       cond.end84:
+// SIMD-ONLY0-NEXT:    [[COND85:%.*]] = phi i64 [ [[TMP87]], [[COND_TRUE82]] ], [ [[TMP88]], [[COND_FALSE83]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND85]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP89:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP89]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP90:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP90]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP91:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP92:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP86:%.*]] = icmp ugt i64 [[TMP91]], [[TMP92]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP86]], label [[COND_TRUE87:%.*]], label [[COND_FALSE88:%.*]]
+// SIMD-ONLY0:       cond.true87:
+// SIMD-ONLY0-NEXT:    [[TMP93:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END89:%.*]]
+// SIMD-ONLY0:       cond.false88:
+// SIMD-ONLY0-NEXT:    [[TMP94:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END89]]
+// SIMD-ONLY0:       cond.end89:
+// SIMD-ONLY0-NEXT:    [[COND90:%.*]] = phi i64 [ [[TMP93]], [[COND_TRUE87]] ], [ [[TMP94]], [[COND_FALSE88]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND90]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP95:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP95]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP96:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP97:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP91:%.*]] = icmp ult i64 [[TMP96]], [[TMP97]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP91]], label [[COND_TRUE92:%.*]], label [[COND_FALSE93:%.*]]
+// SIMD-ONLY0:       cond.true92:
+// SIMD-ONLY0-NEXT:    [[TMP98:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END94:%.*]]
+// SIMD-ONLY0:       cond.false93:
+// SIMD-ONLY0-NEXT:    [[TMP99:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END94]]
+// SIMD-ONLY0:       cond.end94:
+// SIMD-ONLY0-NEXT:    [[COND95:%.*]] = phi i64 [ [[TMP98]], [[COND_TRUE92]] ], [ [[TMP99]], [[COND_FALSE93]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND95]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP100:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP100]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP101:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP102:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP96:%.*]] = icmp eq i64 [[TMP101]], [[TMP102]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP96]], label [[COND_TRUE97:%.*]], label [[COND_FALSE98:%.*]]
+// SIMD-ONLY0:       cond.true97:
+// SIMD-ONLY0-NEXT:    [[TMP103:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END99:%.*]]
+// SIMD-ONLY0:       cond.false98:
+// SIMD-ONLY0-NEXT:    [[TMP104:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END99]]
+// SIMD-ONLY0:       cond.end99:
+// SIMD-ONLY0-NEXT:    [[COND100:%.*]] = phi i64 [ [[TMP103]], [[COND_TRUE97]] ], [ [[TMP104]], [[COND_FALSE98]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND100]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP105:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP106:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP101:%.*]] = icmp ugt i64 [[TMP105]], [[TMP106]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP101]], label [[COND_TRUE102:%.*]], label [[COND_FALSE103:%.*]]
+// SIMD-ONLY0:       cond.true102:
+// SIMD-ONLY0-NEXT:    [[TMP107:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END104:%.*]]
+// SIMD-ONLY0:       cond.false103:
+// SIMD-ONLY0-NEXT:    [[TMP108:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END104]]
+// SIMD-ONLY0:       cond.end104:
+// SIMD-ONLY0-NEXT:    [[COND105:%.*]] = phi i64 [ [[TMP107]], [[COND_TRUE102]] ], [ [[TMP108]], [[COND_FALSE103]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND105]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP109:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP109]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP110:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP111:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP106:%.*]] = icmp ult i64 [[TMP110]], [[TMP111]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP106]], label [[COND_TRUE107:%.*]], label [[COND_FALSE108:%.*]]
+// SIMD-ONLY0:       cond.true107:
+// SIMD-ONLY0-NEXT:    [[TMP112:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END109:%.*]]
+// SIMD-ONLY0:       cond.false108:
+// SIMD-ONLY0-NEXT:    [[TMP113:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END109]]
+// SIMD-ONLY0:       cond.end109:
+// SIMD-ONLY0-NEXT:    [[COND110:%.*]] = phi i64 [ [[TMP112]], [[COND_TRUE107]] ], [ [[TMP113]], [[COND_FALSE108]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND110]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP114:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP114]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP115:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP116:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP111:%.*]] = icmp eq i64 [[TMP115]], [[TMP116]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP111]], label [[COND_TRUE112:%.*]], label [[COND_FALSE113:%.*]]
+// SIMD-ONLY0:       cond.true112:
+// SIMD-ONLY0-NEXT:    [[TMP117:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END114:%.*]]
+// SIMD-ONLY0:       cond.false113:
+// SIMD-ONLY0-NEXT:    [[TMP118:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END114]]
+// SIMD-ONLY0:       cond.end114:
+// SIMD-ONLY0-NEXT:    [[COND115:%.*]] = phi i64 [ [[TMP117]], [[COND_TRUE112]] ], [ [[TMP118]], [[COND_FALSE113]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND115]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP119:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP119]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP120:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP120]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP121:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP122:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP116:%.*]] = icmp ugt i64 [[TMP121]], [[TMP122]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP116]], label [[COND_TRUE117:%.*]], label [[COND_FALSE118:%.*]]
+// SIMD-ONLY0:       cond.true117:
+// SIMD-ONLY0-NEXT:    [[TMP123:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END119:%.*]]
+// SIMD-ONLY0:       cond.false118:
+// SIMD-ONLY0-NEXT:    [[TMP124:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END119]]
+// SIMD-ONLY0:       cond.end119:
+// SIMD-ONLY0-NEXT:    [[COND120:%.*]] = phi i64 [ [[TMP123]], [[COND_TRUE117]] ], [ [[TMP124]], [[COND_FALSE118]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND120]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP125:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP125]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP126:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP127:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP121:%.*]] = icmp ult i64 [[TMP126]], [[TMP127]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP121]], label [[COND_TRUE122:%.*]], label [[COND_FALSE123:%.*]]
+// SIMD-ONLY0:       cond.true122:
+// SIMD-ONLY0-NEXT:    [[TMP128:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END124:%.*]]
+// SIMD-ONLY0:       cond.false123:
+// SIMD-ONLY0-NEXT:    [[TMP129:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END124]]
+// SIMD-ONLY0:       cond.end124:
+// SIMD-ONLY0-NEXT:    [[COND125:%.*]] = phi i64 [ [[TMP128]], [[COND_TRUE122]] ], [ [[TMP129]], [[COND_FALSE123]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND125]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP130:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP130]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP131:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP132:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP126:%.*]] = icmp eq i64 [[TMP131]], [[TMP132]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP126]], label [[COND_TRUE127:%.*]], label [[COND_FALSE128:%.*]]
+// SIMD-ONLY0:       cond.true127:
+// SIMD-ONLY0-NEXT:    [[TMP133:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END129:%.*]]
+// SIMD-ONLY0:       cond.false128:
+// SIMD-ONLY0-NEXT:    [[TMP134:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END129]]
+// SIMD-ONLY0:       cond.end129:
+// SIMD-ONLY0-NEXT:    [[COND130:%.*]] = phi i64 [ [[TMP133]], [[COND_TRUE127]] ], [ [[TMP134]], [[COND_FALSE128]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND130]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP135:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP136:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP131:%.*]] = icmp ugt i64 [[TMP135]], [[TMP136]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP131]], label [[COND_TRUE132:%.*]], label [[COND_FALSE133:%.*]]
+// SIMD-ONLY0:       cond.true132:
+// SIMD-ONLY0-NEXT:    [[TMP137:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END134:%.*]]
+// SIMD-ONLY0:       cond.false133:
+// SIMD-ONLY0-NEXT:    [[TMP138:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END134]]
+// SIMD-ONLY0:       cond.end134:
+// SIMD-ONLY0-NEXT:    [[COND135:%.*]] = phi i64 [ [[TMP137]], [[COND_TRUE132]] ], [ [[TMP138]], [[COND_FALSE133]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND135]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP139:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP139]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP140:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP141:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP136:%.*]] = icmp ult i64 [[TMP140]], [[TMP141]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP136]], label [[COND_TRUE137:%.*]], label [[COND_FALSE138:%.*]]
+// SIMD-ONLY0:       cond.true137:
+// SIMD-ONLY0-NEXT:    [[TMP142:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END139:%.*]]
+// SIMD-ONLY0:       cond.false138:
+// SIMD-ONLY0-NEXT:    [[TMP143:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END139]]
+// SIMD-ONLY0:       cond.end139:
+// SIMD-ONLY0-NEXT:    [[COND140:%.*]] = phi i64 [ [[TMP142]], [[COND_TRUE137]] ], [ [[TMP143]], [[COND_FALSE138]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND140]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP144:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP144]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP145:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP146:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP141:%.*]] = icmp eq i64 [[TMP145]], [[TMP146]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP141]], label [[COND_TRUE142:%.*]], label [[COND_FALSE143:%.*]]
+// SIMD-ONLY0:       cond.true142:
+// SIMD-ONLY0-NEXT:    [[TMP147:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END144:%.*]]
+// SIMD-ONLY0:       cond.false143:
+// SIMD-ONLY0-NEXT:    [[TMP148:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END144]]
+// SIMD-ONLY0:       cond.end144:
+// SIMD-ONLY0-NEXT:    [[COND145:%.*]] = phi i64 [ [[TMP147]], [[COND_TRUE142]] ], [ [[TMP148]], [[COND_FALSE143]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND145]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP149:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP149]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP150:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP150]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP151:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP152:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP146:%.*]] = icmp ugt i64 [[TMP151]], [[TMP152]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP146]], label [[COND_TRUE147:%.*]], label [[COND_FALSE148:%.*]]
+// SIMD-ONLY0:       cond.true147:
+// SIMD-ONLY0-NEXT:    [[TMP153:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END149:%.*]]
+// SIMD-ONLY0:       cond.false148:
+// SIMD-ONLY0-NEXT:    [[TMP154:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END149]]
+// SIMD-ONLY0:       cond.end149:
+// SIMD-ONLY0-NEXT:    [[COND150:%.*]] = phi i64 [ [[TMP153]], [[COND_TRUE147]] ], [ [[TMP154]], [[COND_FALSE148]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND150]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP155:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP155]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP156:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP157:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP151:%.*]] = icmp ult i64 [[TMP156]], [[TMP157]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP151]], label [[COND_TRUE152:%.*]], label [[COND_FALSE153:%.*]]
+// SIMD-ONLY0:       cond.true152:
+// SIMD-ONLY0-NEXT:    [[TMP158:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END154:%.*]]
+// SIMD-ONLY0:       cond.false153:
+// SIMD-ONLY0-NEXT:    [[TMP159:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END154]]
+// SIMD-ONLY0:       cond.end154:
+// SIMD-ONLY0-NEXT:    [[COND155:%.*]] = phi i64 [ [[TMP158]], [[COND_TRUE152]] ], [ [[TMP159]], [[COND_FALSE153]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND155]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP160:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP160]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP161:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP162:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP156:%.*]] = icmp eq i64 [[TMP161]], [[TMP162]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP156]], label [[COND_TRUE157:%.*]], label [[COND_FALSE158:%.*]]
+// SIMD-ONLY0:       cond.true157:
+// SIMD-ONLY0-NEXT:    [[TMP163:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END159:%.*]]
+// SIMD-ONLY0:       cond.false158:
+// SIMD-ONLY0-NEXT:    [[TMP164:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END159]]
+// SIMD-ONLY0:       cond.end159:
+// SIMD-ONLY0-NEXT:    [[COND160:%.*]] = phi i64 [ [[TMP163]], [[COND_TRUE157]] ], [ [[TMP164]], [[COND_FALSE158]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND160]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP165:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP166:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP161:%.*]] = icmp ugt i64 [[TMP165]], [[TMP166]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP161]], label [[COND_TRUE162:%.*]], label [[COND_FALSE163:%.*]]
+// SIMD-ONLY0:       cond.true162:
+// SIMD-ONLY0-NEXT:    [[TMP167:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END164:%.*]]
+// SIMD-ONLY0:       cond.false163:
+// SIMD-ONLY0-NEXT:    [[TMP168:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END164]]
+// SIMD-ONLY0:       cond.end164:
+// SIMD-ONLY0-NEXT:    [[COND165:%.*]] = phi i64 [ [[TMP167]], [[COND_TRUE162]] ], [ [[TMP168]], [[COND_FALSE163]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND165]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP169:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP169]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP170:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP171:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP166:%.*]] = icmp ult i64 [[TMP170]], [[TMP171]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP166]], label [[COND_TRUE167:%.*]], label [[COND_FALSE168:%.*]]
+// SIMD-ONLY0:       cond.true167:
+// SIMD-ONLY0-NEXT:    [[TMP172:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END169:%.*]]
+// SIMD-ONLY0:       cond.false168:
+// SIMD-ONLY0-NEXT:    [[TMP173:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END169]]
+// SIMD-ONLY0:       cond.end169:
+// SIMD-ONLY0-NEXT:    [[COND170:%.*]] = phi i64 [ [[TMP172]], [[COND_TRUE167]] ], [ [[TMP173]], [[COND_FALSE168]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND170]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP174:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP174]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP175:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP176:%.*]] = load i64, ptr [[ULLE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP171:%.*]] = icmp eq i64 [[TMP175]], [[TMP176]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP171]], label [[COND_TRUE172:%.*]], label [[COND_FALSE173:%.*]]
+// SIMD-ONLY0:       cond.true172:
+// SIMD-ONLY0-NEXT:    [[TMP177:%.*]] = load i64, ptr [[ULLD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END174:%.*]]
+// SIMD-ONLY0:       cond.false173:
+// SIMD-ONLY0-NEXT:    [[TMP178:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END174]]
+// SIMD-ONLY0:       cond.end174:
+// SIMD-ONLY0-NEXT:    [[COND175:%.*]] = phi i64 [ [[TMP177]], [[COND_TRUE172]] ], [ [[TMP178]], [[COND_FALSE173]] ]
+// SIMD-ONLY0-NEXT:    store i64 [[COND175]], ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP179:%.*]] = load i64, ptr [[ULLX]], align 8
+// SIMD-ONLY0-NEXT:    store i64 [[TMP179]], ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP180:%.*]] = load i64, ptr [[ULLV]], align 8
+// SIMD-ONLY0-NEXT:    ret i64 [[TMP180]]
+//
+//
+// SIMD-ONLY0-LABEL: @fxevd(
+// SIMD-ONLY0-NEXT:  entry:
+// SIMD-ONLY0-NEXT:    [[FX:%.*]] = alloca float, align 4
+// SIMD-ONLY0-NEXT:    [[FV:%.*]] = alloca float, align 4
+// SIMD-ONLY0-NEXT:    [[FE:%.*]] = alloca float, align 4
+// SIMD-ONLY0-NEXT:    [[FD:%.*]] = alloca float, align 4
+// SIMD-ONLY0-NEXT:    [[TMP0:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP0]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP2:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP:%.*]] = fcmp ogt float [[TMP1]], [[TMP2]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// SIMD-ONLY0:       cond.true:
+// SIMD-ONLY0-NEXT:    [[TMP3:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END:%.*]]
+// SIMD-ONLY0:       cond.false:
+// SIMD-ONLY0-NEXT:    [[TMP4:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END]]
+// SIMD-ONLY0:       cond.end:
+// SIMD-ONLY0-NEXT:    [[COND:%.*]] = phi float [ [[TMP3]], [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP5:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP5]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP6:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP7:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP1:%.*]] = fcmp olt float [[TMP6]], [[TMP7]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1]], label [[COND_TRUE2:%.*]], label [[COND_FALSE3:%.*]]
+// SIMD-ONLY0:       cond.true2:
+// SIMD-ONLY0-NEXT:    [[TMP8:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4:%.*]]
+// SIMD-ONLY0:       cond.false3:
+// SIMD-ONLY0-NEXT:    [[TMP9:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END4]]
+// SIMD-ONLY0:       cond.end4:
+// SIMD-ONLY0-NEXT:    [[COND5:%.*]] = phi float [ [[TMP8]], [[COND_TRUE2]] ], [ [[TMP9]], [[COND_FALSE3]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND5]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP10:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP10]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP11:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP12:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP6:%.*]] = fcmp oeq float [[TMP11]], [[TMP12]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]]
+// SIMD-ONLY0:       cond.true7:
+// SIMD-ONLY0-NEXT:    [[TMP13:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END9:%.*]]
+// SIMD-ONLY0:       cond.false8:
+// SIMD-ONLY0-NEXT:    [[TMP14:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END9]]
+// SIMD-ONLY0:       cond.end9:
+// SIMD-ONLY0-NEXT:    [[COND10:%.*]] = phi float [ [[TMP13]], [[COND_TRUE7]] ], [ [[TMP14]], [[COND_FALSE8]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND10]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP15:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP16:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP11:%.*]] = fcmp ogt float [[TMP15]], [[TMP16]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]]
+// SIMD-ONLY0:       cond.true12:
+// SIMD-ONLY0-NEXT:    [[TMP17:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END14:%.*]]
+// SIMD-ONLY0:       cond.false13:
+// SIMD-ONLY0-NEXT:    [[TMP18:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END14]]
+// SIMD-ONLY0:       cond.end14:
+// SIMD-ONLY0-NEXT:    [[COND15:%.*]] = phi float [ [[TMP17]], [[COND_TRUE12]] ], [ [[TMP18]], [[COND_FALSE13]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND15]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP19:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP19]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP20:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP21:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP16:%.*]] = fcmp olt float [[TMP20]], [[TMP21]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP16]], label [[COND_TRUE17:%.*]], label [[COND_FALSE18:%.*]]
+// SIMD-ONLY0:       cond.true17:
+// SIMD-ONLY0-NEXT:    [[TMP22:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END19:%.*]]
+// SIMD-ONLY0:       cond.false18:
+// SIMD-ONLY0-NEXT:    [[TMP23:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END19]]
+// SIMD-ONLY0:       cond.end19:
+// SIMD-ONLY0-NEXT:    [[COND20:%.*]] = phi float [ [[TMP22]], [[COND_TRUE17]] ], [ [[TMP23]], [[COND_FALSE18]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND20]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP24:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP24]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP25:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP26:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP21:%.*]] = fcmp oeq float [[TMP25]], [[TMP26]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP21]], label [[COND_TRUE22:%.*]], label [[COND_FALSE23:%.*]]
+// SIMD-ONLY0:       cond.true22:
+// SIMD-ONLY0-NEXT:    [[TMP27:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END24:%.*]]
+// SIMD-ONLY0:       cond.false23:
+// SIMD-ONLY0-NEXT:    [[TMP28:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END24]]
+// SIMD-ONLY0:       cond.end24:
+// SIMD-ONLY0-NEXT:    [[COND25:%.*]] = phi float [ [[TMP27]], [[COND_TRUE22]] ], [ [[TMP28]], [[COND_FALSE23]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND25]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP29:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP29]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP30:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP30]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP31:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP32:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP26:%.*]] = fcmp ogt float [[TMP31]], [[TMP32]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP26]], label [[COND_TRUE27:%.*]], label [[COND_FALSE28:%.*]]
+// SIMD-ONLY0:       cond.true27:
+// SIMD-ONLY0-NEXT:    [[TMP33:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END29:%.*]]
+// SIMD-ONLY0:       cond.false28:
+// SIMD-ONLY0-NEXT:    [[TMP34:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END29]]
+// SIMD-ONLY0:       cond.end29:
+// SIMD-ONLY0-NEXT:    [[COND30:%.*]] = phi float [ [[TMP33]], [[COND_TRUE27]] ], [ [[TMP34]], [[COND_FALSE28]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND30]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP35:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP35]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP36:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP37:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP31:%.*]] = fcmp olt float [[TMP36]], [[TMP37]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP31]], label [[COND_TRUE32:%.*]], label [[COND_FALSE33:%.*]]
+// SIMD-ONLY0:       cond.true32:
+// SIMD-ONLY0-NEXT:    [[TMP38:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END34:%.*]]
+// SIMD-ONLY0:       cond.false33:
+// SIMD-ONLY0-NEXT:    [[TMP39:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END34]]
+// SIMD-ONLY0:       cond.end34:
+// SIMD-ONLY0-NEXT:    [[COND35:%.*]] = phi float [ [[TMP38]], [[COND_TRUE32]] ], [ [[TMP39]], [[COND_FALSE33]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND35]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP40:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP40]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP41:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP42:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP36:%.*]] = fcmp oeq float [[TMP41]], [[TMP42]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP36]], label [[COND_TRUE37:%.*]], label [[COND_FALSE38:%.*]]
+// SIMD-ONLY0:       cond.true37:
+// SIMD-ONLY0-NEXT:    [[TMP43:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END39:%.*]]
+// SIMD-ONLY0:       cond.false38:
+// SIMD-ONLY0-NEXT:    [[TMP44:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END39]]
+// SIMD-ONLY0:       cond.end39:
+// SIMD-ONLY0-NEXT:    [[COND40:%.*]] = phi float [ [[TMP43]], [[COND_TRUE37]] ], [ [[TMP44]], [[COND_FALSE38]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND40]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP45:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP46:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP41:%.*]] = fcmp ogt float [[TMP45]], [[TMP46]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP41]], label [[COND_TRUE42:%.*]], label [[COND_FALSE43:%.*]]
+// SIMD-ONLY0:       cond.true42:
+// SIMD-ONLY0-NEXT:    [[TMP47:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END44:%.*]]
+// SIMD-ONLY0:       cond.false43:
+// SIMD-ONLY0-NEXT:    [[TMP48:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END44]]
+// SIMD-ONLY0:       cond.end44:
+// SIMD-ONLY0-NEXT:    [[COND45:%.*]] = phi float [ [[TMP47]], [[COND_TRUE42]] ], [ [[TMP48]], [[COND_FALSE43]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND45]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP49:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP49]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP50:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP51:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP46:%.*]] = fcmp olt float [[TMP50]], [[TMP51]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP46]], label [[COND_TRUE47:%.*]], label [[COND_FALSE48:%.*]]
+// SIMD-ONLY0:       cond.true47:
+// SIMD-ONLY0-NEXT:    [[TMP52:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END49:%.*]]
+// SIMD-ONLY0:       cond.false48:
+// SIMD-ONLY0-NEXT:    [[TMP53:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END49]]
+// SIMD-ONLY0:       cond.end49:
+// SIMD-ONLY0-NEXT:    [[COND50:%.*]] = phi float [ [[TMP52]], [[COND_TRUE47]] ], [ [[TMP53]], [[COND_FALSE48]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND50]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP54:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP54]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP55:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP56:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP51:%.*]] = fcmp oeq float [[TMP55]], [[TMP56]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP51]], label [[COND_TRUE52:%.*]], label [[COND_FALSE53:%.*]]
+// SIMD-ONLY0:       cond.true52:
+// SIMD-ONLY0-NEXT:    [[TMP57:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END54:%.*]]
+// SIMD-ONLY0:       cond.false53:
+// SIMD-ONLY0-NEXT:    [[TMP58:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END54]]
+// SIMD-ONLY0:       cond.end54:
+// SIMD-ONLY0-NEXT:    [[COND55:%.*]] = phi float [ [[TMP57]], [[COND_TRUE52]] ], [ [[TMP58]], [[COND_FALSE53]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND55]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP59:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP59]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP60:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP60]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP61:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP62:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP56:%.*]] = fcmp ogt float [[TMP61]], [[TMP62]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP56]], label [[COND_TRUE57:%.*]], label [[COND_FALSE58:%.*]]
+// SIMD-ONLY0:       cond.true57:
+// SIMD-ONLY0-NEXT:    [[TMP63:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END59:%.*]]
+// SIMD-ONLY0:       cond.false58:
+// SIMD-ONLY0-NEXT:    [[TMP64:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END59]]
+// SIMD-ONLY0:       cond.end59:
+// SIMD-ONLY0-NEXT:    [[COND60:%.*]] = phi float [ [[TMP63]], [[COND_TRUE57]] ], [ [[TMP64]], [[COND_FALSE58]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND60]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP65:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP65]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP66:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP67:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP61:%.*]] = fcmp olt float [[TMP66]], [[TMP67]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP61]], label [[COND_TRUE62:%.*]], label [[COND_FALSE63:%.*]]
+// SIMD-ONLY0:       cond.true62:
+// SIMD-ONLY0-NEXT:    [[TMP68:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END64:%.*]]
+// SIMD-ONLY0:       cond.false63:
+// SIMD-ONLY0-NEXT:    [[TMP69:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END64]]
+// SIMD-ONLY0:       cond.end64:
+// SIMD-ONLY0-NEXT:    [[COND65:%.*]] = phi float [ [[TMP68]], [[COND_TRUE62]] ], [ [[TMP69]], [[COND_FALSE63]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND65]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP70:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP70]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP71:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP72:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP66:%.*]] = fcmp oeq float [[TMP71]], [[TMP72]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP66]], label [[COND_TRUE67:%.*]], label [[COND_FALSE68:%.*]]
+// SIMD-ONLY0:       cond.true67:
+// SIMD-ONLY0-NEXT:    [[TMP73:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END69:%.*]]
+// SIMD-ONLY0:       cond.false68:
+// SIMD-ONLY0-NEXT:    [[TMP74:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END69]]
+// SIMD-ONLY0:       cond.end69:
+// SIMD-ONLY0-NEXT:    [[COND70:%.*]] = phi float [ [[TMP73]], [[COND_TRUE67]] ], [ [[TMP74]], [[COND_FALSE68]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND70]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP75:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP76:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP71:%.*]] = fcmp ogt float [[TMP75]], [[TMP76]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP71]], label [[COND_TRUE72:%.*]], label [[COND_FALSE73:%.*]]
+// SIMD-ONLY0:       cond.true72:
+// SIMD-ONLY0-NEXT:    [[TMP77:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END74:%.*]]
+// SIMD-ONLY0:       cond.false73:
+// SIMD-ONLY0-NEXT:    [[TMP78:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END74]]
+// SIMD-ONLY0:       cond.end74:
+// SIMD-ONLY0-NEXT:    [[COND75:%.*]] = phi float [ [[TMP77]], [[COND_TRUE72]] ], [ [[TMP78]], [[COND_FALSE73]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND75]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP79:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP79]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP80:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP81:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP76:%.*]] = fcmp olt float [[TMP80]], [[TMP81]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP76]], label [[COND_TRUE77:%.*]], label [[COND_FALSE78:%.*]]
+// SIMD-ONLY0:       cond.true77:
+// SIMD-ONLY0-NEXT:    [[TMP82:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END79:%.*]]
+// SIMD-ONLY0:       cond.false78:
+// SIMD-ONLY0-NEXT:    [[TMP83:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END79]]
+// SIMD-ONLY0:       cond.end79:
+// SIMD-ONLY0-NEXT:    [[COND80:%.*]] = phi float [ [[TMP82]], [[COND_TRUE77]] ], [ [[TMP83]], [[COND_FALSE78]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND80]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP84:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP84]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP85:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP86:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP81:%.*]] = fcmp oeq float [[TMP85]], [[TMP86]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP81]], label [[COND_TRUE82:%.*]], label [[COND_FALSE83:%.*]]
+// SIMD-ONLY0:       cond.true82:
+// SIMD-ONLY0-NEXT:    [[TMP87:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END84:%.*]]
+// SIMD-ONLY0:       cond.false83:
+// SIMD-ONLY0-NEXT:    [[TMP88:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END84]]
+// SIMD-ONLY0:       cond.end84:
+// SIMD-ONLY0-NEXT:    [[COND85:%.*]] = phi float [ [[TMP87]], [[COND_TRUE82]] ], [ [[TMP88]], [[COND_FALSE83]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND85]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP89:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP89]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP90:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP90]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP91:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP92:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP86:%.*]] = fcmp ogt float [[TMP91]], [[TMP92]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP86]], label [[COND_TRUE87:%.*]], label [[COND_FALSE88:%.*]]
+// SIMD-ONLY0:       cond.true87:
+// SIMD-ONLY0-NEXT:    [[TMP93:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END89:%.*]]
+// SIMD-ONLY0:       cond.false88:
+// SIMD-ONLY0-NEXT:    [[TMP94:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END89]]
+// SIMD-ONLY0:       cond.end89:
+// SIMD-ONLY0-NEXT:    [[COND90:%.*]] = phi float [ [[TMP93]], [[COND_TRUE87]] ], [ [[TMP94]], [[COND_FALSE88]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND90]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP95:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP95]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP96:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP97:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP91:%.*]] = fcmp olt float [[TMP96]], [[TMP97]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP91]], label [[COND_TRUE92:%.*]], label [[COND_FALSE93:%.*]]
+// SIMD-ONLY0:       cond.true92:
+// SIMD-ONLY0-NEXT:    [[TMP98:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END94:%.*]]
+// SIMD-ONLY0:       cond.false93:
+// SIMD-ONLY0-NEXT:    [[TMP99:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END94]]
+// SIMD-ONLY0:       cond.end94:
+// SIMD-ONLY0-NEXT:    [[COND95:%.*]] = phi float [ [[TMP98]], [[COND_TRUE92]] ], [ [[TMP99]], [[COND_FALSE93]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND95]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP100:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP100]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP101:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP102:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP96:%.*]] = fcmp oeq float [[TMP101]], [[TMP102]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP96]], label [[COND_TRUE97:%.*]], label [[COND_FALSE98:%.*]]
+// SIMD-ONLY0:       cond.true97:
+// SIMD-ONLY0-NEXT:    [[TMP103:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END99:%.*]]
+// SIMD-ONLY0:       cond.false98:
+// SIMD-ONLY0-NEXT:    [[TMP104:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END99]]
+// SIMD-ONLY0:       cond.end99:
+// SIMD-ONLY0-NEXT:    [[COND100:%.*]] = phi float [ [[TMP103]], [[COND_TRUE97]] ], [ [[TMP104]], [[COND_FALSE98]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND100]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP105:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP106:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP101:%.*]] = fcmp ogt float [[TMP105]], [[TMP106]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP101]], label [[COND_TRUE102:%.*]], label [[COND_FALSE103:%.*]]
+// SIMD-ONLY0:       cond.true102:
+// SIMD-ONLY0-NEXT:    [[TMP107:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END104:%.*]]
+// SIMD-ONLY0:       cond.false103:
+// SIMD-ONLY0-NEXT:    [[TMP108:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END104]]
+// SIMD-ONLY0:       cond.end104:
+// SIMD-ONLY0-NEXT:    [[COND105:%.*]] = phi float [ [[TMP107]], [[COND_TRUE102]] ], [ [[TMP108]], [[COND_FALSE103]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND105]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP109:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP109]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP110:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP111:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP106:%.*]] = fcmp olt float [[TMP110]], [[TMP111]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP106]], label [[COND_TRUE107:%.*]], label [[COND_FALSE108:%.*]]
+// SIMD-ONLY0:       cond.true107:
+// SIMD-ONLY0-NEXT:    [[TMP112:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END109:%.*]]
+// SIMD-ONLY0:       cond.false108:
+// SIMD-ONLY0-NEXT:    [[TMP113:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END109]]
+// SIMD-ONLY0:       cond.end109:
+// SIMD-ONLY0-NEXT:    [[COND110:%.*]] = phi float [ [[TMP112]], [[COND_TRUE107]] ], [ [[TMP113]], [[COND_FALSE108]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND110]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP114:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP114]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP115:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP116:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP111:%.*]] = fcmp oeq float [[TMP115]], [[TMP116]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP111]], label [[COND_TRUE112:%.*]], label [[COND_FALSE113:%.*]]
+// SIMD-ONLY0:       cond.true112:
+// SIMD-ONLY0-NEXT:    [[TMP117:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END114:%.*]]
+// SIMD-ONLY0:       cond.false113:
+// SIMD-ONLY0-NEXT:    [[TMP118:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END114]]
+// SIMD-ONLY0:       cond.end114:
+// SIMD-ONLY0-NEXT:    [[COND115:%.*]] = phi float [ [[TMP117]], [[COND_TRUE112]] ], [ [[TMP118]], [[COND_FALSE113]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND115]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP119:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP119]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP120:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP120]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP121:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP122:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP116:%.*]] = fcmp ogt float [[TMP121]], [[TMP122]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP116]], label [[COND_TRUE117:%.*]], label [[COND_FALSE118:%.*]]
+// SIMD-ONLY0:       cond.true117:
+// SIMD-ONLY0-NEXT:    [[TMP123:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END119:%.*]]
+// SIMD-ONLY0:       cond.false118:
+// SIMD-ONLY0-NEXT:    [[TMP124:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END119]]
+// SIMD-ONLY0:       cond.end119:
+// SIMD-ONLY0-NEXT:    [[COND120:%.*]] = phi float [ [[TMP123]], [[COND_TRUE117]] ], [ [[TMP124]], [[COND_FALSE118]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND120]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP125:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP125]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP126:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP127:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP121:%.*]] = fcmp olt float [[TMP126]], [[TMP127]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP121]], label [[COND_TRUE122:%.*]], label [[COND_FALSE123:%.*]]
+// SIMD-ONLY0:       cond.true122:
+// SIMD-ONLY0-NEXT:    [[TMP128:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END124:%.*]]
+// SIMD-ONLY0:       cond.false123:
+// SIMD-ONLY0-NEXT:    [[TMP129:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END124]]
+// SIMD-ONLY0:       cond.end124:
+// SIMD-ONLY0-NEXT:    [[COND125:%.*]] = phi float [ [[TMP128]], [[COND_TRUE122]] ], [ [[TMP129]], [[COND_FALSE123]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND125]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP130:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP130]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP131:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP132:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP126:%.*]] = fcmp oeq float [[TMP131]], [[TMP132]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP126]], label [[COND_TRUE127:%.*]], label [[COND_FALSE128:%.*]]
+// SIMD-ONLY0:       cond.true127:
+// SIMD-ONLY0-NEXT:    [[TMP133:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END129:%.*]]
+// SIMD-ONLY0:       cond.false128:
+// SIMD-ONLY0-NEXT:    [[TMP134:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END129]]
+// SIMD-ONLY0:       cond.end129:
+// SIMD-ONLY0-NEXT:    [[COND130:%.*]] = phi float [ [[TMP133]], [[COND_TRUE127]] ], [ [[TMP134]], [[COND_FALSE128]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND130]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP135:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP136:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP131:%.*]] = fcmp ogt float [[TMP135]], [[TMP136]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP131]], label [[COND_TRUE132:%.*]], label [[COND_FALSE133:%.*]]
+// SIMD-ONLY0:       cond.true132:
+// SIMD-ONLY0-NEXT:    [[TMP137:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END134:%.*]]
+// SIMD-ONLY0:       cond.false133:
+// SIMD-ONLY0-NEXT:    [[TMP138:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END134]]
+// SIMD-ONLY0:       cond.end134:
+// SIMD-ONLY0-NEXT:    [[COND135:%.*]] = phi float [ [[TMP137]], [[COND_TRUE132]] ], [ [[TMP138]], [[COND_FALSE133]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND135]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP139:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP139]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP140:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP141:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP136:%.*]] = fcmp olt float [[TMP140]], [[TMP141]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP136]], label [[COND_TRUE137:%.*]], label [[COND_FALSE138:%.*]]
+// SIMD-ONLY0:       cond.true137:
+// SIMD-ONLY0-NEXT:    [[TMP142:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END139:%.*]]
+// SIMD-ONLY0:       cond.false138:
+// SIMD-ONLY0-NEXT:    [[TMP143:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END139]]
+// SIMD-ONLY0:       cond.end139:
+// SIMD-ONLY0-NEXT:    [[COND140:%.*]] = phi float [ [[TMP142]], [[COND_TRUE137]] ], [ [[TMP143]], [[COND_FALSE138]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND140]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP144:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP144]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP145:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP146:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP141:%.*]] = fcmp oeq float [[TMP145]], [[TMP146]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP141]], label [[COND_TRUE142:%.*]], label [[COND_FALSE143:%.*]]
+// SIMD-ONLY0:       cond.true142:
+// SIMD-ONLY0-NEXT:    [[TMP147:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END144:%.*]]
+// SIMD-ONLY0:       cond.false143:
+// SIMD-ONLY0-NEXT:    [[TMP148:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END144]]
+// SIMD-ONLY0:       cond.end144:
+// SIMD-ONLY0-NEXT:    [[COND145:%.*]] = phi float [ [[TMP147]], [[COND_TRUE142]] ], [ [[TMP148]], [[COND_FALSE143]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND145]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP149:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP149]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP150:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP150]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP151:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP152:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP146:%.*]] = fcmp ogt float [[TMP151]], [[TMP152]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP146]], label [[COND_TRUE147:%.*]], label [[COND_FALSE148:%.*]]
+// SIMD-ONLY0:       cond.true147:
+// SIMD-ONLY0-NEXT:    [[TMP153:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END149:%.*]]
+// SIMD-ONLY0:       cond.false148:
+// SIMD-ONLY0-NEXT:    [[TMP154:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END149]]
+// SIMD-ONLY0:       cond.end149:
+// SIMD-ONLY0-NEXT:    [[COND150:%.*]] = phi float [ [[TMP153]], [[COND_TRUE147]] ], [ [[TMP154]], [[COND_FALSE148]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND150]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP155:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP155]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP156:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP157:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP151:%.*]] = fcmp olt float [[TMP156]], [[TMP157]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP151]], label [[COND_TRUE152:%.*]], label [[COND_FALSE153:%.*]]
+// SIMD-ONLY0:       cond.true152:
+// SIMD-ONLY0-NEXT:    [[TMP158:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END154:%.*]]
+// SIMD-ONLY0:       cond.false153:
+// SIMD-ONLY0-NEXT:    [[TMP159:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END154]]
+// SIMD-ONLY0:       cond.end154:
+// SIMD-ONLY0-NEXT:    [[COND155:%.*]] = phi float [ [[TMP158]], [[COND_TRUE152]] ], [ [[TMP159]], [[COND_FALSE153]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND155]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP160:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP160]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP161:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP162:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP156:%.*]] = fcmp oeq float [[TMP161]], [[TMP162]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP156]], label [[COND_TRUE157:%.*]], label [[COND_FALSE158:%.*]]
+// SIMD-ONLY0:       cond.true157:
+// SIMD-ONLY0-NEXT:    [[TMP163:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END159:%.*]]
+// SIMD-ONLY0:       cond.false158:
+// SIMD-ONLY0-NEXT:    [[TMP164:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END159]]
+// SIMD-ONLY0:       cond.end159:
+// SIMD-ONLY0-NEXT:    [[COND160:%.*]] = phi float [ [[TMP163]], [[COND_TRUE157]] ], [ [[TMP164]], [[COND_FALSE158]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND160]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP165:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP166:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP161:%.*]] = fcmp ogt float [[TMP165]], [[TMP166]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP161]], label [[COND_TRUE162:%.*]], label [[COND_FALSE163:%.*]]
+// SIMD-ONLY0:       cond.true162:
+// SIMD-ONLY0-NEXT:    [[TMP167:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END164:%.*]]
+// SIMD-ONLY0:       cond.false163:
+// SIMD-ONLY0-NEXT:    [[TMP168:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END164]]
+// SIMD-ONLY0:       cond.end164:
+// SIMD-ONLY0-NEXT:    [[COND165:%.*]] = phi float [ [[TMP167]], [[COND_TRUE162]] ], [ [[TMP168]], [[COND_FALSE163]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND165]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP169:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP169]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP170:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP171:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP166:%.*]] = fcmp olt float [[TMP170]], [[TMP171]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP166]], label [[COND_TRUE167:%.*]], label [[COND_FALSE168:%.*]]
+// SIMD-ONLY0:       cond.true167:
+// SIMD-ONLY0-NEXT:    [[TMP172:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END169:%.*]]
+// SIMD-ONLY0:       cond.false168:
+// SIMD-ONLY0-NEXT:    [[TMP173:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END169]]
+// SIMD-ONLY0:       cond.end169:
+// SIMD-ONLY0-NEXT:    [[COND170:%.*]] = phi float [ [[TMP172]], [[COND_TRUE167]] ], [ [[TMP173]], [[COND_FALSE168]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND170]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP174:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP174]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP175:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP176:%.*]] = load float, ptr [[FE]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP171:%.*]] = fcmp oeq float [[TMP175]], [[TMP176]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP171]], label [[COND_TRUE172:%.*]], label [[COND_FALSE173:%.*]]
+// SIMD-ONLY0:       cond.true172:
+// SIMD-ONLY0-NEXT:    [[TMP177:%.*]] = load float, ptr [[FD]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END174:%.*]]
+// SIMD-ONLY0:       cond.false173:
+// SIMD-ONLY0-NEXT:    [[TMP178:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    br label [[COND_END174]]
+// SIMD-ONLY0:       cond.end174:
+// SIMD-ONLY0-NEXT:    [[COND175:%.*]] = phi float [ [[TMP177]], [[COND_TRUE172]] ], [ [[TMP178]], [[COND_FALSE173]] ]
+// SIMD-ONLY0-NEXT:    store float [[COND175]], ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP179:%.*]] = load float, ptr [[FX]], align 4
+// SIMD-ONLY0-NEXT:    store float [[TMP179]], ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP180:%.*]] = load float, ptr [[FV]], align 4
+// SIMD-ONLY0-NEXT:    ret float [[TMP180]]
+//
+//
+// SIMD-ONLY0-LABEL: @dxevd(
+// SIMD-ONLY0-NEXT:  entry:
+// SIMD-ONLY0-NEXT:    [[DX:%.*]] = alloca double, align 8
+// SIMD-ONLY0-NEXT:    [[DV:%.*]] = alloca double, align 8
+// SIMD-ONLY0-NEXT:    [[DE:%.*]] = alloca double, align 8
+// SIMD-ONLY0-NEXT:    [[DD:%.*]] = alloca double, align 8
+// SIMD-ONLY0-NEXT:    [[TMP0:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP0]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP1:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP2:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP:%.*]] = fcmp ogt double [[TMP1]], [[TMP2]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// SIMD-ONLY0:       cond.true:
+// SIMD-ONLY0-NEXT:    [[TMP3:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END:%.*]]
+// SIMD-ONLY0:       cond.false:
+// SIMD-ONLY0-NEXT:    [[TMP4:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END]]
+// SIMD-ONLY0:       cond.end:
+// SIMD-ONLY0-NEXT:    [[COND:%.*]] = phi double [ [[TMP3]], [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP5:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP5]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP6:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP7:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP1:%.*]] = fcmp olt double [[TMP6]], [[TMP7]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP1]], label [[COND_TRUE2:%.*]], label [[COND_FALSE3:%.*]]
+// SIMD-ONLY0:       cond.true2:
+// SIMD-ONLY0-NEXT:    [[TMP8:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4:%.*]]
+// SIMD-ONLY0:       cond.false3:
+// SIMD-ONLY0-NEXT:    [[TMP9:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END4]]
+// SIMD-ONLY0:       cond.end4:
+// SIMD-ONLY0-NEXT:    [[COND5:%.*]] = phi double [ [[TMP8]], [[COND_TRUE2]] ], [ [[TMP9]], [[COND_FALSE3]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND5]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP10:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP10]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP11:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP12:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP6:%.*]] = fcmp oeq double [[TMP11]], [[TMP12]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]]
+// SIMD-ONLY0:       cond.true7:
+// SIMD-ONLY0-NEXT:    [[TMP13:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END9:%.*]]
+// SIMD-ONLY0:       cond.false8:
+// SIMD-ONLY0-NEXT:    [[TMP14:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END9]]
+// SIMD-ONLY0:       cond.end9:
+// SIMD-ONLY0-NEXT:    [[COND10:%.*]] = phi double [ [[TMP13]], [[COND_TRUE7]] ], [ [[TMP14]], [[COND_FALSE8]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND10]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP15:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP16:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP11:%.*]] = fcmp ogt double [[TMP15]], [[TMP16]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]]
+// SIMD-ONLY0:       cond.true12:
+// SIMD-ONLY0-NEXT:    [[TMP17:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END14:%.*]]
+// SIMD-ONLY0:       cond.false13:
+// SIMD-ONLY0-NEXT:    [[TMP18:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END14]]
+// SIMD-ONLY0:       cond.end14:
+// SIMD-ONLY0-NEXT:    [[COND15:%.*]] = phi double [ [[TMP17]], [[COND_TRUE12]] ], [ [[TMP18]], [[COND_FALSE13]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND15]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP19:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP19]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP20:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP21:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP16:%.*]] = fcmp olt double [[TMP20]], [[TMP21]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP16]], label [[COND_TRUE17:%.*]], label [[COND_FALSE18:%.*]]
+// SIMD-ONLY0:       cond.true17:
+// SIMD-ONLY0-NEXT:    [[TMP22:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END19:%.*]]
+// SIMD-ONLY0:       cond.false18:
+// SIMD-ONLY0-NEXT:    [[TMP23:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END19]]
+// SIMD-ONLY0:       cond.end19:
+// SIMD-ONLY0-NEXT:    [[COND20:%.*]] = phi double [ [[TMP22]], [[COND_TRUE17]] ], [ [[TMP23]], [[COND_FALSE18]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND20]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP24:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP24]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP25:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP26:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP21:%.*]] = fcmp oeq double [[TMP25]], [[TMP26]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP21]], label [[COND_TRUE22:%.*]], label [[COND_FALSE23:%.*]]
+// SIMD-ONLY0:       cond.true22:
+// SIMD-ONLY0-NEXT:    [[TMP27:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END24:%.*]]
+// SIMD-ONLY0:       cond.false23:
+// SIMD-ONLY0-NEXT:    [[TMP28:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END24]]
+// SIMD-ONLY0:       cond.end24:
+// SIMD-ONLY0-NEXT:    [[COND25:%.*]] = phi double [ [[TMP27]], [[COND_TRUE22]] ], [ [[TMP28]], [[COND_FALSE23]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND25]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP29:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP29]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP30:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP30]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP31:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP32:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP26:%.*]] = fcmp ogt double [[TMP31]], [[TMP32]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP26]], label [[COND_TRUE27:%.*]], label [[COND_FALSE28:%.*]]
+// SIMD-ONLY0:       cond.true27:
+// SIMD-ONLY0-NEXT:    [[TMP33:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END29:%.*]]
+// SIMD-ONLY0:       cond.false28:
+// SIMD-ONLY0-NEXT:    [[TMP34:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END29]]
+// SIMD-ONLY0:       cond.end29:
+// SIMD-ONLY0-NEXT:    [[COND30:%.*]] = phi double [ [[TMP33]], [[COND_TRUE27]] ], [ [[TMP34]], [[COND_FALSE28]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND30]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP35:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP35]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP36:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP37:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP31:%.*]] = fcmp olt double [[TMP36]], [[TMP37]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP31]], label [[COND_TRUE32:%.*]], label [[COND_FALSE33:%.*]]
+// SIMD-ONLY0:       cond.true32:
+// SIMD-ONLY0-NEXT:    [[TMP38:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END34:%.*]]
+// SIMD-ONLY0:       cond.false33:
+// SIMD-ONLY0-NEXT:    [[TMP39:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END34]]
+// SIMD-ONLY0:       cond.end34:
+// SIMD-ONLY0-NEXT:    [[COND35:%.*]] = phi double [ [[TMP38]], [[COND_TRUE32]] ], [ [[TMP39]], [[COND_FALSE33]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND35]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP40:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP40]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP41:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP42:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP36:%.*]] = fcmp oeq double [[TMP41]], [[TMP42]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP36]], label [[COND_TRUE37:%.*]], label [[COND_FALSE38:%.*]]
+// SIMD-ONLY0:       cond.true37:
+// SIMD-ONLY0-NEXT:    [[TMP43:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END39:%.*]]
+// SIMD-ONLY0:       cond.false38:
+// SIMD-ONLY0-NEXT:    [[TMP44:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END39]]
+// SIMD-ONLY0:       cond.end39:
+// SIMD-ONLY0-NEXT:    [[COND40:%.*]] = phi double [ [[TMP43]], [[COND_TRUE37]] ], [ [[TMP44]], [[COND_FALSE38]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND40]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP45:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP46:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP41:%.*]] = fcmp ogt double [[TMP45]], [[TMP46]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP41]], label [[COND_TRUE42:%.*]], label [[COND_FALSE43:%.*]]
+// SIMD-ONLY0:       cond.true42:
+// SIMD-ONLY0-NEXT:    [[TMP47:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END44:%.*]]
+// SIMD-ONLY0:       cond.false43:
+// SIMD-ONLY0-NEXT:    [[TMP48:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END44]]
+// SIMD-ONLY0:       cond.end44:
+// SIMD-ONLY0-NEXT:    [[COND45:%.*]] = phi double [ [[TMP47]], [[COND_TRUE42]] ], [ [[TMP48]], [[COND_FALSE43]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND45]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP49:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP49]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP50:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP51:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP46:%.*]] = fcmp olt double [[TMP50]], [[TMP51]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP46]], label [[COND_TRUE47:%.*]], label [[COND_FALSE48:%.*]]
+// SIMD-ONLY0:       cond.true47:
+// SIMD-ONLY0-NEXT:    [[TMP52:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END49:%.*]]
+// SIMD-ONLY0:       cond.false48:
+// SIMD-ONLY0-NEXT:    [[TMP53:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END49]]
+// SIMD-ONLY0:       cond.end49:
+// SIMD-ONLY0-NEXT:    [[COND50:%.*]] = phi double [ [[TMP52]], [[COND_TRUE47]] ], [ [[TMP53]], [[COND_FALSE48]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND50]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP54:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP54]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP55:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP56:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP51:%.*]] = fcmp oeq double [[TMP55]], [[TMP56]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP51]], label [[COND_TRUE52:%.*]], label [[COND_FALSE53:%.*]]
+// SIMD-ONLY0:       cond.true52:
+// SIMD-ONLY0-NEXT:    [[TMP57:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END54:%.*]]
+// SIMD-ONLY0:       cond.false53:
+// SIMD-ONLY0-NEXT:    [[TMP58:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END54]]
+// SIMD-ONLY0:       cond.end54:
+// SIMD-ONLY0-NEXT:    [[COND55:%.*]] = phi double [ [[TMP57]], [[COND_TRUE52]] ], [ [[TMP58]], [[COND_FALSE53]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND55]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP59:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP59]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP60:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP60]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP61:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP62:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP56:%.*]] = fcmp ogt double [[TMP61]], [[TMP62]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP56]], label [[COND_TRUE57:%.*]], label [[COND_FALSE58:%.*]]
+// SIMD-ONLY0:       cond.true57:
+// SIMD-ONLY0-NEXT:    [[TMP63:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END59:%.*]]
+// SIMD-ONLY0:       cond.false58:
+// SIMD-ONLY0-NEXT:    [[TMP64:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END59]]
+// SIMD-ONLY0:       cond.end59:
+// SIMD-ONLY0-NEXT:    [[COND60:%.*]] = phi double [ [[TMP63]], [[COND_TRUE57]] ], [ [[TMP64]], [[COND_FALSE58]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND60]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP65:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP65]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP66:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP67:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP61:%.*]] = fcmp olt double [[TMP66]], [[TMP67]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP61]], label [[COND_TRUE62:%.*]], label [[COND_FALSE63:%.*]]
+// SIMD-ONLY0:       cond.true62:
+// SIMD-ONLY0-NEXT:    [[TMP68:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END64:%.*]]
+// SIMD-ONLY0:       cond.false63:
+// SIMD-ONLY0-NEXT:    [[TMP69:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END64]]
+// SIMD-ONLY0:       cond.end64:
+// SIMD-ONLY0-NEXT:    [[COND65:%.*]] = phi double [ [[TMP68]], [[COND_TRUE62]] ], [ [[TMP69]], [[COND_FALSE63]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND65]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP70:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP70]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP71:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP72:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP66:%.*]] = fcmp oeq double [[TMP71]], [[TMP72]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP66]], label [[COND_TRUE67:%.*]], label [[COND_FALSE68:%.*]]
+// SIMD-ONLY0:       cond.true67:
+// SIMD-ONLY0-NEXT:    [[TMP73:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END69:%.*]]
+// SIMD-ONLY0:       cond.false68:
+// SIMD-ONLY0-NEXT:    [[TMP74:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END69]]
+// SIMD-ONLY0:       cond.end69:
+// SIMD-ONLY0-NEXT:    [[COND70:%.*]] = phi double [ [[TMP73]], [[COND_TRUE67]] ], [ [[TMP74]], [[COND_FALSE68]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND70]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP75:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP76:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP71:%.*]] = fcmp ogt double [[TMP75]], [[TMP76]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP71]], label [[COND_TRUE72:%.*]], label [[COND_FALSE73:%.*]]
+// SIMD-ONLY0:       cond.true72:
+// SIMD-ONLY0-NEXT:    [[TMP77:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END74:%.*]]
+// SIMD-ONLY0:       cond.false73:
+// SIMD-ONLY0-NEXT:    [[TMP78:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END74]]
+// SIMD-ONLY0:       cond.end74:
+// SIMD-ONLY0-NEXT:    [[COND75:%.*]] = phi double [ [[TMP77]], [[COND_TRUE72]] ], [ [[TMP78]], [[COND_FALSE73]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND75]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP79:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP79]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP80:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP81:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP76:%.*]] = fcmp olt double [[TMP80]], [[TMP81]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP76]], label [[COND_TRUE77:%.*]], label [[COND_FALSE78:%.*]]
+// SIMD-ONLY0:       cond.true77:
+// SIMD-ONLY0-NEXT:    [[TMP82:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END79:%.*]]
+// SIMD-ONLY0:       cond.false78:
+// SIMD-ONLY0-NEXT:    [[TMP83:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END79]]
+// SIMD-ONLY0:       cond.end79:
+// SIMD-ONLY0-NEXT:    [[COND80:%.*]] = phi double [ [[TMP82]], [[COND_TRUE77]] ], [ [[TMP83]], [[COND_FALSE78]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND80]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP84:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP84]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP85:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP86:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP81:%.*]] = fcmp oeq double [[TMP85]], [[TMP86]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP81]], label [[COND_TRUE82:%.*]], label [[COND_FALSE83:%.*]]
+// SIMD-ONLY0:       cond.true82:
+// SIMD-ONLY0-NEXT:    [[TMP87:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END84:%.*]]
+// SIMD-ONLY0:       cond.false83:
+// SIMD-ONLY0-NEXT:    [[TMP88:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END84]]
+// SIMD-ONLY0:       cond.end84:
+// SIMD-ONLY0-NEXT:    [[COND85:%.*]] = phi double [ [[TMP87]], [[COND_TRUE82]] ], [ [[TMP88]], [[COND_FALSE83]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND85]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP89:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP89]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP90:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP90]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP91:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP92:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP86:%.*]] = fcmp ogt double [[TMP91]], [[TMP92]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP86]], label [[COND_TRUE87:%.*]], label [[COND_FALSE88:%.*]]
+// SIMD-ONLY0:       cond.true87:
+// SIMD-ONLY0-NEXT:    [[TMP93:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END89:%.*]]
+// SIMD-ONLY0:       cond.false88:
+// SIMD-ONLY0-NEXT:    [[TMP94:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END89]]
+// SIMD-ONLY0:       cond.end89:
+// SIMD-ONLY0-NEXT:    [[COND90:%.*]] = phi double [ [[TMP93]], [[COND_TRUE87]] ], [ [[TMP94]], [[COND_FALSE88]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND90]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP95:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP95]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP96:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP97:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP91:%.*]] = fcmp olt double [[TMP96]], [[TMP97]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP91]], label [[COND_TRUE92:%.*]], label [[COND_FALSE93:%.*]]
+// SIMD-ONLY0:       cond.true92:
+// SIMD-ONLY0-NEXT:    [[TMP98:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END94:%.*]]
+// SIMD-ONLY0:       cond.false93:
+// SIMD-ONLY0-NEXT:    [[TMP99:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END94]]
+// SIMD-ONLY0:       cond.end94:
+// SIMD-ONLY0-NEXT:    [[COND95:%.*]] = phi double [ [[TMP98]], [[COND_TRUE92]] ], [ [[TMP99]], [[COND_FALSE93]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND95]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP100:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP100]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP101:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP102:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP96:%.*]] = fcmp oeq double [[TMP101]], [[TMP102]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP96]], label [[COND_TRUE97:%.*]], label [[COND_FALSE98:%.*]]
+// SIMD-ONLY0:       cond.true97:
+// SIMD-ONLY0-NEXT:    [[TMP103:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END99:%.*]]
+// SIMD-ONLY0:       cond.false98:
+// SIMD-ONLY0-NEXT:    [[TMP104:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END99]]
+// SIMD-ONLY0:       cond.end99:
+// SIMD-ONLY0-NEXT:    [[COND100:%.*]] = phi double [ [[TMP103]], [[COND_TRUE97]] ], [ [[TMP104]], [[COND_FALSE98]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND100]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP105:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP106:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP101:%.*]] = fcmp ogt double [[TMP105]], [[TMP106]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP101]], label [[COND_TRUE102:%.*]], label [[COND_FALSE103:%.*]]
+// SIMD-ONLY0:       cond.true102:
+// SIMD-ONLY0-NEXT:    [[TMP107:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END104:%.*]]
+// SIMD-ONLY0:       cond.false103:
+// SIMD-ONLY0-NEXT:    [[TMP108:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END104]]
+// SIMD-ONLY0:       cond.end104:
+// SIMD-ONLY0-NEXT:    [[COND105:%.*]] = phi double [ [[TMP107]], [[COND_TRUE102]] ], [ [[TMP108]], [[COND_FALSE103]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND105]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP109:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP109]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP110:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP111:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP106:%.*]] = fcmp olt double [[TMP110]], [[TMP111]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP106]], label [[COND_TRUE107:%.*]], label [[COND_FALSE108:%.*]]
+// SIMD-ONLY0:       cond.true107:
+// SIMD-ONLY0-NEXT:    [[TMP112:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END109:%.*]]
+// SIMD-ONLY0:       cond.false108:
+// SIMD-ONLY0-NEXT:    [[TMP113:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END109]]
+// SIMD-ONLY0:       cond.end109:
+// SIMD-ONLY0-NEXT:    [[COND110:%.*]] = phi double [ [[TMP112]], [[COND_TRUE107]] ], [ [[TMP113]], [[COND_FALSE108]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND110]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP114:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP114]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP115:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP116:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP111:%.*]] = fcmp oeq double [[TMP115]], [[TMP116]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP111]], label [[COND_TRUE112:%.*]], label [[COND_FALSE113:%.*]]
+// SIMD-ONLY0:       cond.true112:
+// SIMD-ONLY0-NEXT:    [[TMP117:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END114:%.*]]
+// SIMD-ONLY0:       cond.false113:
+// SIMD-ONLY0-NEXT:    [[TMP118:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END114]]
+// SIMD-ONLY0:       cond.end114:
+// SIMD-ONLY0-NEXT:    [[COND115:%.*]] = phi double [ [[TMP117]], [[COND_TRUE112]] ], [ [[TMP118]], [[COND_FALSE113]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND115]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP119:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP119]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP120:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP120]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP121:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP122:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP116:%.*]] = fcmp ogt double [[TMP121]], [[TMP122]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP116]], label [[COND_TRUE117:%.*]], label [[COND_FALSE118:%.*]]
+// SIMD-ONLY0:       cond.true117:
+// SIMD-ONLY0-NEXT:    [[TMP123:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END119:%.*]]
+// SIMD-ONLY0:       cond.false118:
+// SIMD-ONLY0-NEXT:    [[TMP124:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END119]]
+// SIMD-ONLY0:       cond.end119:
+// SIMD-ONLY0-NEXT:    [[COND120:%.*]] = phi double [ [[TMP123]], [[COND_TRUE117]] ], [ [[TMP124]], [[COND_FALSE118]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND120]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP125:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP125]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP126:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP127:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP121:%.*]] = fcmp olt double [[TMP126]], [[TMP127]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP121]], label [[COND_TRUE122:%.*]], label [[COND_FALSE123:%.*]]
+// SIMD-ONLY0:       cond.true122:
+// SIMD-ONLY0-NEXT:    [[TMP128:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END124:%.*]]
+// SIMD-ONLY0:       cond.false123:
+// SIMD-ONLY0-NEXT:    [[TMP129:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END124]]
+// SIMD-ONLY0:       cond.end124:
+// SIMD-ONLY0-NEXT:    [[COND125:%.*]] = phi double [ [[TMP128]], [[COND_TRUE122]] ], [ [[TMP129]], [[COND_FALSE123]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND125]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP130:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP130]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP131:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP132:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP126:%.*]] = fcmp oeq double [[TMP131]], [[TMP132]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP126]], label [[COND_TRUE127:%.*]], label [[COND_FALSE128:%.*]]
+// SIMD-ONLY0:       cond.true127:
+// SIMD-ONLY0-NEXT:    [[TMP133:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END129:%.*]]
+// SIMD-ONLY0:       cond.false128:
+// SIMD-ONLY0-NEXT:    [[TMP134:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END129]]
+// SIMD-ONLY0:       cond.end129:
+// SIMD-ONLY0-NEXT:    [[COND130:%.*]] = phi double [ [[TMP133]], [[COND_TRUE127]] ], [ [[TMP134]], [[COND_FALSE128]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND130]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP135:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP136:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP131:%.*]] = fcmp ogt double [[TMP135]], [[TMP136]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP131]], label [[COND_TRUE132:%.*]], label [[COND_FALSE133:%.*]]
+// SIMD-ONLY0:       cond.true132:
+// SIMD-ONLY0-NEXT:    [[TMP137:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END134:%.*]]
+// SIMD-ONLY0:       cond.false133:
+// SIMD-ONLY0-NEXT:    [[TMP138:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END134]]
+// SIMD-ONLY0:       cond.end134:
+// SIMD-ONLY0-NEXT:    [[COND135:%.*]] = phi double [ [[TMP137]], [[COND_TRUE132]] ], [ [[TMP138]], [[COND_FALSE133]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND135]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP139:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP139]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP140:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP141:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP136:%.*]] = fcmp olt double [[TMP140]], [[TMP141]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP136]], label [[COND_TRUE137:%.*]], label [[COND_FALSE138:%.*]]
+// SIMD-ONLY0:       cond.true137:
+// SIMD-ONLY0-NEXT:    [[TMP142:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END139:%.*]]
+// SIMD-ONLY0:       cond.false138:
+// SIMD-ONLY0-NEXT:    [[TMP143:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END139]]
+// SIMD-ONLY0:       cond.end139:
+// SIMD-ONLY0-NEXT:    [[COND140:%.*]] = phi double [ [[TMP142]], [[COND_TRUE137]] ], [ [[TMP143]], [[COND_FALSE138]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND140]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP144:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP144]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP145:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP146:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP141:%.*]] = fcmp oeq double [[TMP145]], [[TMP146]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP141]], label [[COND_TRUE142:%.*]], label [[COND_FALSE143:%.*]]
+// SIMD-ONLY0:       cond.true142:
+// SIMD-ONLY0-NEXT:    [[TMP147:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END144:%.*]]
+// SIMD-ONLY0:       cond.false143:
+// SIMD-ONLY0-NEXT:    [[TMP148:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END144]]
+// SIMD-ONLY0:       cond.end144:
+// SIMD-ONLY0-NEXT:    [[COND145:%.*]] = phi double [ [[TMP147]], [[COND_TRUE142]] ], [ [[TMP148]], [[COND_FALSE143]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND145]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP149:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP149]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP150:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP150]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP151:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP152:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP146:%.*]] = fcmp ogt double [[TMP151]], [[TMP152]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP146]], label [[COND_TRUE147:%.*]], label [[COND_FALSE148:%.*]]
+// SIMD-ONLY0:       cond.true147:
+// SIMD-ONLY0-NEXT:    [[TMP153:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END149:%.*]]
+// SIMD-ONLY0:       cond.false148:
+// SIMD-ONLY0-NEXT:    [[TMP154:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END149]]
+// SIMD-ONLY0:       cond.end149:
+// SIMD-ONLY0-NEXT:    [[COND150:%.*]] = phi double [ [[TMP153]], [[COND_TRUE147]] ], [ [[TMP154]], [[COND_FALSE148]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND150]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP155:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP155]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP156:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP157:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP151:%.*]] = fcmp olt double [[TMP156]], [[TMP157]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP151]], label [[COND_TRUE152:%.*]], label [[COND_FALSE153:%.*]]
+// SIMD-ONLY0:       cond.true152:
+// SIMD-ONLY0-NEXT:    [[TMP158:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END154:%.*]]
+// SIMD-ONLY0:       cond.false153:
+// SIMD-ONLY0-NEXT:    [[TMP159:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END154]]
+// SIMD-ONLY0:       cond.end154:
+// SIMD-ONLY0-NEXT:    [[COND155:%.*]] = phi double [ [[TMP158]], [[COND_TRUE152]] ], [ [[TMP159]], [[COND_FALSE153]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND155]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP160:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP160]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP161:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP162:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP156:%.*]] = fcmp oeq double [[TMP161]], [[TMP162]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP156]], label [[COND_TRUE157:%.*]], label [[COND_FALSE158:%.*]]
+// SIMD-ONLY0:       cond.true157:
+// SIMD-ONLY0-NEXT:    [[TMP163:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END159:%.*]]
+// SIMD-ONLY0:       cond.false158:
+// SIMD-ONLY0-NEXT:    [[TMP164:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END159]]
+// SIMD-ONLY0:       cond.end159:
+// SIMD-ONLY0-NEXT:    [[COND160:%.*]] = phi double [ [[TMP163]], [[COND_TRUE157]] ], [ [[TMP164]], [[COND_FALSE158]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND160]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP165:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP166:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP161:%.*]] = fcmp ogt double [[TMP165]], [[TMP166]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP161]], label [[COND_TRUE162:%.*]], label [[COND_FALSE163:%.*]]
+// SIMD-ONLY0:       cond.true162:
+// SIMD-ONLY0-NEXT:    [[TMP167:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END164:%.*]]
+// SIMD-ONLY0:       cond.false163:
+// SIMD-ONLY0-NEXT:    [[TMP168:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END164]]
+// SIMD-ONLY0:       cond.end164:
+// SIMD-ONLY0-NEXT:    [[COND165:%.*]] = phi double [ [[TMP167]], [[COND_TRUE162]] ], [ [[TMP168]], [[COND_FALSE163]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND165]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP169:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP169]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP170:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP171:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP166:%.*]] = fcmp olt double [[TMP170]], [[TMP171]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP166]], label [[COND_TRUE167:%.*]], label [[COND_FALSE168:%.*]]
+// SIMD-ONLY0:       cond.true167:
+// SIMD-ONLY0-NEXT:    [[TMP172:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END169:%.*]]
+// SIMD-ONLY0:       cond.false168:
+// SIMD-ONLY0-NEXT:    [[TMP173:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END169]]
+// SIMD-ONLY0:       cond.end169:
+// SIMD-ONLY0-NEXT:    [[COND170:%.*]] = phi double [ [[TMP172]], [[COND_TRUE167]] ], [ [[TMP173]], [[COND_FALSE168]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND170]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP174:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP174]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP175:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP176:%.*]] = load double, ptr [[DE]], align 8
+// SIMD-ONLY0-NEXT:    [[CMP171:%.*]] = fcmp oeq double [[TMP175]], [[TMP176]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP171]], label [[COND_TRUE172:%.*]], label [[COND_FALSE173:%.*]]
+// SIMD-ONLY0:       cond.true172:
+// SIMD-ONLY0-NEXT:    [[TMP177:%.*]] = load double, ptr [[DD]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END174:%.*]]
+// SIMD-ONLY0:       cond.false173:
+// SIMD-ONLY0-NEXT:    [[TMP178:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    br label [[COND_END174]]
+// SIMD-ONLY0:       cond.end174:
+// SIMD-ONLY0-NEXT:    [[COND175:%.*]] = phi double [ [[TMP177]], [[COND_TRUE172]] ], [ [[TMP178]], [[COND_FALSE173]] ]
+// SIMD-ONLY0-NEXT:    store double [[COND175]], ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP179:%.*]] = load double, ptr [[DX]], align 8
+// SIMD-ONLY0-NEXT:    store double [[TMP179]], ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    [[TMP180:%.*]] = load double, ptr [[DV]], align 8
+// SIMD-ONLY0-NEXT:    ret double [[TMP180]]
+//

diff  --git a/clang/test/OpenMP/bug60602.cpp b/clang/test/OpenMP/bug60602.cpp
new file mode 100644
index 0000000000000..bc5a2f5953142
--- /dev/null
+++ b/clang/test/OpenMP/bug60602.cpp
@@ -0,0 +1,578 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 2
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+
+int kernel_within_loop(int *a, int *b, int N, int num_iters) { // Test input: launches two OpenMP target regions on every iteration of a host loop and returns a[N-1].
+  int i; // NOTE: keep comments trailing; the generated kernel names in the CHECK lines end in the pragma's source line number (e.g. _l9), so inserting lines above a pragma would invalidate them.
+  for (i = 0; i < num_iters; ++i) { // host loop: both target regions below are reached num_iters times
+#pragma omp target parallel for map(a[0:N]) map(b[0:N])
+    for (int j = 0; j< N; j++)
+      a[j] = b[j]; // first target region: element-wise copy of b into a
+
+#pragma omp target teams distribute parallel for map(a[0:N]) map(b[0:N])
+    for (int j = 0; j< N; j+=3) // second target region: visits every third element
+      a[j] = b[j] * 2;
+  }
+  return a[N-1]; // reads the last mapped element; N is not validated here
+}
+// CHECK-LABEL: define dso_local noundef signext i32 @_Z18kernel_within_loopPiS_ii
+// CHECK-SAME: (ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], i32 noundef signext [[N:%.*]], i32 noundef signext [[NUM_ITERS:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[NUM_ITERS_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[N_CASTED:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
+// CHECK-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
+// CHECK-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
+// CHECK-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [3 x i64], align 8
+// CHECK-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK-NEXT:    [[N_CASTED3:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [3 x ptr], align 8
+// CHECK-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [3 x ptr], align 8
+// CHECK-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [3 x ptr], align 8
+// CHECK-NEXT:    [[DOTOFFLOAD_SIZES11:%.*]] = alloca [3 x i64], align 8
+// CHECK-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTCAPTURE_EXPR_12:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 8
+// CHECK-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[NUM_ITERS]], ptr [[NUM_ITERS_ADDR]], align 4
+// CHECK-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK:       for.cond:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[NUM_ITERS_ADDR]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// CHECK:       for.body:
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[N_CASTED]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// CHECK-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 0
+// CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[TMP8]] to i64
+// CHECK-NEXT:    [[TMP9:%.*]] = mul nuw i64 [[CONV]], 4
+// CHECK-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 0
+// CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK-NEXT:    [[CONV2:%.*]] = sext i32 [[TMP12]] to i64
+// CHECK-NEXT:    [[TMP13:%.*]] = mul nuw i64 [[CONV2]], 4
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes, i64 24, i1 false)
+// CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-NEXT:    store i64 [[TMP3]], ptr [[TMP14]], align 8
+// CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-NEXT:    store i64 [[TMP3]], ptr [[TMP15]], align 8
+// CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK-NEXT:    store ptr null, ptr [[TMP16]], align 8
+// CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK-NEXT:    store ptr [[TMP6]], ptr [[TMP17]], align 8
+// CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK-NEXT:    store ptr [[ARRAYIDX]], ptr [[TMP18]], align 8
+// CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 1
+// CHECK-NEXT:    store i64 [[TMP9]], ptr [[TMP19]], align 8
+// CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK-NEXT:    store ptr null, ptr [[TMP20]], align 8
+// CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK-NEXT:    store ptr [[TMP10]], ptr [[TMP21]], align 8
+// CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK-NEXT:    store ptr [[ARRAYIDX1]], ptr [[TMP22]], align 8
+// CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK-NEXT:    store i64 [[TMP13]], ptr [[TMP23]], align 8
+// CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK-NEXT:    store ptr null, ptr [[TMP24]], align 8
+// CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK-NEXT:    store i32 2, ptr [[TMP28]], align 4
+// CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK-NEXT:    store i32 3, ptr [[TMP29]], align 4
+// CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK-NEXT:    store ptr [[TMP25]], ptr [[TMP30]], align 8
+// CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK-NEXT:    store ptr [[TMP26]], ptr [[TMP31]], align 8
+// CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK-NEXT:    store ptr [[TMP27]], ptr [[TMP32]], align 8
+// CHECK-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK-NEXT:    store ptr @.offload_maptypes, ptr [[TMP33]], align 8
+// CHECK-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK-NEXT:    store ptr null, ptr [[TMP34]], align 8
+// CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK-NEXT:    store ptr null, ptr [[TMP35]], align 8
+// CHECK-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK-NEXT:    store i64 0, ptr [[TMP36]], align 8
+// CHECK-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK-NEXT:    store i64 0, ptr [[TMP37]], align 8
+// CHECK-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK-NEXT:    store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP38]], align 4
+// CHECK-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP39]], align 4
+// CHECK-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK-NEXT:    store i32 0, ptr [[TMP40]], align 4
+// CHECK-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18kernel_within_loopPiS_ii_l9.region_id, ptr [[KERNEL_ARGS]])
+// CHECK-NEXT:    [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
+// CHECK-NEXT:    br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK:       omp_offload.failed:
+// CHECK-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18kernel_within_loopPiS_ii_l9(i64 [[TMP3]], ptr [[TMP4]], ptr [[TMP5]]) #[[ATTR3:[0-9]+]]
+// CHECK-NEXT:    br label [[OMP_OFFLOAD_CONT]]
+// CHECK:       omp_offload.cont:
+// CHECK-NEXT:    [[TMP43:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[TMP43]], ptr [[N_CASTED3]], align 4
+// CHECK-NEXT:    [[TMP44:%.*]] = load i64, ptr [[N_CASTED3]], align 8
+// CHECK-NEXT:    [[TMP45:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// CHECK-NEXT:    [[TMP46:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[TMP47:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// CHECK-NEXT:    [[TMP48:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[TMP48]], i64 0
+// CHECK-NEXT:    [[TMP49:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK-NEXT:    [[CONV5:%.*]] = sext i32 [[TMP49]] to i64
+// CHECK-NEXT:    [[TMP50:%.*]] = mul nuw i64 [[CONV5]], 4
+// CHECK-NEXT:    [[TMP51:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[TMP52:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP52]], i64 0
+// CHECK-NEXT:    [[TMP53:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK-NEXT:    [[CONV7:%.*]] = sext i32 [[TMP53]] to i64
+// CHECK-NEXT:    [[TMP54:%.*]] = mul nuw i64 [[CONV7]], 4
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES11]], ptr align 8 @.offload_sizes.3, i64 24, i1 false)
+// CHECK-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
+// CHECK-NEXT:    store i64 [[TMP44]], ptr [[TMP55]], align 8
+// CHECK-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
+// CHECK-NEXT:    store i64 [[TMP44]], ptr [[TMP56]], align 8
+// CHECK-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS10]], i64 0, i64 0
+// CHECK-NEXT:    store ptr null, ptr [[TMP57]], align 8
+// CHECK-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1
+// CHECK-NEXT:    store ptr [[TMP47]], ptr [[TMP58]], align 8
+// CHECK-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 1
+// CHECK-NEXT:    store ptr [[ARRAYIDX4]], ptr [[TMP59]], align 8
+// CHECK-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES11]], i32 0, i32 1
+// CHECK-NEXT:    store i64 [[TMP50]], ptr [[TMP60]], align 8
+// CHECK-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS10]], i64 0, i64 1
+// CHECK-NEXT:    store ptr null, ptr [[TMP61]], align 8
+// CHECK-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 2
+// CHECK-NEXT:    store ptr [[TMP51]], ptr [[TMP62]], align 8
+// CHECK-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 2
+// CHECK-NEXT:    store ptr [[ARRAYIDX6]], ptr [[TMP63]], align 8
+// CHECK-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES11]], i32 0, i32 2
+// CHECK-NEXT:    store i64 [[TMP54]], ptr [[TMP64]], align 8
+// CHECK-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS10]], i64 0, i64 2
+// CHECK-NEXT:    store ptr null, ptr [[TMP65]], align 8
+// CHECK-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES11]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP69:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[TMP69]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-NEXT:    [[TMP70:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[TMP70]], -2
+// CHECK-NEXT:    [[DIV:%.*]] = udiv i32 [[SUB]], 3
+// CHECK-NEXT:    [[SUB13:%.*]] = sub i32 [[DIV]], 1
+// CHECK-NEXT:    store i32 [[SUB13]], ptr [[DOTCAPTURE_EXPR_12]], align 4
+// CHECK-NEXT:    [[TMP71:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_12]], align 4
+// CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP71]], 1
+// CHECK-NEXT:    [[TMP72:%.*]] = zext i32 [[ADD]] to i64
+// CHECK-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0
+// CHECK-NEXT:    store i32 2, ptr [[TMP73]], align 4
+// CHECK-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1
+// CHECK-NEXT:    store i32 3, ptr [[TMP74]], align 4
+// CHECK-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2
+// CHECK-NEXT:    store ptr [[TMP66]], ptr [[TMP75]], align 8
+// CHECK-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 3
+// CHECK-NEXT:    store ptr [[TMP67]], ptr [[TMP76]], align 8
+// CHECK-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 4
+// CHECK-NEXT:    store ptr [[TMP68]], ptr [[TMP77]], align 8
+// CHECK-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 5
+// CHECK-NEXT:    store ptr @.offload_maptypes.4, ptr [[TMP78]], align 8
+// CHECK-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 6
+// CHECK-NEXT:    store ptr null, ptr [[TMP79]], align 8
+// CHECK-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 7
+// CHECK-NEXT:    store ptr null, ptr [[TMP80]], align 8
+// CHECK-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 8
+// CHECK-NEXT:    store i64 [[TMP72]], ptr [[TMP81]], align 8
+// CHECK-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 9
+// CHECK-NEXT:    store i64 0, ptr [[TMP82]], align 8
+// CHECK-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 10
+// CHECK-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP83]], align 4
+// CHECK-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 11
+// CHECK-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP84]], align 4
+// CHECK-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 12
+// CHECK-NEXT:    store i32 0, ptr [[TMP85]], align 4
+// CHECK-NEXT:    [[TMP86:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18kernel_within_loopPiS_ii_l13.region_id, ptr [[KERNEL_ARGS14]])
+// CHECK-NEXT:    [[TMP87:%.*]] = icmp ne i32 [[TMP86]], 0
+// CHECK-NEXT:    br i1 [[TMP87]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK:       omp_offload.failed15:
+// CHECK-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18kernel_within_loopPiS_ii_l13(i64 [[TMP44]], ptr [[TMP45]], ptr [[TMP46]]) #[[ATTR3]]
+// CHECK-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
+// CHECK:       omp_offload.cont16:
+// CHECK-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK:       for.inc:
+// CHECK-NEXT:    [[TMP88:%.*]] = load i32, ptr [[I]], align 4
+// CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP88]], 1
+// CHECK-NEXT:    store i32 [[INC]], ptr [[I]], align 4
+// CHECK-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
+// CHECK:       for.end:
+// CHECK-NEXT:    [[TMP89:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// CHECK-NEXT:    [[TMP90:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK-NEXT:    [[SUB17:%.*]] = sub nsw i32 [[TMP90]], 1
+// CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[SUB17]] to i64
+// CHECK-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr inbounds i32, ptr [[TMP89]], i64 [[IDXPROM]]
+// CHECK-NEXT:    [[TMP91:%.*]] = load i32, ptr [[ARRAYIDX18]], align 4
+// CHECK-NEXT:    ret i32 [[TMP91]]
+//
+//
+// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18kernel_within_loopPiS_ii_l9
+// CHECK-SAME: (i64 noundef [[N:%.*]], ptr noundef [[A:%.*]], ptr noundef [[B:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[N_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[N_CASTED:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    store i64 [[N]], ptr [[N_ADDR]], align 8
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 8
+// CHECK-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[N_CASTED]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]])
+// CHECK-NEXT:    ret void
+//
+//
+// CHECK-LABEL: define internal void @.omp_outlined.
+// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef [[A:%.*]], ptr noundef [[B:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[N_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[J3:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK-NEXT:    store i64 [[N]], ptr [[N_ADDR]], align 8
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 8
+// CHECK-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0
+// CHECK-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK-NEXT:    store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK-NEXT:    store i32 0, ptr [[J]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 0, [[TMP2]]
+// CHECK-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// CHECK:       omp.precond.then:
+// CHECK-NEXT:    store i32 0, ptr [[DOTOMP_LB]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK-NEXT:    store i32 [[TMP3]], ptr [[DOTOMP_UB]], align 4
+// CHECK-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]]
+// CHECK-NEXT:    br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK:       cond.true:
+// CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK-NEXT:    br label [[COND_END:%.*]]
+// CHECK:       cond.false:
+// CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-NEXT:    br label [[COND_END]]
+// CHECK:       cond.end:
+// CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ]
+// CHECK-NEXT:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
+// CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK-NEXT:    store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4
+// CHECK-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// CHECK:       omp.inner.for.cond:
+// CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-NEXT:    [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
+// CHECK-NEXT:    br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK:       omp.inner.for.body:
+// CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1
+// CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK-NEXT:    store i32 [[ADD]], ptr [[J3]], align 4
+// CHECK-NEXT:    [[TMP14:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[J3]], align 4
+// CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 [[IDXPROM]]
+// CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// CHECK-NEXT:    [[TMP18:%.*]] = load i32, ptr [[J3]], align 4
+// CHECK-NEXT:    [[IDXPROM6:%.*]] = sext i32 [[TMP18]] to i64
+// CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 [[IDXPROM6]]
+// CHECK-NEXT:    store i32 [[TMP16]], ptr [[ARRAYIDX7]], align 4
+// CHECK-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
+// CHECK:       omp.body.continue:
+// CHECK-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// CHECK:       omp.inner.for.inc:
+// CHECK-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1
+// CHECK-NEXT:    store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4
+// CHECK-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// CHECK:       omp.inner.for.end:
+// CHECK-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+// CHECK:       omp.loop.exit:
+// CHECK-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
+// CHECK-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
+// CHECK-NEXT:    br label [[OMP_PRECOND_END]]
+// CHECK:       omp.precond.end:
+// CHECK-NEXT:    ret void
+//
+//
+// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18kernel_within_loopPiS_ii_l13
+// CHECK-SAME: (i64 noundef [[N:%.*]], ptr noundef [[A:%.*]], ptr noundef [[B:%.*]]) #[[ATTR2]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[N_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[N_CASTED:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    store i64 [[N]], ptr [[N_ADDR]], align 8
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 8
+// CHECK-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[N_CASTED]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..1, i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]])
+// CHECK-NEXT:    ret void
+//
+//
+// CHECK-LABEL: define internal void @.omp_outlined..1
+// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef [[A:%.*]], ptr noundef [[B:%.*]]) #[[ATTR2]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[N_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[J3:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[N_CASTED:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK-NEXT:    store i64 [[N]], ptr [[N_ADDR]], align 8
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 8
+// CHECK-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[TMP1]], -2
+// CHECK-NEXT:    [[DIV:%.*]] = udiv i32 [[SUB]], 3
+// CHECK-NEXT:    [[SUB2:%.*]] = sub i32 [[DIV]], 1
+// CHECK-NEXT:    store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK-NEXT:    store i32 0, ptr [[J]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 0, [[TMP2]]
+// CHECK-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// CHECK:       omp.precond.then:
+// CHECK-NEXT:    store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK-NEXT:    store i32 [[TMP3]], ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK-NEXT:    call void @__kmpc_for_static_init_4u(ptr @[[GLOB3:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK-NEXT:    [[CMP4:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]]
+// CHECK-NEXT:    br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK:       cond.true:
+// CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK-NEXT:    br label [[COND_END:%.*]]
+// CHECK:       cond.false:
+// CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-NEXT:    br label [[COND_END]]
+// CHECK:       cond.end:
+// CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ]
+// CHECK-NEXT:    store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK-NEXT:    store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4
+// CHECK-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// CHECK:       omp.inner.for.cond:
+// CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP12]], 1
+// CHECK-NEXT:    [[CMP5:%.*]] = icmp ult i32 [[TMP11]], [[ADD]]
+// CHECK-NEXT:    br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK:       omp.inner.for.body:
+// CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK-NEXT:    [[TMP14:%.*]] = zext i32 [[TMP13]] to i64
+// CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-NEXT:    [[TMP16:%.*]] = zext i32 [[TMP15]] to i64
+// CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[TMP17]], ptr [[N_CASTED]], align 4
+// CHECK-NEXT:    [[TMP18:%.*]] = load i64, ptr [[N_CASTED]], align 8
+// CHECK-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// CHECK-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..2, i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], ptr [[TMP19]], ptr [[TMP20]])
+// CHECK-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// CHECK:       omp.inner.for.inc:
+// CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-NEXT:    [[ADD6:%.*]] = add i32 [[TMP21]], [[TMP22]]
+// CHECK-NEXT:    store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
+// CHECK-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// CHECK:       omp.inner.for.end:
+// CHECK-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+// CHECK:       omp.loop.exit:
+// CHECK-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
+// CHECK-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP24]])
+// CHECK-NEXT:    br label [[OMP_PRECOND_END]]
+// CHECK:       omp.precond.end:
+// CHECK-NEXT:    ret void
+//
+//
+// CHECK-LABEL: define internal void @.omp_outlined..2
+// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef [[A:%.*]], ptr noundef [[B:%.*]]) #[[ATTR2]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    [[N_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[J4:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK-NEXT:    store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-NEXT:    store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-NEXT:    store i64 [[N]], ptr [[N_ADDR]], align 8
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 8
+// CHECK-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[TMP1]], -2
+// CHECK-NEXT:    [[DIV:%.*]] = udiv i32 [[SUB]], 3
+// CHECK-NEXT:    [[SUB2:%.*]] = sub i32 [[DIV]], 1
+// CHECK-NEXT:    store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK-NEXT:    store i32 0, ptr [[J]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 0, [[TMP2]]
+// CHECK-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// CHECK:       omp.precond.then:
+// CHECK-NEXT:    store i32 0, ptr [[DOTOMP_LB]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK-NEXT:    store i32 [[TMP3]], ptr [[DOTOMP_UB]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-NEXT:    [[CONV:%.*]] = trunc i64 [[TMP4]] to i32
+// CHECK-NEXT:    [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-NEXT:    [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32
+// CHECK-NEXT:    store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
+// CHECK-NEXT:    store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4
+// CHECK-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
+// CHECK-NEXT:    call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK-NEXT:    [[CMP5:%.*]] = icmp ugt i32 [[TMP8]], [[TMP9]]
+// CHECK-NEXT:    br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK:       cond.true:
+// CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK-NEXT:    br label [[COND_END:%.*]]
+// CHECK:       cond.false:
+// CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-NEXT:    br label [[COND_END]]
+// CHECK:       cond.end:
+// CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ]
+// CHECK-NEXT:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
+// CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK-NEXT:    store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4
+// CHECK-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// CHECK:       omp.inner.for.cond:
+// CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP14]], 1
+// CHECK-NEXT:    [[CMP6:%.*]] = icmp ult i32 [[TMP13]], [[ADD]]
+// CHECK-NEXT:    br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK:       omp.inner.for.body:
+// CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[TMP15]], 3
+// CHECK-NEXT:    [[ADD7:%.*]] = add i32 0, [[MUL]]
+// CHECK-NEXT:    store i32 [[ADD7]], ptr [[J4]], align 4
+// CHECK-NEXT:    [[TMP16:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr [[J4]], align 4
+// CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i64 [[IDXPROM]]
+// CHECK-NEXT:    [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT:    [[MUL8:%.*]] = mul nsw i32 [[TMP18]], 2
+// CHECK-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// CHECK-NEXT:    [[TMP20:%.*]] = load i32, ptr [[J4]], align 4
+// CHECK-NEXT:    [[IDXPROM9:%.*]] = sext i32 [[TMP20]] to i64
+// CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 [[IDXPROM9]]
+// CHECK-NEXT:    store i32 [[MUL8]], ptr [[ARRAYIDX10]], align 4
+// CHECK-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
+// CHECK:       omp.body.continue:
+// CHECK-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// CHECK:       omp.inner.for.inc:
+// CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-NEXT:    [[ADD11:%.*]] = add i32 [[TMP21]], 1
+// CHECK-NEXT:    store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4
+// CHECK-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// CHECK:       omp.inner.for.end:
+// CHECK-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+// CHECK:       omp.loop.exit:
+// CHECK-NEXT:    [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
+// CHECK-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP23]])
+// CHECK-NEXT:    br label [[OMP_PRECOND_END]]
+// CHECK:       omp.precond.end:
+// CHECK-NEXT:    ret void
+//
+//
+// CHECK-LABEL: define internal void @.omp_offloading.requires_reg
+// CHECK-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    call void @__tgt_register_requires(i64 1)
+// CHECK-NEXT:    ret void
+//

diff  --git a/clang/test/OpenMP/distribute_codegen.cpp b/clang/test/OpenMP/distribute_codegen.cpp
index a93d3e1385761..f07ef5336d82b 100644
--- a/clang/test/OpenMP/distribute_codegen.cpp
+++ b/clang/test/OpenMP/distribute_codegen.cpp
@@ -127,6 +127,7 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 8
 // CHECK1-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 8
 // CHECK1-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
@@ -161,7 +162,6 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK1-NEXT:    store ptr null, ptr [[TMP15]], align 8
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -318,6 +318,7 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 8
 // CHECK1-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 8
 // CHECK1-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
@@ -352,7 +353,6 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK1-NEXT:    store ptr null, ptr [[TMP15]], align 8
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -509,6 +509,7 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 8
 // CHECK1-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 8
 // CHECK1-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
@@ -543,7 +544,6 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK1-NEXT:    store ptr null, ptr [[TMP15]], align 8
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -717,6 +717,7 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i8, align 1
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i8 0, ptr [[A]], align 1
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i8, ptr [[A]], align 1
 // CHECK1-NEXT:    store i8 [[TMP0]], ptr [[A_CASTED]], align 1
@@ -742,7 +743,6 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1
 // CHECK1-NEXT:    [[TMP10:%.*]] = zext i32 [[ADD4]] to i64
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP11]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -897,6 +897,7 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i16 0, ptr [[AA]], align 2
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i16, ptr [[AA]], align 2
 // CHECK1-NEXT:    store i16 [[TMP0]], ptr [[AA_CASTED]], align 2
@@ -909,7 +910,6 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK1-NEXT:    store ptr null, ptr [[TMP4]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1055,6 +1055,7 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK3-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 4
 // CHECK3-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 4
@@ -1089,7 +1090,6 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK3-NEXT:    store ptr null, ptr [[TMP15]], align 4
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1242,6 +1242,7 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK3-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 4
 // CHECK3-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 4
@@ -1276,7 +1277,6 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK3-NEXT:    store ptr null, ptr [[TMP15]], align 4
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1429,6 +1429,7 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK3-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 4
 // CHECK3-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 4
@@ -1463,7 +1464,6 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK3-NEXT:    store ptr null, ptr [[TMP15]], align 4
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1633,6 +1633,7 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i8, align 1
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i8 0, ptr [[A]], align 1
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i8, ptr [[A]], align 1
 // CHECK3-NEXT:    store i8 [[TMP0]], ptr [[A_CASTED]], align 1
@@ -1658,7 +1659,6 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK3-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK3-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1
 // CHECK3-NEXT:    [[TMP10:%.*]] = zext i32 [[ADD4]] to i64
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP11]], align 4
 // CHECK3-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1813,6 +1813,7 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i16 0, ptr [[AA]], align 2
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i16, ptr [[AA]], align 2
 // CHECK3-NEXT:    store i16 [[TMP0]], ptr [[AA_CASTED]], align 2
@@ -1825,7 +1826,6 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK3-NEXT:    store ptr null, ptr [[TMP4]], align 4
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK3-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/distribute_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_firstprivate_codegen.cpp
index c385bc1209357..a1c1fc02a04a0 100644
--- a/clang/test/OpenMP/distribute_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_firstprivate_codegen.cpp
@@ -501,6 +501,7 @@ int main() {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store ptr [[G]], ptr [[G1]], align 8
 // CHECK9-NEXT:    call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
@@ -554,7 +555,6 @@ int main() {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -808,6 +808,7 @@ int main() {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK9-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK9-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -850,7 +851,6 @@ int main() {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP17]], align 8
 // CHECK9-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP20]], align 4
 // CHECK9-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1173,6 +1173,7 @@ int main() {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store ptr [[G]], ptr [[G1]], align 4
 // CHECK11-NEXT:    call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
@@ -1226,7 +1227,6 @@ int main() {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1478,6 +1478,7 @@ int main() {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK11-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK11-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -1520,7 +1521,6 @@ int main() {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP17]], align 4
 // CHECK11-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP20]], align 4
 // CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/distribute_lastprivate_codegen.cpp b/clang/test/OpenMP/distribute_lastprivate_codegen.cpp
index 8230ab672185b..417fff2d06fc8 100644
--- a/clang/test/OpenMP/distribute_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_lastprivate_codegen.cpp
@@ -485,6 +485,7 @@ int main() {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store ptr [[G]], ptr [[G1]], align 8
@@ -539,7 +540,6 @@ int main() {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -811,6 +811,7 @@ int main() {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK9-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK9-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -853,7 +854,6 @@ int main() {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP17]], align 8
 // CHECK9-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP20]], align 4
 // CHECK9-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1194,6 +1194,7 @@ int main() {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store ptr [[G]], ptr [[G1]], align 4
@@ -1248,7 +1249,6 @@ int main() {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1518,6 +1518,7 @@ int main() {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK11-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK11-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -1560,7 +1561,6 @@ int main() {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP17]], align 4
 // CHECK11-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP20]], align 4
 // CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp
index f333e83bf627c..c7f80248e5ed2 100644
--- a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp
@@ -4288,6 +4288,7 @@ int main() {
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[N_CASTED3:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [4 x ptr], align 8
@@ -4295,6 +4296,7 @@ int main() {
 // CHECK9-NEXT:    [[_TMP7:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_8:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[CH_CASTED:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[N_CASTED17:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS18:%.*]] = alloca [5 x ptr], align 8
@@ -4303,6 +4305,7 @@ int main() {
 // CHECK9-NEXT:    [[_TMP21:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_23:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[N_CASTED31:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS32:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS33:%.*]] = alloca [4 x ptr], align 8
@@ -4310,6 +4313,7 @@ int main() {
 // CHECK9-NEXT:    [[_TMP35:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_36:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_37:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS42:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[CH_CASTED45:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[N_CASTED46:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS47:%.*]] = alloca [5 x ptr], align 8
@@ -4318,6 +4322,7 @@ int main() {
 // CHECK9-NEXT:    [[_TMP50:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_51:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_52:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS57:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[N_CASTED60:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS61:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS62:%.*]] = alloca [4 x ptr], align 8
@@ -4325,6 +4330,7 @@ int main() {
 // CHECK9-NEXT:    [[_TMP64:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_65:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_66:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS71:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[CH_CASTED74:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[N_CASTED75:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS76:%.*]] = alloca [5 x ptr], align 8
@@ -4333,6 +4339,7 @@ int main() {
 // CHECK9-NEXT:    [[_TMP79:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_80:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_81:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS86:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store i32 10000, ptr [[N]], align 4
 // CHECK9-NEXT:    store i32 100, ptr [[CH]], align 4
@@ -4378,7 +4385,6 @@ int main() {
 // CHECK9-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK9-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -4454,7 +4460,6 @@ int main() {
 // CHECK9-NEXT:    [[TMP59:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
 // CHECK9-NEXT:    [[ADD13:%.*]] = add nsw i32 [[TMP59]], 1
 // CHECK9-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD13]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP61]], align 4
 // CHECK9-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1
@@ -4539,7 +4544,6 @@ int main() {
 // CHECK9-NEXT:    [[TMP102:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_23]], align 4
 // CHECK9-NEXT:    [[ADD27:%.*]] = add nsw i32 [[TMP102]], 1
 // CHECK9-NEXT:    [[TMP103:%.*]] = zext i32 [[ADD27]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP104]], align 4
 // CHECK9-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1
@@ -4615,7 +4619,6 @@ int main() {
 // CHECK9-NEXT:    [[TMP140:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_37]], align 4
 // CHECK9-NEXT:    [[ADD41:%.*]] = add nsw i32 [[TMP140]], 1
 // CHECK9-NEXT:    [[TMP141:%.*]] = zext i32 [[ADD41]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS42:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP142]], align 4
 // CHECK9-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 1
@@ -4700,7 +4703,6 @@ int main() {
 // CHECK9-NEXT:    [[TMP183:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_52]], align 4
 // CHECK9-NEXT:    [[ADD56:%.*]] = add nsw i32 [[TMP183]], 1
 // CHECK9-NEXT:    [[TMP184:%.*]] = zext i32 [[ADD56]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS57:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP185:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP185]], align 4
 // CHECK9-NEXT:    [[TMP186:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 1
@@ -4776,7 +4778,6 @@ int main() {
 // CHECK9-NEXT:    [[TMP221:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_66]], align 4
 // CHECK9-NEXT:    [[ADD70:%.*]] = add nsw i32 [[TMP221]], 1
 // CHECK9-NEXT:    [[TMP222:%.*]] = zext i32 [[ADD70]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS71:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP223:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP223]], align 4
 // CHECK9-NEXT:    [[TMP224:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 1
@@ -4861,7 +4862,6 @@ int main() {
 // CHECK9-NEXT:    [[TMP264:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_81]], align 4
 // CHECK9-NEXT:    [[ADD85:%.*]] = add nsw i32 [[TMP264]], 1
 // CHECK9-NEXT:    [[TMP265:%.*]] = zext i32 [[ADD85]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS86:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP266:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP266]], align 4
 // CHECK9-NEXT:    [[TMP267:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 1
@@ -6588,6 +6588,7 @@ int main() {
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[N_CASTED3:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [4 x ptr], align 8
@@ -6595,6 +6596,7 @@ int main() {
 // CHECK9-NEXT:    [[_TMP7:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_8:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[CH_CASTED:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[N_CASTED17:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS18:%.*]] = alloca [5 x ptr], align 8
@@ -6603,6 +6605,7 @@ int main() {
 // CHECK9-NEXT:    [[_TMP21:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_23:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[N_CASTED31:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS32:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS33:%.*]] = alloca [4 x ptr], align 8
@@ -6610,6 +6613,7 @@ int main() {
 // CHECK9-NEXT:    [[_TMP35:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_36:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_37:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS42:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[CH_CASTED45:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[N_CASTED46:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS47:%.*]] = alloca [5 x ptr], align 8
@@ -6618,6 +6622,7 @@ int main() {
 // CHECK9-NEXT:    [[_TMP50:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_51:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_52:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS57:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[N_CASTED60:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS61:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS62:%.*]] = alloca [4 x ptr], align 8
@@ -6625,6 +6630,7 @@ int main() {
 // CHECK9-NEXT:    [[_TMP64:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_65:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_66:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS71:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[CH_CASTED74:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[N_CASTED75:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS76:%.*]] = alloca [5 x ptr], align 8
@@ -6633,6 +6639,7 @@ int main() {
 // CHECK9-NEXT:    [[_TMP79:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_80:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_81:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS86:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 10000, ptr [[N]], align 4
 // CHECK9-NEXT:    store i32 100, ptr [[CH]], align 4
 // CHECK9-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N]], align 4
@@ -6677,7 +6684,6 @@ int main() {
 // CHECK9-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK9-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -6753,7 +6759,6 @@ int main() {
 // CHECK9-NEXT:    [[TMP59:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
 // CHECK9-NEXT:    [[ADD13:%.*]] = add nsw i32 [[TMP59]], 1
 // CHECK9-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD13]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP61]], align 4
 // CHECK9-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1
@@ -6838,7 +6843,6 @@ int main() {
 // CHECK9-NEXT:    [[TMP102:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_23]], align 4
 // CHECK9-NEXT:    [[ADD27:%.*]] = add nsw i32 [[TMP102]], 1
 // CHECK9-NEXT:    [[TMP103:%.*]] = zext i32 [[ADD27]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP104]], align 4
 // CHECK9-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1
@@ -6914,7 +6918,6 @@ int main() {
 // CHECK9-NEXT:    [[TMP140:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_37]], align 4
 // CHECK9-NEXT:    [[ADD41:%.*]] = add nsw i32 [[TMP140]], 1
 // CHECK9-NEXT:    [[TMP141:%.*]] = zext i32 [[ADD41]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS42:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP142]], align 4
 // CHECK9-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 1
@@ -6999,7 +7002,6 @@ int main() {
 // CHECK9-NEXT:    [[TMP183:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_52]], align 4
 // CHECK9-NEXT:    [[ADD56:%.*]] = add nsw i32 [[TMP183]], 1
 // CHECK9-NEXT:    [[TMP184:%.*]] = zext i32 [[ADD56]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS57:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP185:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP185]], align 4
 // CHECK9-NEXT:    [[TMP186:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 1
@@ -7075,7 +7077,6 @@ int main() {
 // CHECK9-NEXT:    [[TMP221:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_66]], align 4
 // CHECK9-NEXT:    [[ADD70:%.*]] = add nsw i32 [[TMP221]], 1
 // CHECK9-NEXT:    [[TMP222:%.*]] = zext i32 [[ADD70]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS71:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP223:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP223]], align 4
 // CHECK9-NEXT:    [[TMP224:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 1
@@ -7160,7 +7161,6 @@ int main() {
 // CHECK9-NEXT:    [[TMP264:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_81]], align 4
 // CHECK9-NEXT:    [[ADD85:%.*]] = add nsw i32 [[TMP264]], 1
 // CHECK9-NEXT:    [[TMP265:%.*]] = zext i32 [[ADD85]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS86:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP266:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP266]], align 4
 // CHECK9-NEXT:    [[TMP267:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 1
@@ -8909,6 +8909,7 @@ int main() {
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[N_CASTED3:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [4 x ptr], align 4
@@ -8916,6 +8917,7 @@ int main() {
 // CHECK11-NEXT:    [[_TMP7:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_8:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[CH_CASTED:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[N_CASTED17:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS18:%.*]] = alloca [5 x ptr], align 4
@@ -8924,6 +8926,7 @@ int main() {
 // CHECK11-NEXT:    [[_TMP21:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_23:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[N_CASTED31:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS32:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS33:%.*]] = alloca [4 x ptr], align 4
@@ -8931,6 +8934,7 @@ int main() {
 // CHECK11-NEXT:    [[_TMP35:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_36:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_37:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS42:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[CH_CASTED45:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[N_CASTED46:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS47:%.*]] = alloca [5 x ptr], align 4
@@ -8939,6 +8943,7 @@ int main() {
 // CHECK11-NEXT:    [[_TMP50:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_51:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_52:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS57:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[N_CASTED60:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS61:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS62:%.*]] = alloca [4 x ptr], align 4
@@ -8946,6 +8951,7 @@ int main() {
 // CHECK11-NEXT:    [[_TMP64:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_65:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_66:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS71:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[CH_CASTED74:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[N_CASTED75:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS76:%.*]] = alloca [5 x ptr], align 4
@@ -8954,6 +8960,7 @@ int main() {
 // CHECK11-NEXT:    [[_TMP79:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_80:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_81:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS86:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store i32 10000, ptr [[N]], align 4
 // CHECK11-NEXT:    store i32 100, ptr [[CH]], align 4
@@ -8999,7 +9006,6 @@ int main() {
 // CHECK11-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK11-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -9075,7 +9081,6 @@ int main() {
 // CHECK11-NEXT:    [[TMP59:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
 // CHECK11-NEXT:    [[ADD13:%.*]] = add nsw i32 [[TMP59]], 1
 // CHECK11-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD13]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP61]], align 4
 // CHECK11-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1
@@ -9160,7 +9165,6 @@ int main() {
 // CHECK11-NEXT:    [[TMP102:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_23]], align 4
 // CHECK11-NEXT:    [[ADD27:%.*]] = add nsw i32 [[TMP102]], 1
 // CHECK11-NEXT:    [[TMP103:%.*]] = zext i32 [[ADD27]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP104]], align 4
 // CHECK11-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1
@@ -9236,7 +9240,6 @@ int main() {
 // CHECK11-NEXT:    [[TMP140:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_37]], align 4
 // CHECK11-NEXT:    [[ADD41:%.*]] = add nsw i32 [[TMP140]], 1
 // CHECK11-NEXT:    [[TMP141:%.*]] = zext i32 [[ADD41]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS42:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP142]], align 4
 // CHECK11-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 1
@@ -9321,7 +9324,6 @@ int main() {
 // CHECK11-NEXT:    [[TMP183:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_52]], align 4
 // CHECK11-NEXT:    [[ADD56:%.*]] = add nsw i32 [[TMP183]], 1
 // CHECK11-NEXT:    [[TMP184:%.*]] = zext i32 [[ADD56]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS57:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP185:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP185]], align 4
 // CHECK11-NEXT:    [[TMP186:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 1
@@ -9397,7 +9399,6 @@ int main() {
 // CHECK11-NEXT:    [[TMP221:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_66]], align 4
 // CHECK11-NEXT:    [[ADD70:%.*]] = add nsw i32 [[TMP221]], 1
 // CHECK11-NEXT:    [[TMP222:%.*]] = zext i32 [[ADD70]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS71:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP223:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP223]], align 4
 // CHECK11-NEXT:    [[TMP224:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 1
@@ -9482,7 +9483,6 @@ int main() {
 // CHECK11-NEXT:    [[TMP264:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_81]], align 4
 // CHECK11-NEXT:    [[ADD85:%.*]] = add nsw i32 [[TMP264]], 1
 // CHECK11-NEXT:    [[TMP265:%.*]] = zext i32 [[ADD85]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS86:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP266:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP266]], align 4
 // CHECK11-NEXT:    [[TMP267:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 1
@@ -11158,6 +11158,7 @@ int main() {
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[N_CASTED3:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [4 x ptr], align 4
@@ -11165,6 +11166,7 @@ int main() {
 // CHECK11-NEXT:    [[_TMP7:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_8:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[CH_CASTED:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[N_CASTED17:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS18:%.*]] = alloca [5 x ptr], align 4
@@ -11173,6 +11175,7 @@ int main() {
 // CHECK11-NEXT:    [[_TMP21:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_23:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[N_CASTED31:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS32:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS33:%.*]] = alloca [4 x ptr], align 4
@@ -11180,6 +11183,7 @@ int main() {
 // CHECK11-NEXT:    [[_TMP35:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_36:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_37:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS42:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[CH_CASTED45:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[N_CASTED46:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS47:%.*]] = alloca [5 x ptr], align 4
@@ -11188,6 +11192,7 @@ int main() {
 // CHECK11-NEXT:    [[_TMP50:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_51:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_52:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS57:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[N_CASTED60:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS61:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS62:%.*]] = alloca [4 x ptr], align 4
@@ -11195,6 +11200,7 @@ int main() {
 // CHECK11-NEXT:    [[_TMP64:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_65:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_66:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS71:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[CH_CASTED74:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[N_CASTED75:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS76:%.*]] = alloca [5 x ptr], align 4
@@ -11203,6 +11209,7 @@ int main() {
 // CHECK11-NEXT:    [[_TMP79:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_80:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_81:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS86:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 10000, ptr [[N]], align 4
 // CHECK11-NEXT:    store i32 100, ptr [[CH]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N]], align 4
@@ -11247,7 +11254,6 @@ int main() {
 // CHECK11-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK11-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -11323,7 +11329,6 @@ int main() {
 // CHECK11-NEXT:    [[TMP59:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
 // CHECK11-NEXT:    [[ADD13:%.*]] = add nsw i32 [[TMP59]], 1
 // CHECK11-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD13]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP61]], align 4
 // CHECK11-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1
@@ -11408,7 +11413,6 @@ int main() {
 // CHECK11-NEXT:    [[TMP102:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_23]], align 4
 // CHECK11-NEXT:    [[ADD27:%.*]] = add nsw i32 [[TMP102]], 1
 // CHECK11-NEXT:    [[TMP103:%.*]] = zext i32 [[ADD27]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP104]], align 4
 // CHECK11-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1
@@ -11484,7 +11488,6 @@ int main() {
 // CHECK11-NEXT:    [[TMP140:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_37]], align 4
 // CHECK11-NEXT:    [[ADD41:%.*]] = add nsw i32 [[TMP140]], 1
 // CHECK11-NEXT:    [[TMP141:%.*]] = zext i32 [[ADD41]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS42:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP142]], align 4
 // CHECK11-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 1
@@ -11569,7 +11572,6 @@ int main() {
 // CHECK11-NEXT:    [[TMP183:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_52]], align 4
 // CHECK11-NEXT:    [[ADD56:%.*]] = add nsw i32 [[TMP183]], 1
 // CHECK11-NEXT:    [[TMP184:%.*]] = zext i32 [[ADD56]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS57:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP185:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP185]], align 4
 // CHECK11-NEXT:    [[TMP186:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 1
@@ -11645,7 +11647,6 @@ int main() {
 // CHECK11-NEXT:    [[TMP221:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_66]], align 4
 // CHECK11-NEXT:    [[ADD70:%.*]] = add nsw i32 [[TMP221]], 1
 // CHECK11-NEXT:    [[TMP222:%.*]] = zext i32 [[ADD70]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS71:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP223:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP223]], align 4
 // CHECK11-NEXT:    [[TMP224:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 1
@@ -11730,7 +11731,6 @@ int main() {
 // CHECK11-NEXT:    [[TMP264:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_81]], align 4
 // CHECK11-NEXT:    [[ADD85:%.*]] = add nsw i32 [[TMP264]], 1
 // CHECK11-NEXT:    [[TMP265:%.*]] = zext i32 [[ADD85]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS86:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP266:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP266]], align 4
 // CHECK11-NEXT:    [[TMP267:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp
index dfeb1898823f5..f46a6bf1ef0b1 100644
--- a/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp
@@ -784,6 +784,7 @@ int main() {
 // CHECK8-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
 // CHECK8-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
 // CHECK8-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK8-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK8-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK8-NEXT:    store ptr [[G]], ptr [[G1]], align 8
 // CHECK8-NEXT:    call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
@@ -837,7 +838,6 @@ int main() {
 // CHECK8-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK8-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK8-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK8-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK8-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK8-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK8-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1221,6 +1221,7 @@ int main() {
 // CHECK8-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK8-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
 // CHECK8-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK8-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK8-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK8-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK8-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -1263,7 +1264,6 @@ int main() {
 // CHECK8-NEXT:    store ptr null, ptr [[TMP17]], align 8
 // CHECK8-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK8-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK8-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK8-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK8-NEXT:    store i32 2, ptr [[TMP20]], align 4
 // CHECK8-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1710,6 +1710,7 @@ int main() {
 // CHECK10-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
 // CHECK10-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
 // CHECK10-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK10-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK10-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK10-NEXT:    store ptr [[G]], ptr [[G1]], align 4
 // CHECK10-NEXT:    call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
@@ -1763,7 +1764,6 @@ int main() {
 // CHECK10-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK10-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK10-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2141,6 +2141,7 @@ int main() {
 // CHECK10-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK10-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
 // CHECK10-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK10-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK10-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK10-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK10-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -2183,7 +2184,6 @@ int main() {
 // CHECK10-NEXT:    store ptr null, ptr [[TMP17]], align 4
 // CHECK10-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK10-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK10-NEXT:    store i32 2, ptr [[TMP20]], align 4
 // CHECK10-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp
index 7ca6b251f3e0a..f8612e1b3ccc4 100644
--- a/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp
@@ -124,8 +124,9 @@ int main() {
 // CHECK1-SAME: () #[[ATTR0:[0-9]+]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -159,7 +160,6 @@ int main() {
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l47() #[[ATTR2:[0-9]+]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       omp_offload.cont:
-// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -481,14 +481,16 @@ int main() {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[ARG_CASTED:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP5:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -522,7 +524,6 @@ int main() {
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l85() #[[ATTR2]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       omp_offload.cont:
-// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -567,7 +568,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP34]], align 8
 // CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP37]], align 4
 // CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
@@ -1049,14 +1049,16 @@ int main() {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[ARG_ADDR:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[ARG_CASTED:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP5:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 [[ARG]], ptr [[ARG_ADDR]], align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1090,7 +1092,6 @@ int main() {
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l63() #[[ATTR2]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       omp_offload.cont:
-// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -1135,7 +1136,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP34]], align 8
 // CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP37]], align 4
 // CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp
index fdc1c68b5785c..b8c3ee2bd3965 100644
--- a/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp
@@ -759,6 +759,7 @@ int main() {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store ptr [[G]], ptr [[G1]], align 8
@@ -813,7 +814,6 @@ int main() {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1233,6 +1233,7 @@ int main() {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK9-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK9-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -1275,7 +1276,6 @@ int main() {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP17]], align 8
 // CHECK9-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP20]], align 4
 // CHECK9-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1758,6 +1758,7 @@ int main() {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store ptr [[G]], ptr [[G1]], align 4
@@ -1812,7 +1813,6 @@ int main() {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2226,6 +2226,7 @@ int main() {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK11-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK11-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -2268,7 +2269,6 @@ int main() {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP17]], align 4
 // CHECK11-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP20]], align 4
 // CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp
index 9c874b81f2b71..c97b78de3fd61 100644
--- a/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp
@@ -102,18 +102,19 @@ int main() {
 // CHECK1-NEXT:    [[EXN_SLOT:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    call void @_ZN1SC1El(ptr noundef nonnull align 8 dereferenceable(24) [[S]], i64 noundef 0)
 // CHECK1-NEXT:    [[CALL:%.*]] = invoke noundef signext i8 @_ZN1ScvcEv(ptr noundef nonnull align 8 dereferenceable(24) [[S]])
 // CHECK1-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
 // CHECK1:       invoke.cont:
 // CHECK1-NEXT:    store i8 [[CALL]], ptr [[A]], align 1
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -167,7 +168,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -561,8 +561,9 @@ int main() {
 // CHECK1-SAME: () #[[ATTR7:[0-9]+]] comdat {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -596,7 +597,6 @@ int main() {
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52() #[[ATTR6]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       omp_offload.cont:
-// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -637,8 +637,9 @@ int main() {
 // CHECK1-SAME: () #[[ATTR7]] comdat {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -672,7 +673,6 @@ int main() {
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52() #[[ATTR6]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       omp_offload.cont:
-// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -1361,18 +1361,19 @@ int main() {
 // CHECK5-NEXT:    [[EXN_SLOT:%.*]] = alloca ptr, align 8
 // CHECK5-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
 // CHECK5-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK5-NEXT:    call void @_ZN1SC1El(ptr noundef nonnull align 8 dereferenceable(24) [[S]], i64 noundef 0)
 // CHECK5-NEXT:    [[CALL:%.*]] = invoke noundef signext i8 @_ZN1ScvcEv(ptr noundef nonnull align 8 dereferenceable(24) [[S]])
 // CHECK5-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
 // CHECK5:       invoke.cont:
 // CHECK5-NEXT:    store i8 [[CALL]], ptr [[A]], align 1
-// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK5-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1426,7 +1427,6 @@ int main() {
 // CHECK5-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK5-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK5-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -1820,8 +1820,9 @@ int main() {
 // CHECK5-SAME: () #[[ATTR7:[0-9]+]] comdat {
 // CHECK5-NEXT:  entry:
 // CHECK5-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK5-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK5-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK5-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1855,7 +1856,6 @@ int main() {
 // CHECK5-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52() #[[ATTR6]]
 // CHECK5-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK5:       omp_offload.cont:
-// CHECK5-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK5-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -1896,8 +1896,9 @@ int main() {
 // CHECK5-SAME: () #[[ATTR7]] comdat {
 // CHECK5-NEXT:  entry:
 // CHECK5-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK5-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK5-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK5-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1931,7 +1932,6 @@ int main() {
 // CHECK5-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52() #[[ATTR6]]
 // CHECK5-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK5:       omp_offload.cont:
-// CHECK5-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK5-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -2620,18 +2620,19 @@ int main() {
 // CHECK9-NEXT:    [[EXN_SLOT:%.*]] = alloca ptr, align 8
 // CHECK9-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    call void @_ZN1SC1El(ptr noundef nonnull align 8 dereferenceable(24) [[S]], i64 noundef 0)
 // CHECK9-NEXT:    [[CALL:%.*]] = invoke noundef i8 @_ZN1ScvcEv(ptr noundef nonnull align 8 dereferenceable(24) [[S]])
 // CHECK9-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
 // CHECK9:       invoke.cont:
 // CHECK9-NEXT:    store i8 [[CALL]], ptr [[A]], align 1
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK9-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2685,7 +2686,6 @@ int main() {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -3079,8 +3079,9 @@ int main() {
 // CHECK9-SAME: () #[[ATTR7:[0-9]+]] comdat {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK9-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3114,7 +3115,6 @@ int main() {
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52() #[[ATTR6]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
-// CHECK9-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -3155,8 +3155,9 @@ int main() {
 // CHECK9-SAME: () #[[ATTR7]] comdat {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK9-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3190,7 +3191,6 @@ int main() {
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52() #[[ATTR6]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
-// CHECK9-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -3879,18 +3879,19 @@ int main() {
 // CHECK13-NEXT:    [[EXN_SLOT:%.*]] = alloca ptr, align 8
 // CHECK13-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK13-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK13-NEXT:    call void @_ZN1SC1El(ptr noundef nonnull align 8 dereferenceable(24) [[S]], i64 noundef 0)
 // CHECK13-NEXT:    [[CALL:%.*]] = invoke noundef i8 @_ZN1ScvcEv(ptr noundef nonnull align 8 dereferenceable(24) [[S]])
 // CHECK13-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
 // CHECK13:       invoke.cont:
 // CHECK13-NEXT:    store i8 [[CALL]], ptr [[A]], align 1
-// CHECK13-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK13-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK13-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3944,7 +3945,6 @@ int main() {
 // CHECK13-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK13-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK13-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK13-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -4338,8 +4338,9 @@ int main() {
 // CHECK13-SAME: () #[[ATTR7:[0-9]+]] comdat {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK13-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK13-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK13-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -4373,7 +4374,6 @@ int main() {
 // CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52() #[[ATTR6]]
 // CHECK13-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK13:       omp_offload.cont:
-// CHECK13-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK13-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -4414,8 +4414,9 @@ int main() {
 // CHECK13-SAME: () #[[ATTR7]] comdat {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK13-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK13-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK13-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -4449,7 +4450,6 @@ int main() {
 // CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52() #[[ATTR6]]
 // CHECK13-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK13:       omp_offload.cont:
-// CHECK13-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK13-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp
index 9a606bc2f4581..f62e389a7e298 100644
--- a/clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp
@@ -522,6 +522,7 @@ int main() {
 // CHECK9-NEXT:    [[VAR:%.*]] = alloca ptr, align 8
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca ptr, align 8
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store ptr [[G]], ptr [[G1]], align 8
 // CHECK9-NEXT:    call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
@@ -533,7 +534,6 @@ int main() {
 // CHECK9-NEXT:    call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00)
 // CHECK9-NEXT:    store ptr [[TEST]], ptr [[VAR]], align 8
 // CHECK9-NEXT:    store ptr undef, ptr [[_TMP1]], align 8
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK9-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -845,6 +845,7 @@ int main() {
 // CHECK9-NEXT:    [[VAR:%.*]] = alloca ptr, align 8
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca ptr, align 8
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK9-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK9-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -854,7 +855,6 @@ int main() {
 // CHECK9-NEXT:    call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2)
 // CHECK9-NEXT:    store ptr [[TEST]], ptr [[VAR]], align 8
 // CHECK9-NEXT:    store ptr undef, ptr [[_TMP1]], align 8
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK9-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1240,6 +1240,7 @@ int main() {
 // CHECK11-NEXT:    [[VAR:%.*]] = alloca ptr, align 4
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca ptr, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store ptr [[G]], ptr [[G1]], align 4
 // CHECK11-NEXT:    call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
@@ -1251,7 +1252,6 @@ int main() {
 // CHECK11-NEXT:    call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00)
 // CHECK11-NEXT:    store ptr [[TEST]], ptr [[VAR]], align 4
 // CHECK11-NEXT:    store ptr undef, ptr [[_TMP1]], align 4
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK11-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1557,6 +1557,7 @@ int main() {
 // CHECK11-NEXT:    [[VAR:%.*]] = alloca ptr, align 4
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca ptr, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK11-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK11-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -1566,7 +1567,6 @@ int main() {
 // CHECK11-NEXT:    call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2)
 // CHECK11-NEXT:    store ptr [[TEST]], ptr [[VAR]], align 4
 // CHECK11-NEXT:    store ptr undef, ptr [[_TMP1]], align 4
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK11-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp
index 903dbb6de2880..9e84ed68e0b8d 100644
--- a/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp
@@ -58,9 +58,10 @@ int main() {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -94,7 +95,6 @@ int main() {
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37() #[[ATTR2:[0-9]+]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       omp_offload.cont:
-// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp
index a0819493d2cee..529ce78ed6dac 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp
@@ -4678,6 +4678,7 @@ int main() {
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[N_CASTED3:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [4 x ptr], align 8
@@ -4685,6 +4686,7 @@ int main() {
 // CHECK9-NEXT:    [[_TMP7:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_8:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[CH_CASTED:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[N_CASTED17:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS18:%.*]] = alloca [5 x ptr], align 8
@@ -4693,6 +4695,7 @@ int main() {
 // CHECK9-NEXT:    [[_TMP21:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_23:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[N_CASTED31:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS32:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS33:%.*]] = alloca [4 x ptr], align 8
@@ -4700,6 +4703,7 @@ int main() {
 // CHECK9-NEXT:    [[_TMP35:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_36:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_37:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS42:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[CH_CASTED45:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[N_CASTED46:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS47:%.*]] = alloca [5 x ptr], align 8
@@ -4708,6 +4712,7 @@ int main() {
 // CHECK9-NEXT:    [[_TMP50:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_51:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_52:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS57:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[N_CASTED60:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS61:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS62:%.*]] = alloca [4 x ptr], align 8
@@ -4715,6 +4720,7 @@ int main() {
 // CHECK9-NEXT:    [[_TMP64:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_65:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_66:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS71:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[CH_CASTED74:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[N_CASTED75:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS76:%.*]] = alloca [5 x ptr], align 8
@@ -4723,6 +4729,7 @@ int main() {
 // CHECK9-NEXT:    [[_TMP79:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_80:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_81:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS86:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store i32 10000, ptr [[N]], align 4
 // CHECK9-NEXT:    store i32 100, ptr [[CH]], align 4
@@ -4768,7 +4775,6 @@ int main() {
 // CHECK9-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK9-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -4844,7 +4850,6 @@ int main() {
 // CHECK9-NEXT:    [[TMP59:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
 // CHECK9-NEXT:    [[ADD13:%.*]] = add nsw i32 [[TMP59]], 1
 // CHECK9-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD13]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP61]], align 4
 // CHECK9-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1
@@ -4929,7 +4934,6 @@ int main() {
 // CHECK9-NEXT:    [[TMP102:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_23]], align 4
 // CHECK9-NEXT:    [[ADD27:%.*]] = add nsw i32 [[TMP102]], 1
 // CHECK9-NEXT:    [[TMP103:%.*]] = zext i32 [[ADD27]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP104]], align 4
 // CHECK9-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1
@@ -5005,7 +5009,6 @@ int main() {
 // CHECK9-NEXT:    [[TMP140:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_37]], align 4
 // CHECK9-NEXT:    [[ADD41:%.*]] = add nsw i32 [[TMP140]], 1
 // CHECK9-NEXT:    [[TMP141:%.*]] = zext i32 [[ADD41]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS42:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP142]], align 4
 // CHECK9-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 1
@@ -5090,7 +5093,6 @@ int main() {
 // CHECK9-NEXT:    [[TMP183:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_52]], align 4
 // CHECK9-NEXT:    [[ADD56:%.*]] = add nsw i32 [[TMP183]], 1
 // CHECK9-NEXT:    [[TMP184:%.*]] = zext i32 [[ADD56]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS57:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP185:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP185]], align 4
 // CHECK9-NEXT:    [[TMP186:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 1
@@ -5166,7 +5168,6 @@ int main() {
 // CHECK9-NEXT:    [[TMP221:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_66]], align 4
 // CHECK9-NEXT:    [[ADD70:%.*]] = add nsw i32 [[TMP221]], 1
 // CHECK9-NEXT:    [[TMP222:%.*]] = zext i32 [[ADD70]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS71:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP223:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP223]], align 4
 // CHECK9-NEXT:    [[TMP224:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 1
@@ -5251,7 +5252,6 @@ int main() {
 // CHECK9-NEXT:    [[TMP264:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_81]], align 4
 // CHECK9-NEXT:    [[ADD85:%.*]] = add nsw i32 [[TMP264]], 1
 // CHECK9-NEXT:    [[TMP265:%.*]] = zext i32 [[ADD85]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS86:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP266:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP266]], align 4
 // CHECK9-NEXT:    [[TMP267:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 1
@@ -7146,6 +7146,7 @@ int main() {
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[N_CASTED3:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [4 x ptr], align 8
@@ -7153,6 +7154,7 @@ int main() {
 // CHECK9-NEXT:    [[_TMP7:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_8:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[CH_CASTED:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[N_CASTED17:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS18:%.*]] = alloca [5 x ptr], align 8
@@ -7161,6 +7163,7 @@ int main() {
 // CHECK9-NEXT:    [[_TMP21:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_23:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[N_CASTED31:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS32:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS33:%.*]] = alloca [4 x ptr], align 8
@@ -7168,6 +7171,7 @@ int main() {
 // CHECK9-NEXT:    [[_TMP35:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_36:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_37:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS42:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[CH_CASTED45:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[N_CASTED46:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS47:%.*]] = alloca [5 x ptr], align 8
@@ -7176,6 +7180,7 @@ int main() {
 // CHECK9-NEXT:    [[_TMP50:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_51:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_52:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS57:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[N_CASTED60:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS61:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS62:%.*]] = alloca [4 x ptr], align 8
@@ -7183,6 +7188,7 @@ int main() {
 // CHECK9-NEXT:    [[_TMP64:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_65:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_66:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS71:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[CH_CASTED74:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[N_CASTED75:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS76:%.*]] = alloca [5 x ptr], align 8
@@ -7191,6 +7197,7 @@ int main() {
 // CHECK9-NEXT:    [[_TMP79:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_80:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_81:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS86:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 10000, ptr [[N]], align 4
 // CHECK9-NEXT:    store i32 100, ptr [[CH]], align 4
 // CHECK9-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N]], align 4
@@ -7235,7 +7242,6 @@ int main() {
 // CHECK9-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK9-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -7311,7 +7317,6 @@ int main() {
 // CHECK9-NEXT:    [[TMP59:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
 // CHECK9-NEXT:    [[ADD13:%.*]] = add nsw i32 [[TMP59]], 1
 // CHECK9-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD13]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP61]], align 4
 // CHECK9-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1
@@ -7396,7 +7401,6 @@ int main() {
 // CHECK9-NEXT:    [[TMP102:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_23]], align 4
 // CHECK9-NEXT:    [[ADD27:%.*]] = add nsw i32 [[TMP102]], 1
 // CHECK9-NEXT:    [[TMP103:%.*]] = zext i32 [[ADD27]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP104]], align 4
 // CHECK9-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1
@@ -7472,7 +7476,6 @@ int main() {
 // CHECK9-NEXT:    [[TMP140:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_37]], align 4
 // CHECK9-NEXT:    [[ADD41:%.*]] = add nsw i32 [[TMP140]], 1
 // CHECK9-NEXT:    [[TMP141:%.*]] = zext i32 [[ADD41]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS42:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP142]], align 4
 // CHECK9-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 1
@@ -7557,7 +7560,6 @@ int main() {
 // CHECK9-NEXT:    [[TMP183:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_52]], align 4
 // CHECK9-NEXT:    [[ADD56:%.*]] = add nsw i32 [[TMP183]], 1
 // CHECK9-NEXT:    [[TMP184:%.*]] = zext i32 [[ADD56]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS57:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP185:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP185]], align 4
 // CHECK9-NEXT:    [[TMP186:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 1
@@ -7633,7 +7635,6 @@ int main() {
 // CHECK9-NEXT:    [[TMP221:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_66]], align 4
 // CHECK9-NEXT:    [[ADD70:%.*]] = add nsw i32 [[TMP221]], 1
 // CHECK9-NEXT:    [[TMP222:%.*]] = zext i32 [[ADD70]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS71:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP223:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP223]], align 4
 // CHECK9-NEXT:    [[TMP224:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 1
@@ -7718,7 +7719,6 @@ int main() {
 // CHECK9-NEXT:    [[TMP264:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_81]], align 4
 // CHECK9-NEXT:    [[ADD85:%.*]] = add nsw i32 [[TMP264]], 1
 // CHECK9-NEXT:    [[TMP265:%.*]] = zext i32 [[ADD85]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS86:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP266:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP266]], align 4
 // CHECK9-NEXT:    [[TMP267:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 1
@@ -9620,6 +9620,7 @@ int main() {
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[N_CASTED3:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [4 x ptr], align 4
@@ -9627,6 +9628,7 @@ int main() {
 // CHECK11-NEXT:    [[_TMP7:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_8:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[CH_CASTED:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[N_CASTED17:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS18:%.*]] = alloca [5 x ptr], align 4
@@ -9635,6 +9637,7 @@ int main() {
 // CHECK11-NEXT:    [[_TMP21:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_23:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[N_CASTED31:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS32:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS33:%.*]] = alloca [4 x ptr], align 4
@@ -9642,6 +9645,7 @@ int main() {
 // CHECK11-NEXT:    [[_TMP35:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_36:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_37:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS42:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[CH_CASTED45:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[N_CASTED46:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS47:%.*]] = alloca [5 x ptr], align 4
@@ -9650,6 +9654,7 @@ int main() {
 // CHECK11-NEXT:    [[_TMP50:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_51:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_52:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS57:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[N_CASTED60:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS61:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS62:%.*]] = alloca [4 x ptr], align 4
@@ -9657,6 +9662,7 @@ int main() {
 // CHECK11-NEXT:    [[_TMP64:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_65:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_66:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS71:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[CH_CASTED74:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[N_CASTED75:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS76:%.*]] = alloca [5 x ptr], align 4
@@ -9665,6 +9671,7 @@ int main() {
 // CHECK11-NEXT:    [[_TMP79:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_80:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_81:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS86:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store i32 10000, ptr [[N]], align 4
 // CHECK11-NEXT:    store i32 100, ptr [[CH]], align 4
@@ -9710,7 +9717,6 @@ int main() {
 // CHECK11-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK11-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -9786,7 +9792,6 @@ int main() {
 // CHECK11-NEXT:    [[TMP59:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
 // CHECK11-NEXT:    [[ADD13:%.*]] = add nsw i32 [[TMP59]], 1
 // CHECK11-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD13]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP61]], align 4
 // CHECK11-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1
@@ -9871,7 +9876,6 @@ int main() {
 // CHECK11-NEXT:    [[TMP102:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_23]], align 4
 // CHECK11-NEXT:    [[ADD27:%.*]] = add nsw i32 [[TMP102]], 1
 // CHECK11-NEXT:    [[TMP103:%.*]] = zext i32 [[ADD27]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP104]], align 4
 // CHECK11-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1
@@ -9947,7 +9951,6 @@ int main() {
 // CHECK11-NEXT:    [[TMP140:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_37]], align 4
 // CHECK11-NEXT:    [[ADD41:%.*]] = add nsw i32 [[TMP140]], 1
 // CHECK11-NEXT:    [[TMP141:%.*]] = zext i32 [[ADD41]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS42:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP142]], align 4
 // CHECK11-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 1
@@ -10032,7 +10035,6 @@ int main() {
 // CHECK11-NEXT:    [[TMP183:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_52]], align 4
 // CHECK11-NEXT:    [[ADD56:%.*]] = add nsw i32 [[TMP183]], 1
 // CHECK11-NEXT:    [[TMP184:%.*]] = zext i32 [[ADD56]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS57:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP185:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP185]], align 4
 // CHECK11-NEXT:    [[TMP186:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 1
@@ -10108,7 +10110,6 @@ int main() {
 // CHECK11-NEXT:    [[TMP221:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_66]], align 4
 // CHECK11-NEXT:    [[ADD70:%.*]] = add nsw i32 [[TMP221]], 1
 // CHECK11-NEXT:    [[TMP222:%.*]] = zext i32 [[ADD70]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS71:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP223:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP223]], align 4
 // CHECK11-NEXT:    [[TMP224:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 1
@@ -10193,7 +10194,6 @@ int main() {
 // CHECK11-NEXT:    [[TMP264:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_81]], align 4
 // CHECK11-NEXT:    [[ADD85:%.*]] = add nsw i32 [[TMP264]], 1
 // CHECK11-NEXT:    [[TMP265:%.*]] = zext i32 [[ADD85]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS86:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP266:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP266]], align 4
 // CHECK11-NEXT:    [[TMP267:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 1
@@ -12037,6 +12037,7 @@ int main() {
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[N_CASTED3:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [4 x ptr], align 4
@@ -12044,6 +12045,7 @@ int main() {
 // CHECK11-NEXT:    [[_TMP7:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_8:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[CH_CASTED:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[N_CASTED17:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS18:%.*]] = alloca [5 x ptr], align 4
@@ -12052,6 +12054,7 @@ int main() {
 // CHECK11-NEXT:    [[_TMP21:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_23:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[N_CASTED31:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS32:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS33:%.*]] = alloca [4 x ptr], align 4
@@ -12059,6 +12062,7 @@ int main() {
 // CHECK11-NEXT:    [[_TMP35:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_36:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_37:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS42:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[CH_CASTED45:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[N_CASTED46:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS47:%.*]] = alloca [5 x ptr], align 4
@@ -12067,6 +12071,7 @@ int main() {
 // CHECK11-NEXT:    [[_TMP50:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_51:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_52:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS57:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[N_CASTED60:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS61:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS62:%.*]] = alloca [4 x ptr], align 4
@@ -12074,6 +12079,7 @@ int main() {
 // CHECK11-NEXT:    [[_TMP64:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_65:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_66:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS71:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[CH_CASTED74:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[N_CASTED75:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS76:%.*]] = alloca [5 x ptr], align 4
@@ -12082,6 +12088,7 @@ int main() {
 // CHECK11-NEXT:    [[_TMP79:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_80:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_81:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS86:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 10000, ptr [[N]], align 4
 // CHECK11-NEXT:    store i32 100, ptr [[CH]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N]], align 4
@@ -12126,7 +12133,6 @@ int main() {
 // CHECK11-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK11-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -12202,7 +12208,6 @@ int main() {
 // CHECK11-NEXT:    [[TMP59:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4
 // CHECK11-NEXT:    [[ADD13:%.*]] = add nsw i32 [[TMP59]], 1
 // CHECK11-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD13]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP61]], align 4
 // CHECK11-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1
@@ -12287,7 +12292,6 @@ int main() {
 // CHECK11-NEXT:    [[TMP102:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_23]], align 4
 // CHECK11-NEXT:    [[ADD27:%.*]] = add nsw i32 [[TMP102]], 1
 // CHECK11-NEXT:    [[TMP103:%.*]] = zext i32 [[ADD27]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP104]], align 4
 // CHECK11-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1
@@ -12363,7 +12367,6 @@ int main() {
 // CHECK11-NEXT:    [[TMP140:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_37]], align 4
 // CHECK11-NEXT:    [[ADD41:%.*]] = add nsw i32 [[TMP140]], 1
 // CHECK11-NEXT:    [[TMP141:%.*]] = zext i32 [[ADD41]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS42:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP142]], align 4
 // CHECK11-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 1
@@ -12448,7 +12451,6 @@ int main() {
 // CHECK11-NEXT:    [[TMP183:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_52]], align 4
 // CHECK11-NEXT:    [[ADD56:%.*]] = add nsw i32 [[TMP183]], 1
 // CHECK11-NEXT:    [[TMP184:%.*]] = zext i32 [[ADD56]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS57:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP185:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP185]], align 4
 // CHECK11-NEXT:    [[TMP186:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 1
@@ -12524,7 +12526,6 @@ int main() {
 // CHECK11-NEXT:    [[TMP221:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_66]], align 4
 // CHECK11-NEXT:    [[ADD70:%.*]] = add nsw i32 [[TMP221]], 1
 // CHECK11-NEXT:    [[TMP222:%.*]] = zext i32 [[ADD70]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS71:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP223:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP223]], align 4
 // CHECK11-NEXT:    [[TMP224:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 1
@@ -12609,7 +12610,6 @@ int main() {
 // CHECK11-NEXT:    [[TMP264:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_81]], align 4
 // CHECK11-NEXT:    [[ADD85:%.*]] = add nsw i32 [[TMP264]], 1
 // CHECK11-NEXT:    [[TMP265:%.*]] = zext i32 [[ADD85]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS86:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP266:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP266]], align 4
 // CHECK11-NEXT:    [[TMP267:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp
index af7cddc3114c1..69890ffd7fec5 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp
@@ -847,6 +847,7 @@ int main() {
 // CHECK8-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
 // CHECK8-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
 // CHECK8-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK8-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK8-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK8-NEXT:    store ptr [[G]], ptr [[G1]], align 8
 // CHECK8-NEXT:    call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
@@ -900,7 +901,6 @@ int main() {
 // CHECK8-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK8-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK8-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK8-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK8-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK8-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK8-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1298,6 +1298,7 @@ int main() {
 // CHECK8-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK8-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
 // CHECK8-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK8-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK8-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK8-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK8-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -1340,7 +1341,6 @@ int main() {
 // CHECK8-NEXT:    store ptr null, ptr [[TMP17]], align 8
 // CHECK8-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK8-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK8-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK8-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK8-NEXT:    store i32 2, ptr [[TMP20]], align 4
 // CHECK8-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1801,6 +1801,7 @@ int main() {
 // CHECK10-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
 // CHECK10-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
 // CHECK10-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK10-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK10-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK10-NEXT:    store ptr [[G]], ptr [[G1]], align 4
 // CHECK10-NEXT:    call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
@@ -1854,7 +1855,6 @@ int main() {
 // CHECK10-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK10-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK10-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2246,6 +2246,7 @@ int main() {
 // CHECK10-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK10-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
 // CHECK10-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK10-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK10-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK10-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK10-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -2288,7 +2289,6 @@ int main() {
 // CHECK10-NEXT:    store ptr null, ptr [[TMP17]], align 4
 // CHECK10-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK10-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK10-NEXT:    store i32 2, ptr [[TMP20]], align 4
 // CHECK10-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp
index ad23bf5bdc640..27e0a19095984 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp
@@ -121,8 +121,9 @@ int main() {
 // CHECK1-SAME: () #[[ATTR0:[0-9]+]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -156,7 +157,6 @@ int main() {
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l43() #[[ATTR2:[0-9]+]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       omp_offload.cont:
-// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -506,14 +506,16 @@ int main() {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[ARG_CASTED:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP5:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -547,7 +549,6 @@ int main() {
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81() #[[ATTR2]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       omp_offload.cont:
-// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -592,7 +593,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP34]], align 8
 // CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP37]], align 4
 // CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
@@ -1116,14 +1116,16 @@ int main() {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[ARG_ADDR:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[ARG_CASTED:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP5:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 [[ARG]], ptr [[ARG_ADDR]], align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1157,7 +1159,6 @@ int main() {
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l59() #[[ATTR2]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       omp_offload.cont:
-// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -1202,7 +1203,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP34]], align 8
 // CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP37]], align 4
 // CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
@@ -1730,8 +1730,9 @@ int main() {
 // CHECK3-SAME: () #[[ATTR0:[0-9]+]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK3-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1765,7 +1766,6 @@ int main() {
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l43() #[[ATTR2:[0-9]+]]
 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK3:       omp_offload.cont:
-// CHECK3-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -2115,14 +2115,16 @@ int main() {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[ARG_CASTED:%.*]] = alloca i64, align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK3-NEXT:    [[_TMP5:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2156,7 +2158,6 @@ int main() {
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81() #[[ATTR2]]
 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK3:       omp_offload.cont:
-// CHECK3-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -2201,7 +2202,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP34]], align 8
 // CHECK3-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP37]], align 4
 // CHECK3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
@@ -2960,14 +2960,16 @@ int main() {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[ARG_ADDR:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[ARG_CASTED:%.*]] = alloca i64, align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK3-NEXT:    [[_TMP5:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store i32 [[ARG]], ptr [[ARG_ADDR]], align 4
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3001,7 +3003,6 @@ int main() {
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l59() #[[ATTR2]]
 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK3:       omp_offload.cont:
-// CHECK3-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -3046,7 +3047,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP34]], align 8
 // CHECK3-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP37]], align 4
 // CHECK3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
@@ -4150,8 +4150,9 @@ int main() {
 // CHECK9-SAME: () #[[ATTR0:[0-9]+]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK9-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -4185,7 +4186,6 @@ int main() {
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l43() #[[ATTR2:[0-9]+]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
-// CHECK9-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -4535,14 +4535,16 @@ int main() {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[ARG_CASTED:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP5:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK9-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -4576,7 +4578,6 @@ int main() {
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81() #[[ATTR2]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
-// CHECK9-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -4621,7 +4622,6 @@ int main() {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP34]], align 8
 // CHECK9-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP37]], align 4
 // CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
@@ -5145,14 +5145,16 @@ int main() {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[ARG_ADDR:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[ARG_CASTED:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP5:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 [[ARG]], ptr [[ARG_ADDR]], align 4
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK9-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -5186,7 +5188,6 @@ int main() {
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l59() #[[ATTR2]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
-// CHECK9-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -5231,7 +5232,6 @@ int main() {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP34]], align 8
 // CHECK9-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP37]], align 4
 // CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
@@ -5759,8 +5759,9 @@ int main() {
 // CHECK11-SAME: () #[[ATTR0:[0-9]+]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK11-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -5794,7 +5795,6 @@ int main() {
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l43() #[[ATTR2:[0-9]+]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
-// CHECK11-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -6144,14 +6144,16 @@ int main() {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[ARG_CASTED:%.*]] = alloca i64, align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK11-NEXT:    [[_TMP5:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK11-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -6185,7 +6187,6 @@ int main() {
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81() #[[ATTR2]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
-// CHECK11-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -6230,7 +6231,6 @@ int main() {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP34]], align 8
 // CHECK11-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP37]], align 4
 // CHECK11-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
@@ -6989,14 +6989,16 @@ int main() {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[ARG_ADDR:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[ARG_CASTED:%.*]] = alloca i64, align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK11-NEXT:    [[_TMP5:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 [[ARG]], ptr [[ARG_ADDR]], align 4
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK11-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -7030,7 +7032,6 @@ int main() {
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l59() #[[ATTR2]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
-// CHECK11-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -7075,7 +7076,6 @@ int main() {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP34]], align 8
 // CHECK11-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP37]], align 4
 // CHECK11-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp
index 78e461c7ce40e..b9ab7ace7968e 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp
@@ -826,6 +826,7 @@ int main() {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store ptr [[G]], ptr [[G1]], align 8
@@ -880,7 +881,6 @@ int main() {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1314,6 +1314,7 @@ int main() {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK9-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK9-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -1356,7 +1357,6 @@ int main() {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP17]], align 8
 // CHECK9-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP20]], align 4
 // CHECK9-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1853,6 +1853,7 @@ int main() {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store ptr [[G]], ptr [[G1]], align 4
@@ -1907,7 +1908,6 @@ int main() {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2335,6 +2335,7 @@ int main() {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK11-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK11-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -2377,7 +2378,6 @@ int main() {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP17]], align 4
 // CHECK11-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP20]], align 4
 // CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp
index c2d6814d62ee2..de369bf417459 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp
@@ -102,18 +102,19 @@ int main() {
 // CHECK1-NEXT:    [[EXN_SLOT:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    call void @_ZN1SC1El(ptr nonnull align 8 dereferenceable(24) [[S]], i64 0)
 // CHECK1-NEXT:    [[CALL:%.*]] = invoke signext i8 @_ZN1ScvcEv(ptr nonnull align 8 dereferenceable(24) [[S]])
 // CHECK1-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
 // CHECK1:       invoke.cont:
 // CHECK1-NEXT:    store i8 [[CALL]], ptr [[A]], align 1
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -167,7 +168,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -589,8 +589,9 @@ int main() {
 // CHECK1-SAME: () #[[ATTR7:[0-9]+]] comdat {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -624,7 +625,6 @@ int main() {
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52() #[[ATTR6]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       omp_offload.cont:
-// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -665,8 +665,9 @@ int main() {
 // CHECK1-SAME: () #[[ATTR7]] comdat {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -700,7 +701,6 @@ int main() {
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52() #[[ATTR6]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       omp_offload.cont:
-// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -1784,18 +1784,19 @@ int main() {
 // CHECK5-NEXT:    [[EXN_SLOT:%.*]] = alloca ptr, align 8
 // CHECK5-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
 // CHECK5-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK5-NEXT:    call void @_ZN1SC1El(ptr nonnull align 8 dereferenceable(24) [[S]], i64 0)
 // CHECK5-NEXT:    [[CALL:%.*]] = invoke signext i8 @_ZN1ScvcEv(ptr nonnull align 8 dereferenceable(24) [[S]])
 // CHECK5-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
 // CHECK5:       invoke.cont:
 // CHECK5-NEXT:    store i8 [[CALL]], ptr [[A]], align 1
-// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK5-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1849,7 +1850,6 @@ int main() {
 // CHECK5-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK5-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK5-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -2271,8 +2271,9 @@ int main() {
 // CHECK5-SAME: () #[[ATTR7:[0-9]+]] comdat {
 // CHECK5-NEXT:  entry:
 // CHECK5-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK5-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK5-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK5-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2306,7 +2307,6 @@ int main() {
 // CHECK5-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52() #[[ATTR6]]
 // CHECK5-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK5:       omp_offload.cont:
-// CHECK5-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK5-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -2347,8 +2347,9 @@ int main() {
 // CHECK5-SAME: () #[[ATTR7]] comdat {
 // CHECK5-NEXT:  entry:
 // CHECK5-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK5-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK5-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK5-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2382,7 +2383,6 @@ int main() {
 // CHECK5-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52() #[[ATTR6]]
 // CHECK5-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK5:       omp_offload.cont:
-// CHECK5-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK5-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -3127,18 +3127,19 @@ int main() {
 // CHECK9-NEXT:    [[EXN_SLOT:%.*]] = alloca ptr, align 8
 // CHECK9-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    call void @_ZN1SC1El(ptr nonnull align 8 dereferenceable(24) [[S]], i64 0)
 // CHECK9-NEXT:    [[CALL:%.*]] = invoke i8 @_ZN1ScvcEv(ptr nonnull align 8 dereferenceable(24) [[S]])
 // CHECK9-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
 // CHECK9:       invoke.cont:
 // CHECK9-NEXT:    store i8 [[CALL]], ptr [[A]], align 1
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK9-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3192,7 +3193,6 @@ int main() {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -3614,8 +3614,9 @@ int main() {
 // CHECK9-SAME: () #[[ATTR7:[0-9]+]] comdat {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK9-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3649,7 +3650,6 @@ int main() {
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52() #[[ATTR6]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
-// CHECK9-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -3690,8 +3690,9 @@ int main() {
 // CHECK9-SAME: () #[[ATTR7]] comdat {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK9-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3725,7 +3726,6 @@ int main() {
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52() #[[ATTR6]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
-// CHECK9-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -4809,18 +4809,19 @@ int main() {
 // CHECK13-NEXT:    [[EXN_SLOT:%.*]] = alloca ptr, align 8
 // CHECK13-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK13-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK13-NEXT:    call void @_ZN1SC1El(ptr nonnull align 8 dereferenceable(24) [[S]], i64 0)
 // CHECK13-NEXT:    [[CALL:%.*]] = invoke i8 @_ZN1ScvcEv(ptr nonnull align 8 dereferenceable(24) [[S]])
 // CHECK13-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
 // CHECK13:       invoke.cont:
 // CHECK13-NEXT:    store i8 [[CALL]], ptr [[A]], align 1
-// CHECK13-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK13-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK13-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -4874,7 +4875,6 @@ int main() {
 // CHECK13-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK13-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK13-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK13-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -5296,8 +5296,9 @@ int main() {
 // CHECK13-SAME: () #[[ATTR7:[0-9]+]] comdat {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK13-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK13-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK13-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -5331,7 +5332,6 @@ int main() {
 // CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52() #[[ATTR6]]
 // CHECK13-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK13:       omp_offload.cont:
-// CHECK13-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK13-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -5372,8 +5372,9 @@ int main() {
 // CHECK13-SAME: () #[[ATTR7]] comdat {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK13-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK13-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK13-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -5407,7 +5408,6 @@ int main() {
 // CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52() #[[ATTR6]]
 // CHECK13-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK13:       omp_offload.cont:
-// CHECK13-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK13-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp
index 0816e513772fe..7696d4ab1ff51 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp
@@ -576,6 +576,7 @@ int main() {
 // CHECK9-NEXT:    [[VAR:%.*]] = alloca ptr, align 8
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca ptr, align 8
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store ptr [[G]], ptr [[G1]], align 8
 // CHECK9-NEXT:    call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
@@ -587,7 +588,6 @@ int main() {
 // CHECK9-NEXT:    call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00)
 // CHECK9-NEXT:    store ptr [[TEST]], ptr [[VAR]], align 8
 // CHECK9-NEXT:    store ptr undef, ptr [[_TMP1]], align 8
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK9-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -913,6 +913,7 @@ int main() {
 // CHECK9-NEXT:    [[VAR:%.*]] = alloca ptr, align 8
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca ptr, align 8
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK9-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK9-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -922,7 +923,6 @@ int main() {
 // CHECK9-NEXT:    call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2)
 // CHECK9-NEXT:    store ptr [[TEST]], ptr [[VAR]], align 8
 // CHECK9-NEXT:    store ptr undef, ptr [[_TMP1]], align 8
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK9-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1322,6 +1322,7 @@ int main() {
 // CHECK11-NEXT:    [[VAR:%.*]] = alloca ptr, align 4
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca ptr, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store ptr [[G]], ptr [[G1]], align 4
 // CHECK11-NEXT:    call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
@@ -1333,7 +1334,6 @@ int main() {
 // CHECK11-NEXT:    call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00)
 // CHECK11-NEXT:    store ptr [[TEST]], ptr [[VAR]], align 4
 // CHECK11-NEXT:    store ptr undef, ptr [[_TMP1]], align 4
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK11-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1653,6 +1653,7 @@ int main() {
 // CHECK11-NEXT:    [[VAR:%.*]] = alloca ptr, align 4
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca ptr, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK11-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK11-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -1662,7 +1663,6 @@ int main() {
 // CHECK11-NEXT:    call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2)
 // CHECK11-NEXT:    store ptr [[TEST]], ptr [[VAR]], align 4
 // CHECK11-NEXT:    store ptr undef, ptr [[_TMP1]], align 4
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK11-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp
index d992f389100e3..700b3ca0a0494 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp
@@ -58,9 +58,10 @@ int main() {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -94,7 +95,6 @@ int main() {
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37() #[[ATTR2:[0-9]+]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       omp_offload.cont:
-// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/distribute_private_codegen.cpp b/clang/test/OpenMP/distribute_private_codegen.cpp
index ab654e7ab3fa1..7dd4c5f20b070 100644
--- a/clang/test/OpenMP/distribute_private_codegen.cpp
+++ b/clang/test/OpenMP/distribute_private_codegen.cpp
@@ -349,8 +349,10 @@ int main() {
 // CHECK9-NEXT:    [[VAR:%.*]] = alloca ptr, align 8
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca ptr, align 8
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[_TMP2:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS3:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store ptr [[G]], ptr [[G1]], align 8
 // CHECK9-NEXT:    call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
@@ -362,7 +364,6 @@ int main() {
 // CHECK9-NEXT:    call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00)
 // CHECK9-NEXT:    store ptr [[TEST]], ptr [[VAR]], align 8
 // CHECK9-NEXT:    store ptr undef, ptr [[_TMP1]], align 8
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK9-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -396,7 +397,6 @@ int main() {
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l93() #[[ATTR4:[0-9]+]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
-// CHECK9-NEXT:    [[KERNEL_ARGS3:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 1
@@ -671,6 +671,7 @@ int main() {
 // CHECK9-NEXT:    [[VAR:%.*]] = alloca ptr, align 8
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca ptr, align 8
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK9-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK9-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -680,7 +681,6 @@ int main() {
 // CHECK9-NEXT:    call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2)
 // CHECK9-NEXT:    store ptr [[TEST]], ptr [[VAR]], align 8
 // CHECK9-NEXT:    store ptr undef, ptr [[_TMP1]], align 8
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK9-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -962,8 +962,10 @@ int main() {
 // CHECK11-NEXT:    [[VAR:%.*]] = alloca ptr, align 4
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca ptr, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[_TMP2:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS3:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store ptr [[G]], ptr [[G1]], align 4
 // CHECK11-NEXT:    call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
@@ -975,7 +977,6 @@ int main() {
 // CHECK11-NEXT:    call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00)
 // CHECK11-NEXT:    store ptr [[TEST]], ptr [[VAR]], align 4
 // CHECK11-NEXT:    store ptr undef, ptr [[_TMP1]], align 4
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK11-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1009,7 +1010,6 @@ int main() {
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l93() #[[ATTR4:[0-9]+]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
-// CHECK11-NEXT:    [[KERNEL_ARGS3:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 1
@@ -1282,6 +1282,7 @@ int main() {
 // CHECK11-NEXT:    [[VAR:%.*]] = alloca ptr, align 4
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca ptr, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK11-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK11-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -1291,7 +1292,6 @@ int main() {
 // CHECK11-NEXT:    call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2)
 // CHECK11-NEXT:    store ptr [[TEST]], ptr [[VAR]], align 4
 // CHECK11-NEXT:    store ptr undef, ptr [[_TMP1]], align 4
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK11-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/distribute_simd_codegen.cpp b/clang/test/OpenMP/distribute_simd_codegen.cpp
index bcf49a213f2ab..02d84779f3b85 100644
--- a/clang/test/OpenMP/distribute_simd_codegen.cpp
+++ b/clang/test/OpenMP/distribute_simd_codegen.cpp
@@ -155,6 +155,7 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 8
 // CHECK1-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 8
 // CHECK1-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
@@ -189,7 +190,6 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK1-NEXT:    store ptr null, ptr [[TMP15]], align 8
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -355,6 +355,7 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 8
 // CHECK1-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 8
 // CHECK1-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
@@ -389,7 +390,6 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK1-NEXT:    store ptr null, ptr [[TMP15]], align 8
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -553,6 +553,7 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 8
 // CHECK1-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 8
 // CHECK1-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
@@ -587,7 +588,6 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK1-NEXT:    store ptr null, ptr [[TMP15]], align 8
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -770,6 +770,7 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i8, align 1
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i8 0, ptr [[A]], align 1
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i8, ptr [[I]], align 1
 // CHECK1-NEXT:    store i8 [[TMP0]], ptr [[I_CASTED]], align 1
@@ -804,7 +805,6 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK1-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1
 // CHECK1-NEXT:    [[TMP15:%.*]] = zext i32 [[ADD4]] to i64
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP16]], align 4
 // CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -982,6 +982,7 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i16 0, ptr [[AA]], align 2
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i16, ptr [[AA]], align 2
 // CHECK1-NEXT:    store i16 [[TMP0]], ptr [[AA_CASTED]], align 2
@@ -994,7 +995,6 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK1-NEXT:    store ptr null, ptr [[TMP4]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1147,6 +1147,7 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK3-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 4
 // CHECK3-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 4
@@ -1181,7 +1182,6 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK3-NEXT:    store ptr null, ptr [[TMP15]], align 4
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1343,6 +1343,7 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK3-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 4
 // CHECK3-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 4
@@ -1377,7 +1378,6 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK3-NEXT:    store ptr null, ptr [[TMP15]], align 4
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1537,6 +1537,7 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK3-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 4
 // CHECK3-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 4
@@ -1571,7 +1572,6 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK3-NEXT:    store ptr null, ptr [[TMP15]], align 4
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1750,6 +1750,7 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i8, align 1
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i8 0, ptr [[A]], align 1
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i8, ptr [[I]], align 1
 // CHECK3-NEXT:    store i8 [[TMP0]], ptr [[I_CASTED]], align 1
@@ -1784,7 +1785,6 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK3-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK3-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1
 // CHECK3-NEXT:    [[TMP15:%.*]] = zext i32 [[ADD4]] to i64
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP16]], align 4
 // CHECK3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1962,6 +1962,7 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i16 0, ptr [[AA]], align 2
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i16, ptr [[AA]], align 2
 // CHECK3-NEXT:    store i16 [[TMP0]], ptr [[AA_CASTED]], align 2
@@ -1974,7 +1975,6 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK3-NEXT:    store ptr null, ptr [[TMP4]], align 4
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK3-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2127,6 +2127,7 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
 // CHECK5-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 8
 // CHECK5-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 8
 // CHECK5-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
@@ -2161,7 +2162,6 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK5-NEXT:    store ptr null, ptr [[TMP15]], align 8
 // CHECK5-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK5-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2327,6 +2327,7 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
 // CHECK5-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 8
 // CHECK5-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 8
 // CHECK5-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
@@ -2361,7 +2362,6 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK5-NEXT:    store ptr null, ptr [[TMP15]], align 8
 // CHECK5-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK5-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2525,6 +2525,7 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
 // CHECK5-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 8
 // CHECK5-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 8
 // CHECK5-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
@@ -2559,7 +2560,6 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK5-NEXT:    store ptr null, ptr [[TMP15]], align 8
 // CHECK5-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK5-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2742,6 +2742,7 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK5-NEXT:    [[TMP:%.*]] = alloca i8, align 1
 // CHECK5-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK5-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    store i8 0, ptr [[A]], align 1
 // CHECK5-NEXT:    [[TMP0:%.*]] = load i8, ptr [[I]], align 1
 // CHECK5-NEXT:    store i8 [[TMP0]], ptr [[I_CASTED]], align 1
@@ -2776,7 +2777,6 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK5-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK5-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1
 // CHECK5-NEXT:    [[TMP15:%.*]] = zext i32 [[ADD4]] to i64
-// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP16]], align 4
 // CHECK5-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2985,6 +2985,7 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    store i16 0, ptr [[AA]], align 2
 // CHECK5-NEXT:    [[TMP0:%.*]] = load i16, ptr [[AA]], align 2
 // CHECK5-NEXT:    store i16 [[TMP0]], ptr [[AA_CASTED]], align 2
@@ -2997,7 +2998,6 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK5-NEXT:    store ptr null, ptr [[TMP4]], align 8
 // CHECK5-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK5-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3150,6 +3150,7 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK7-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
 // CHECK7-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK7-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 4
 // CHECK7-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 4
@@ -3184,7 +3185,6 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK7-NEXT:    store ptr null, ptr [[TMP15]], align 4
 // CHECK7-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK7-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3346,6 +3346,7 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK7-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
 // CHECK7-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK7-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 4
 // CHECK7-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 4
@@ -3380,7 +3381,6 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK7-NEXT:    store ptr null, ptr [[TMP15]], align 4
 // CHECK7-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK7-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3540,6 +3540,7 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK7-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
 // CHECK7-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK7-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 4
 // CHECK7-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 4
@@ -3574,7 +3575,6 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK7-NEXT:    store ptr null, ptr [[TMP15]], align 4
 // CHECK7-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK7-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3753,6 +3753,7 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK7-NEXT:    [[TMP:%.*]] = alloca i8, align 1
 // CHECK7-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK7-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    store i8 0, ptr [[A]], align 1
 // CHECK7-NEXT:    [[TMP0:%.*]] = load i8, ptr [[I]], align 1
 // CHECK7-NEXT:    store i8 [[TMP0]], ptr [[I_CASTED]], align 1
@@ -3787,7 +3788,6 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK7-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK7-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1
 // CHECK7-NEXT:    [[TMP15:%.*]] = zext i32 [[ADD4]] to i64
-// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP16]], align 4
 // CHECK7-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3996,6 +3996,7 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK7-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    store i16 0, ptr [[AA]], align 2
 // CHECK7-NEXT:    [[TMP0:%.*]] = load i16, ptr [[AA]], align 2
 // CHECK7-NEXT:    store i16 [[TMP0]], ptr [[AA_CASTED]], align 2
@@ -4008,7 +4009,6 @@ int fint(void) { return ftemplate<int>(); }
 // CHECK7-NEXT:    store ptr null, ptr [[TMP4]], align 4
 // CHECK7-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK7-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp
index 60ed8f0e9eb68..8787b131d070e 100644
--- a/clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp
@@ -554,6 +554,7 @@ int main() {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store ptr [[G]], ptr [[G1]], align 8
 // CHECK9-NEXT:    call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
@@ -607,7 +608,6 @@ int main() {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -868,6 +868,7 @@ int main() {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK9-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK9-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -910,7 +911,6 @@ int main() {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP17]], align 8
 // CHECK9-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP20]], align 4
 // CHECK9-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1240,6 +1240,7 @@ int main() {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store ptr [[G]], ptr [[G1]], align 4
 // CHECK11-NEXT:    call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
@@ -1293,7 +1294,6 @@ int main() {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1552,6 +1552,7 @@ int main() {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK11-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK11-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -1594,7 +1595,6 @@ int main() {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP17]], align 4
 // CHECK11-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP20]], align 4
 // CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp
index d540eda5dbfc5..c26260cb45159 100644
--- a/clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp
@@ -540,6 +540,7 @@ int main() {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store ptr [[G]], ptr [[G1]], align 8
@@ -594,7 +595,6 @@ int main() {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -873,6 +873,7 @@ int main() {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK9-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK9-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -915,7 +916,6 @@ int main() {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP17]], align 8
 // CHECK9-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP20]], align 4
 // CHECK9-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1263,6 +1263,7 @@ int main() {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store ptr [[G]], ptr [[G1]], align 4
@@ -1317,7 +1318,6 @@ int main() {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1594,6 +1594,7 @@ int main() {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK11-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK11-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -1636,7 +1637,6 @@ int main() {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP17]], align 4
 // CHECK11-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP20]], align 4
 // CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/distribute_simd_private_codegen.cpp b/clang/test/OpenMP/distribute_simd_private_codegen.cpp
index 824a96642f046..181881a43fcbc 100644
--- a/clang/test/OpenMP/distribute_simd_private_codegen.cpp
+++ b/clang/test/OpenMP/distribute_simd_private_codegen.cpp
@@ -390,12 +390,14 @@ int main() {
 // CHECK9-NEXT:    [[VAR:%.*]] = alloca ptr, align 8
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca ptr, align 8
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[I_CASTED:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP2:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS3:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store ptr [[G]], ptr [[G1]], align 8
 // CHECK9-NEXT:    call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
@@ -407,7 +409,6 @@ int main() {
 // CHECK9-NEXT:    call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00)
 // CHECK9-NEXT:    store ptr [[TEST]], ptr [[VAR]], align 8
 // CHECK9-NEXT:    store ptr undef, ptr [[_TMP1]], align 8
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK9-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -452,7 +453,6 @@ int main() {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP19]], align 8
 // CHECK9-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS3:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP22]], align 4
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 1
@@ -745,6 +745,7 @@ int main() {
 // CHECK9-NEXT:    [[VAR:%.*]] = alloca ptr, align 8
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca ptr, align 8
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK9-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK9-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -754,7 +755,6 @@ int main() {
 // CHECK9-NEXT:    call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2)
 // CHECK9-NEXT:    store ptr [[TEST]], ptr [[VAR]], align 8
 // CHECK9-NEXT:    store ptr undef, ptr [[_TMP1]], align 8
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK9-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1043,12 +1043,14 @@ int main() {
 // CHECK11-NEXT:    [[VAR:%.*]] = alloca ptr, align 4
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca ptr, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[I_CASTED:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP2:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS3:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store ptr [[G]], ptr [[G1]], align 4
 // CHECK11-NEXT:    call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
@@ -1060,7 +1062,6 @@ int main() {
 // CHECK11-NEXT:    call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00)
 // CHECK11-NEXT:    store ptr [[TEST]], ptr [[VAR]], align 4
 // CHECK11-NEXT:    store ptr undef, ptr [[_TMP1]], align 4
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK11-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1105,7 +1106,6 @@ int main() {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP19]], align 4
 // CHECK11-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS3:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP22]], align 4
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 1
@@ -1396,6 +1396,7 @@ int main() {
 // CHECK11-NEXT:    [[VAR:%.*]] = alloca ptr, align 4
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca ptr, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK11-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK11-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -1405,7 +1406,6 @@ int main() {
 // CHECK11-NEXT:    call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2)
 // CHECK11-NEXT:    store ptr [[TEST]], ptr [[VAR]], align 4
 // CHECK11-NEXT:    store ptr undef, ptr [[_TMP1]], align 4
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK11-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/distribute_simd_reduction_codegen.cpp b/clang/test/OpenMP/distribute_simd_reduction_codegen.cpp
index 72249ee7a719b..979dbdd030cbe 100644
--- a/clang/test/OpenMP/distribute_simd_reduction_codegen.cpp
+++ b/clang/test/OpenMP/distribute_simd_reduction_codegen.cpp
@@ -91,6 +91,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4
 // CHECK1-NEXT:    store i32 [[TMP0]], ptr [[SIVAR_CASTED]], align 4
@@ -103,7 +104,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP4]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -240,6 +240,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 4
@@ -253,7 +254,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP4]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -395,6 +395,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4
 // CHECK3-NEXT:    store i32 [[TMP0]], ptr [[SIVAR_CASTED]], align 4
@@ -407,7 +408,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP4]], align 4
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK3-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -544,6 +544,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 4
@@ -557,7 +558,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP4]], align 4
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK3-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/for_non_rectangular_codegen.c b/clang/test/OpenMP/for_non_rectangular_codegen.c
index 969b4aea4d961..48dd37f6eddc0 100644
--- a/clang/test/OpenMP/for_non_rectangular_codegen.c
+++ b/clang/test/OpenMP/for_non_rectangular_codegen.c
@@ -6,8 +6,6 @@
 // RUN: %clang_cc1 -verify -fopenmp-simd -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
 // RUN: %clang_cc1 -fopenmp-simd -x c -triple x86_64-unknown-unknown -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp-simd -x c -triple x86_64-unknown-unknown -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
-// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
-//
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -257,3 +255,59 @@ void collapsed(int mp) {
 // CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]])
 // CHECK-NEXT:    ret void
 //
+//
+// SIMD-ONLY0-LABEL: define {{[^@]+}}@collapsed
+// SIMD-ONLY0-SAME: (i32 noundef [[MP:%.*]]) #[[ATTR0:[0-9]+]] {
+// SIMD-ONLY0-NEXT:  entry:
+// SIMD-ONLY0-NEXT:    [[MP_ADDR:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT:    [[J:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT:    [[I:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT:    [[I0:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT:    store i32 [[MP]], ptr [[MP_ADDR]], align 4
+// SIMD-ONLY0-NEXT:    store i32 0, ptr [[J]], align 4
+// SIMD-ONLY0-NEXT:    br label [[FOR_COND:%.*]]
+// SIMD-ONLY0:       for.cond:
+// SIMD-ONLY0-NEXT:    [[TMP0:%.*]] = load i32, ptr [[J]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP1:%.*]] = load i32, ptr [[MP_ADDR]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[TMP1]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END12:%.*]]
+// SIMD-ONLY0:       for.body:
+// SIMD-ONLY0-NEXT:    [[TMP2:%.*]] = load i32, ptr [[J]], align 4
+// SIMD-ONLY0-NEXT:    store i32 [[TMP2]], ptr [[I]], align 4
+// SIMD-ONLY0-NEXT:    br label [[FOR_COND1:%.*]]
+// SIMD-ONLY0:       for.cond1:
+// SIMD-ONLY0-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I]], align 4
+// SIMD-ONLY0-NEXT:    [[TMP4:%.*]] = load i32, ptr [[MP_ADDR]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[TMP3]], [[TMP4]]
+// SIMD-ONLY0-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END9:%.*]]
+// SIMD-ONLY0:       for.body3:
+// SIMD-ONLY0-NEXT:    store i32 0, ptr [[I0]], align 4
+// SIMD-ONLY0-NEXT:    br label [[FOR_COND4:%.*]]
+// SIMD-ONLY0:       for.cond4:
+// SIMD-ONLY0-NEXT:    [[TMP5:%.*]] = load i32, ptr [[I0]], align 4
+// SIMD-ONLY0-NEXT:    [[CMP5:%.*]] = icmp slt i32 [[TMP5]], 10
+// SIMD-ONLY0-NEXT:    br i1 [[CMP5]], label [[FOR_BODY6:%.*]], label [[FOR_END:%.*]]
+// SIMD-ONLY0:       for.body6:
+// SIMD-ONLY0-NEXT:    br label [[FOR_INC:%.*]]
+// SIMD-ONLY0:       for.inc:
+// SIMD-ONLY0-NEXT:    [[TMP6:%.*]] = load i32, ptr [[I0]], align 4
+// SIMD-ONLY0-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP6]], 1
+// SIMD-ONLY0-NEXT:    store i32 [[INC]], ptr [[I0]], align 4
+// SIMD-ONLY0-NEXT:    br label [[FOR_COND4]], !llvm.loop [[LOOP2:![0-9]+]]
+// SIMD-ONLY0:       for.end:
+// SIMD-ONLY0-NEXT:    br label [[FOR_INC7:%.*]]
+// SIMD-ONLY0:       for.inc7:
+// SIMD-ONLY0-NEXT:    [[TMP7:%.*]] = load i32, ptr [[I]], align 4
+// SIMD-ONLY0-NEXT:    [[INC8:%.*]] = add nsw i32 [[TMP7]], 1
+// SIMD-ONLY0-NEXT:    store i32 [[INC8]], ptr [[I]], align 4
+// SIMD-ONLY0-NEXT:    br label [[FOR_COND1]], !llvm.loop [[LOOP4:![0-9]+]]
+// SIMD-ONLY0:       for.end9:
+// SIMD-ONLY0-NEXT:    br label [[FOR_INC10:%.*]]
+// SIMD-ONLY0:       for.inc10:
+// SIMD-ONLY0-NEXT:    [[TMP8:%.*]] = load i32, ptr [[J]], align 4
+// SIMD-ONLY0-NEXT:    [[INC11:%.*]] = add nsw i32 [[TMP8]], 1
+// SIMD-ONLY0-NEXT:    store i32 [[INC11]], ptr [[J]], align 4
+// SIMD-ONLY0-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
+// SIMD-ONLY0:       for.end12:
+// SIMD-ONLY0-NEXT:    ret void
+//

diff  --git a/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c b/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c
index b459cefa686c6..7bdb9749757cc 100644
--- a/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c
+++ b/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c
@@ -122,3 +122,5 @@ void nested_parallel_2(float *r, int a, double b) {
 }
 
 #endif
+//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+// IRBUILDER: {{.*}}

diff  --git a/clang/test/OpenMP/nested_loop_codegen.cpp b/clang/test/OpenMP/nested_loop_codegen.cpp
index e38d9db29b8ee..f30ca705d79ec 100644
--- a/clang/test/OpenMP/nested_loop_codegen.cpp
+++ b/clang/test/OpenMP/nested_loop_codegen.cpp
@@ -728,91 +728,91 @@ int inline_decl() {
 // CHECK4-NEXT:    [[DOTSTOP:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[DOTSTEP:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8
-// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META47:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48:![0-9]+]]
+// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META46:![0-9]+]], metadata !DIExpression()), !dbg [[DBG47:![0-9]+]]
 // CHECK4-NEXT:    store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
-// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META49:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48]]
+// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META48:![0-9]+]], metadata !DIExpression()), !dbg [[DBG47]]
 // CHECK4-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
-// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META50:![0-9]+]], metadata !DIExpression()), !dbg [[DBG52:![0-9]+]]
-// CHECK4-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG53:![0-9]+]]
-// CHECK4-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG53]]
-// CHECK4-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG53]]
-// CHECK4-NEXT:    store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG52]]
-// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META55:![0-9]+]], metadata !DIExpression()), !dbg [[DBG56:![0-9]+]]
-// CHECK4-NEXT:    store i32 5, ptr [[DOTSTOP]], align 4, !dbg [[DBG56]]
-// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META57:![0-9]+]], metadata !DIExpression()), !dbg [[DBG56]]
-// CHECK4-NEXT:    store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG56]]
-// CHECK4-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG56]]
-// CHECK4-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG56]]
-// CHECK4-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG56]]
-// CHECK4-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG56]]
+// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META49:![0-9]+]], metadata !DIExpression()), !dbg [[DBG51:![0-9]+]]
+// CHECK4-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG52:![0-9]+]]
+// CHECK4-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG52]]
+// CHECK4-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG52]]
+// CHECK4-NEXT:    store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG51]]
+// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META54:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55:![0-9]+]]
+// CHECK4-NEXT:    store i32 5, ptr [[DOTSTOP]], align 4, !dbg [[DBG55]]
+// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META56:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]]
+// CHECK4-NEXT:    store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG55]]
+// CHECK4-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG55]]
+// CHECK4-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG55]]
+// CHECK4-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG55]]
+// CHECK4-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG55]]
 // CHECK4:       cond.true:
-// CHECK4-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG56]]
-// CHECK4-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG56]]
-// CHECK4-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG56]]
-// CHECK4-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG56]]
-// CHECK4-NEXT:    [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG56]]
-// CHECK4-NEXT:    [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG56]]
-// CHECK4-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG56]]
-// CHECK4-NEXT:    [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG56]]
-// CHECK4-NEXT:    br label [[COND_END:%.*]], !dbg [[DBG56]]
+// CHECK4-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG55]]
+// CHECK4-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG55]]
+// CHECK4-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG55]]
+// CHECK4-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG55]]
+// CHECK4-NEXT:    [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG55]]
+// CHECK4-NEXT:    [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG55]]
+// CHECK4-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG55]]
+// CHECK4-NEXT:    [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG55]]
+// CHECK4-NEXT:    br label [[COND_END:%.*]], !dbg [[DBG55]]
 // CHECK4:       cond.false:
-// CHECK4-NEXT:    br label [[COND_END]], !dbg [[DBG56]]
+// CHECK4-NEXT:    br label [[COND_END]], !dbg [[DBG55]]
 // CHECK4:       cond.end:
-// CHECK4-NEXT:    [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG56]]
-// CHECK4-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG56]]
-// CHECK4-NEXT:    store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG56]]
-// CHECK4-NEXT:    ret void, !dbg [[DBG58:![0-9]+]]
+// CHECK4-NEXT:    [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG55]]
+// CHECK4-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG55]]
+// CHECK4-NEXT:    store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG55]]
+// CHECK4-NEXT:    ret void, !dbg [[DBG57:![0-9]+]]
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@__captured_stmt.1
-// CHECK4-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG60:![0-9]+]] {
+// CHECK4-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG59:![0-9]+]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8
 // CHECK4-NEXT:    [[LOGICAL_ADDR:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
 // CHECK4-NEXT:    store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8
-// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META68:![0-9]+]], metadata !DIExpression()), !dbg [[DBG69:![0-9]+]]
+// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META67:![0-9]+]], metadata !DIExpression()), !dbg [[DBG68:![0-9]+]]
 // CHECK4-NEXT:    store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4
-// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG69]]
+// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META69:![0-9]+]], metadata !DIExpression()), !dbg [[DBG68]]
 // CHECK4-NEXT:    store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
-// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG69]]
+// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG68]]
 // CHECK4-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
-// CHECK4-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG72:![0-9]+]]
-// CHECK4-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG72]]
-// CHECK4-NEXT:    [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG74:![0-9]+]]
-// CHECK4-NEXT:    [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG74]]
-// CHECK4-NEXT:    [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG74]]
-// CHECK4-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG74]]
-// CHECK4-NEXT:    store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG69]]
-// CHECK4-NEXT:    ret void, !dbg [[DBG72]]
+// CHECK4-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG71:![0-9]+]]
+// CHECK4-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG71]]
+// CHECK4-NEXT:    [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG73:![0-9]+]]
+// CHECK4-NEXT:    [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG73]]
+// CHECK4-NEXT:    [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG73]]
+// CHECK4-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG73]]
+// CHECK4-NEXT:    store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG68]]
+// CHECK4-NEXT:    ret void, !dbg [[DBG71]]
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@_Z11inline_declv
-// CHECK4-SAME: () #[[ATTR0]] !dbg [[DBG77:![0-9]+]] {
+// CHECK4-SAME: () #[[ATTR0]] !dbg [[DBG76:![0-9]+]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[STRUCTARG:%.*]] = alloca { ptr, ptr }, align 8
 // CHECK4-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[RES:%.*]] = alloca i32, align 4
-// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META78:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79:![0-9]+]]
-// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[RES]], metadata [[META80:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]]
-// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]), !dbg [[DBG81:![0-9]+]]
+// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META77:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78:![0-9]+]]
+// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[RES]], metadata [[META79:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78]]
+// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]), !dbg [[DBG80:![0-9]+]]
 // CHECK4-NEXT:    br label [[OMP_PARALLEL:%.*]]
 // CHECK4:       omp_parallel:
 // CHECK4-NEXT:    [[GEP_I:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0
 // CHECK4-NEXT:    store ptr [[I]], ptr [[GEP_I]], align 8
 // CHECK4-NEXT:    [[GEP_RES:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1
 // CHECK4-NEXT:    store ptr [[RES]], ptr [[GEP_RES]], align 8
-// CHECK4-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @_Z11inline_declv..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG82:![0-9]+]]
+// CHECK4-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @_Z11inline_declv..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG81:![0-9]+]]
 // CHECK4-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 // CHECK4:       omp.par.outlined.exit:
 // CHECK4-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
 // CHECK4:       omp.par.exit.split:
-// CHECK4-NEXT:    [[TMP0:%.*]] = load i32, ptr [[RES]], align 4, !dbg [[DBG84:![0-9]+]]
-// CHECK4-NEXT:    ret i32 [[TMP0]], !dbg [[DBG84]]
+// CHECK4-NEXT:    [[TMP0:%.*]] = load i32, ptr [[RES]], align 4, !dbg [[DBG83:![0-9]+]]
+// CHECK4-NEXT:    ret i32 [[TMP0]], !dbg [[DBG83]]
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@_Z11inline_declv..omp_par
-// CHECK4-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG85:![0-9]+]] {
+// CHECK4-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG84:![0-9]+]] {
 // CHECK4-NEXT:  omp.par.entry:
 // CHECK4-NEXT:    [[GEP_I:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 0
 // CHECK4-NEXT:    [[LOADGEP_I:%.*]] = load ptr, ptr [[GEP_I]], align 8
@@ -828,61 +828,61 @@ int inline_decl() {
 // CHECK4-NEXT:    [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 // CHECK4:       omp.par.region:
-// CHECK4-NEXT:    store i32 0, ptr [[LOADGEP_I]], align 4, !dbg [[DBG86:![0-9]+]]
-// CHECK4-NEXT:    br label [[FOR_COND:%.*]], !dbg [[DBG86]]
+// CHECK4-NEXT:    store i32 0, ptr [[LOADGEP_I]], align 4, !dbg [[DBG85:![0-9]+]]
+// CHECK4-NEXT:    br label [[FOR_COND:%.*]], !dbg [[DBG85]]
 // CHECK4:       for.cond:
-// CHECK4-NEXT:    [[TMP2:%.*]] = load i32, ptr [[LOADGEP_I]], align 4, !dbg [[DBG88:![0-9]+]]
-// CHECK4-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10, !dbg [[DBG88]]
-// CHECK4-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]], !dbg [[DBG86]]
+// CHECK4-NEXT:    [[TMP2:%.*]] = load i32, ptr [[LOADGEP_I]], align 4, !dbg [[DBG87:![0-9]+]]
+// CHECK4-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10, !dbg [[DBG87]]
+// CHECK4-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]], !dbg [[DBG85]]
 // CHECK4:       for.end:
-// CHECK4-NEXT:    br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG90:![0-9]+]]
+// CHECK4-NEXT:    br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG89:![0-9]+]]
 // CHECK4:       omp.par.region.parallel.after:
 // CHECK4-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 // CHECK4:       omp.par.pre_finalize:
-// CHECK4-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG90]]
+// CHECK4-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG89]]
 // CHECK4:       for.body:
-// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[K]], metadata [[META91:![0-9]+]], metadata !DIExpression()), !dbg [[DBG95:![0-9]+]]
-// CHECK4-NEXT:    store i32 0, ptr [[K]], align 4, !dbg [[DBG95]]
-// CHECK4-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG95]]
-// CHECK4-NEXT:    store ptr [[K]], ptr [[TMP3]], align 8, !dbg [[DBG95]]
-// CHECK4-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED1]], i32 0, i32 0, !dbg [[DBG95]]
-// CHECK4-NEXT:    [[TMP5:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG96:![0-9]+]]
-// CHECK4-NEXT:    store i32 [[TMP5]], ptr [[TMP4]], align 4, !dbg [[DBG95]]
-// CHECK4-NEXT:    call void @__captured_stmt.2(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[DBG95]]
-// CHECK4-NEXT:    [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG95]]
-// CHECK4-NEXT:    br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG95]]
+// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[K]], metadata [[META90:![0-9]+]], metadata !DIExpression()), !dbg [[DBG94:![0-9]+]]
+// CHECK4-NEXT:    store i32 0, ptr [[K]], align 4, !dbg [[DBG94]]
+// CHECK4-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG94]]
+// CHECK4-NEXT:    store ptr [[K]], ptr [[TMP3]], align 8, !dbg [[DBG94]]
+// CHECK4-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED1]], i32 0, i32 0, !dbg [[DBG94]]
+// CHECK4-NEXT:    [[TMP5:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG95:![0-9]+]]
+// CHECK4-NEXT:    store i32 [[TMP5]], ptr [[TMP4]], align 4, !dbg [[DBG94]]
+// CHECK4-NEXT:    call void @__captured_stmt.2(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[DBG94]]
+// CHECK4-NEXT:    [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG94]]
+// CHECK4-NEXT:    br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG94]]
 // CHECK4:       omp_loop.preheader:
-// CHECK4-NEXT:    br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG95]]
+// CHECK4-NEXT:    br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG94]]
 // CHECK4:       omp_loop.header:
-// CHECK4-NEXT:    [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG95]]
-// CHECK4-NEXT:    br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG95]]
+// CHECK4-NEXT:    [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG94]]
+// CHECK4-NEXT:    br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG94]]
 // CHECK4:       omp_loop.cond:
-// CHECK4-NEXT:    [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[DOTCOUNT]], !dbg [[DBG95]]
-// CHECK4-NEXT:    br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG95]]
+// CHECK4-NEXT:    [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[DOTCOUNT]], !dbg [[DBG94]]
+// CHECK4-NEXT:    br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG94]]
 // CHECK4:       omp_loop.exit:
-// CHECK4-NEXT:    br label [[OMP_LOOP_AFTER:%.*]], !dbg [[DBG95]]
+// CHECK4-NEXT:    br label [[OMP_LOOP_AFTER:%.*]], !dbg [[DBG94]]
 // CHECK4:       omp_loop.after:
-// CHECK4-NEXT:    br label [[FOR_INC:%.*]], !dbg [[DBG97:![0-9]+]]
+// CHECK4-NEXT:    br label [[FOR_INC:%.*]], !dbg [[DBG96:![0-9]+]]
 // CHECK4:       for.inc:
-// CHECK4-NEXT:    [[TMP6:%.*]] = load i32, ptr [[LOADGEP_I]], align 4, !dbg [[DBG88]]
-// CHECK4-NEXT:    [[INC2:%.*]] = add nsw i32 [[TMP6]], 1, !dbg [[DBG88]]
-// CHECK4-NEXT:    store i32 [[INC2]], ptr [[LOADGEP_I]], align 4, !dbg [[DBG88]]
-// CHECK4-NEXT:    br label [[FOR_COND]], !dbg [[DBG88]], !llvm.loop [[LOOP98:![0-9]+]]
+// CHECK4-NEXT:    [[TMP6:%.*]] = load i32, ptr [[LOADGEP_I]], align 4, !dbg [[DBG87]]
+// CHECK4-NEXT:    [[INC2:%.*]] = add nsw i32 [[TMP6]], 1, !dbg [[DBG87]]
+// CHECK4-NEXT:    store i32 [[INC2]], ptr [[LOADGEP_I]], align 4, !dbg [[DBG87]]
+// CHECK4-NEXT:    br label [[FOR_COND]], !dbg [[DBG87]], !llvm.loop [[LOOP97:![0-9]+]]
 // CHECK4:       omp_loop.body:
-// CHECK4-NEXT:    call void @__captured_stmt.3(ptr [[K]], i32 [[OMP_LOOP_IV]], ptr [[AGG_CAPTURED1]]), !dbg [[DBG95]]
-// CHECK4-NEXT:    [[TMP7:%.*]] = load i32, ptr [[LOADGEP_RES]], align 4, !dbg [[DBG99:![0-9]+]]
-// CHECK4-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP7]], 1, !dbg [[DBG99]]
-// CHECK4-NEXT:    store i32 [[INC]], ptr [[LOADGEP_RES]], align 4, !dbg [[DBG99]]
-// CHECK4-NEXT:    br label [[OMP_LOOP_INC]], !dbg [[DBG95]]
+// CHECK4-NEXT:    call void @__captured_stmt.3(ptr [[K]], i32 [[OMP_LOOP_IV]], ptr [[AGG_CAPTURED1]]), !dbg [[DBG94]]
+// CHECK4-NEXT:    [[TMP7:%.*]] = load i32, ptr [[LOADGEP_RES]], align 4, !dbg [[DBG98:![0-9]+]]
+// CHECK4-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP7]], 1, !dbg [[DBG98]]
+// CHECK4-NEXT:    store i32 [[INC]], ptr [[LOADGEP_RES]], align 4, !dbg [[DBG98]]
+// CHECK4-NEXT:    br label [[OMP_LOOP_INC]], !dbg [[DBG94]]
 // CHECK4:       omp_loop.inc:
-// CHECK4-NEXT:    [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG95]]
-// CHECK4-NEXT:    br label [[OMP_LOOP_HEADER]], !dbg [[DBG95]]
+// CHECK4-NEXT:    [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG94]]
+// CHECK4-NEXT:    br label [[OMP_LOOP_HEADER]], !dbg [[DBG94]]
 // CHECK4:       omp.par.outlined.exit.exitStub:
 // CHECK4-NEXT:    ret void
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@__captured_stmt.2
-// CHECK4-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG101:![0-9]+]] {
+// CHECK4-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG100:![0-9]+]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8
 // CHECK4-NEXT:    [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
@@ -890,61 +890,61 @@ int inline_decl() {
 // CHECK4-NEXT:    [[DOTSTOP:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[DOTSTEP:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8
-// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META102:![0-9]+]], metadata !DIExpression()), !dbg [[DBG103:![0-9]+]]
+// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META101:![0-9]+]], metadata !DIExpression()), !dbg [[DBG102:![0-9]+]]
 // CHECK4-NEXT:    store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
-// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META104:![0-9]+]], metadata !DIExpression()), !dbg [[DBG103]]
+// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META103:![0-9]+]], metadata !DIExpression()), !dbg [[DBG102]]
 // CHECK4-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
-// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META105:![0-9]+]], metadata !DIExpression()), !dbg [[DBG107:![0-9]+]]
-// CHECK4-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG108:![0-9]+]]
-// CHECK4-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG108]]
-// CHECK4-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG108]]
-// CHECK4-NEXT:    store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG107]]
-// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META110:![0-9]+]], metadata !DIExpression()), !dbg [[DBG111:![0-9]+]]
-// CHECK4-NEXT:    store i32 5, ptr [[DOTSTOP]], align 4, !dbg [[DBG111]]
-// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META112:![0-9]+]], metadata !DIExpression()), !dbg [[DBG111]]
-// CHECK4-NEXT:    store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG111]]
-// CHECK4-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG111]]
-// CHECK4-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG111]]
-// CHECK4-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG111]]
-// CHECK4-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG111]]
+// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META104:![0-9]+]], metadata !DIExpression()), !dbg [[DBG106:![0-9]+]]
+// CHECK4-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG107:![0-9]+]]
+// CHECK4-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG107]]
+// CHECK4-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG107]]
+// CHECK4-NEXT:    store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG106]]
+// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META109:![0-9]+]], metadata !DIExpression()), !dbg [[DBG110:![0-9]+]]
+// CHECK4-NEXT:    store i32 5, ptr [[DOTSTOP]], align 4, !dbg [[DBG110]]
+// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META111:![0-9]+]], metadata !DIExpression()), !dbg [[DBG110]]
+// CHECK4-NEXT:    store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG110]]
+// CHECK4-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG110]]
+// CHECK4-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG110]]
+// CHECK4-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG110]]
+// CHECK4-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG110]]
 // CHECK4:       cond.true:
-// CHECK4-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG111]]
-// CHECK4-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG111]]
-// CHECK4-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG111]]
-// CHECK4-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG111]]
-// CHECK4-NEXT:    [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG111]]
-// CHECK4-NEXT:    [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG111]]
-// CHECK4-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG111]]
-// CHECK4-NEXT:    [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG111]]
-// CHECK4-NEXT:    br label [[COND_END:%.*]], !dbg [[DBG111]]
+// CHECK4-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG110]]
+// CHECK4-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG110]]
+// CHECK4-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG110]]
+// CHECK4-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG110]]
+// CHECK4-NEXT:    [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG110]]
+// CHECK4-NEXT:    [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG110]]
+// CHECK4-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG110]]
+// CHECK4-NEXT:    [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG110]]
+// CHECK4-NEXT:    br label [[COND_END:%.*]], !dbg [[DBG110]]
 // CHECK4:       cond.false:
-// CHECK4-NEXT:    br label [[COND_END]], !dbg [[DBG111]]
+// CHECK4-NEXT:    br label [[COND_END]], !dbg [[DBG110]]
 // CHECK4:       cond.end:
-// CHECK4-NEXT:    [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG111]]
-// CHECK4-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG111]]
-// CHECK4-NEXT:    store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG111]]
-// CHECK4-NEXT:    ret void, !dbg [[DBG113:![0-9]+]]
+// CHECK4-NEXT:    [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG110]]
+// CHECK4-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG110]]
+// CHECK4-NEXT:    store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG110]]
+// CHECK4-NEXT:    ret void, !dbg [[DBG112:![0-9]+]]
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@__captured_stmt.3
-// CHECK4-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG115:![0-9]+]] {
+// CHECK4-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG114:![0-9]+]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8
 // CHECK4-NEXT:    [[LOGICAL_ADDR:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
 // CHECK4-NEXT:    store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8
-// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META116:![0-9]+]], metadata !DIExpression()), !dbg [[DBG117:![0-9]+]]
+// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META115:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116:![0-9]+]]
 // CHECK4-NEXT:    store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4
-// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META118:![0-9]+]], metadata !DIExpression()), !dbg [[DBG117]]
+// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META117:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116]]
 // CHECK4-NEXT:    store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
-// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META119:![0-9]+]], metadata !DIExpression()), !dbg [[DBG117]]
+// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META118:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116]]
 // CHECK4-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
-// CHECK4-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG120:![0-9]+]]
-// CHECK4-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG120]]
-// CHECK4-NEXT:    [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG122:![0-9]+]]
-// CHECK4-NEXT:    [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG122]]
-// CHECK4-NEXT:    [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG122]]
-// CHECK4-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG122]]
-// CHECK4-NEXT:    store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG117]]
-// CHECK4-NEXT:    ret void, !dbg [[DBG120]]
+// CHECK4-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG119:![0-9]+]]
+// CHECK4-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG119]]
+// CHECK4-NEXT:    [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG121:![0-9]+]]
+// CHECK4-NEXT:    [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG121]]
+// CHECK4-NEXT:    [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG121]]
+// CHECK4-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG121]]
+// CHECK4-NEXT:    store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG116]]
+// CHECK4-NEXT:    ret void, !dbg [[DBG119]]
 //

diff  --git a/clang/test/OpenMP/nvptx_lambda_capturing.cpp b/clang/test/OpenMP/nvptx_lambda_capturing.cpp
index 4a6de49e78cf0..9c1317dbc40f3 100644
--- a/clang/test/OpenMP/nvptx_lambda_capturing.cpp
+++ b/clang/test/OpenMP/nvptx_lambda_capturing.cpp
@@ -66,12 +66,14 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [11 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [11 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [11 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[_TMP4:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[_TMP5:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[_TMP6:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS7:%.*]] = alloca [11 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS8:%.*]] = alloca [11 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS9:%.*]] = alloca [11 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS10:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK1-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
@@ -188,7 +190,6 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP63]], align 8
 // CHECK1-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP66]], align 4
 // CHECK1-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -315,7 +316,6 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP135]], align 8
 // CHECK1-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS10:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP138]], align 4
 // CHECK1-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1
@@ -504,10 +504,12 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[_TMP2:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [3 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0
@@ -538,7 +540,6 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP13]], align 8
 // CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP16]], align 4
 // CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -597,7 +598,6 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP43]], align 8
 // CHECK1-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP46]], align 4
 // CHECK1-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
@@ -722,6 +722,7 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store ptr [[T]], ptr [[T_ADDR]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 8
 // CHECK1-NEXT:    store ptr [[TMP0]], ptr [[TMP]], align 8
@@ -742,7 +743,6 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP9]], align 8
 // CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP12]], align 4
 // CHECK1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
index b2cf555cafacf..c9639be846503 100644
--- a/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
@@ -52,6 +52,130 @@ int bar(int n){
 }
 
 #endif
+// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27
+// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
+// CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+// CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// CHECK:       user_code.entry:
+// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
+// CHECK-NEXT:    ret void
+// CHECK:       worker.exit:
+// CHECK-NEXT:    ret void
+// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__
+// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
+// CHECK-NEXT:    ret void
+// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31
+// CHECK-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[AA_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[AA_CASTED:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
+// CHECK-NEXT:    store i32 [[AA]], ptr [[AA_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
+// CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+// CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// CHECK:       user_code.entry:
+// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
+// CHECK-NEXT:    store i16 [[TMP2]], ptr [[AA_CASTED]], align 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr
+// CHECK-NEXT:    store ptr [[TMP5]], ptr [[TMP4]], align 4
+// CHECK-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
+// CHECK-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
+// CHECK-NEXT:    ret void
+// CHECK:       worker.exit:
+// CHECK-NEXT:    ret void
+// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__1
+// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[AA_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
+// CHECK-NEXT:    store i32 [[AA]], ptr [[AA_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2
+// CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP0]] to i32
+// CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 1
+// CHECK-NEXT:    [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
+// CHECK-NEXT:    store i16 [[CONV1]], ptr [[AA_ADDR]], align 2
+// CHECK-NEXT:    ret void
+// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36
+// CHECK-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[AA_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[AA_CASTED:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4
+// CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[AA]], ptr [[AA_ADDR]], align 4
+// CHECK-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
+// CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
+// CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// CHECK:       user_code.entry:
+// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[TMP3]], ptr [[A_CASTED]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4
+// CHECK-NEXT:    [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2
+// CHECK-NEXT:    store i16 [[TMP5]], ptr [[AA_CASTED]], align 2
+// CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i32 [[TMP4]] to ptr
+// CHECK-NEXT:    store ptr [[TMP8]], ptr [[TMP7]], align 4
+// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP10:%.*]] = inttoptr i32 [[TMP6]] to ptr
+// CHECK-NEXT:    store ptr [[TMP10]], ptr [[TMP9]], align 4
+// CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
+// CHECK-NEXT:    store ptr [[TMP0]], ptr [[TMP11]], align 4
+// CHECK-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
+// CHECK-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
+// CHECK-NEXT:    ret void
+// CHECK:       worker.exit:
+// CHECK-NEXT:    ret void
+// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__2
+// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[AA_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
+// CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[AA]], ptr [[AA_ADDR]], align 4
+// CHECK-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
+// CHECK-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
+// CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP2]] to i32
+// CHECK-NEXT:    [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
+// CHECK-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
+// CHECK-NEXT:    store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1
+// CHECK-NEXT:    store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT:    ret void
 // CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27
 // CHECK45-64-SAME: () #[[ATTR0:[0-9]+]] {
 // CHECK45-64-NEXT:  entry:
@@ -596,142 +720,6 @@ int bar(int n){
 // CHECK-64-NEXT:    ret void
 //
 //
-// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27
-// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
-// CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-// CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-// CHECK:       user_code.entry:
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
-// CHECK-NEXT:    ret void
-// CHECK:       worker.exit:
-// CHECK-NEXT:    ret void
-//
-//
-// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__
-// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-NEXT:    ret void
-//
-//
-// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31
-// CHECK-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[AA_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[AA_CASTED:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
-// CHECK-NEXT:    store i32 [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
-// CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-// CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-// CHECK:       user_code.entry:
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
-// CHECK-NEXT:    store i16 [[TMP2]], ptr [[AA_CASTED]], align 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr
-// CHECK-NEXT:    store ptr [[TMP5]], ptr [[TMP4]], align 4
-// CHECK-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
-// CHECK-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
-// CHECK-NEXT:    ret void
-// CHECK:       worker.exit:
-// CHECK-NEXT:    ret void
-//
-//
-// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__1
-// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-NEXT:    [[AA_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-NEXT:    store i32 [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2
-// CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP0]] to i32
-// CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 1
-// CHECK-NEXT:    [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
-// CHECK-NEXT:    store i16 [[CONV1]], ptr [[AA_ADDR]], align 2
-// CHECK-NEXT:    ret void
-//
-//
-// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36
-// CHECK-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[AA_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[AA_CASTED:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4
-// CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    store i32 [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
-// CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
-// CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-// CHECK:       user_code.entry:
-// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    store i32 [[TMP3]], ptr [[A_CASTED]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4
-// CHECK-NEXT:    [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2
-// CHECK-NEXT:    store i16 [[TMP5]], ptr [[AA_CASTED]], align 2
-// CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4
-// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i32 [[TMP4]] to ptr
-// CHECK-NEXT:    store ptr [[TMP8]], ptr [[TMP7]], align 4
-// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
-// CHECK-NEXT:    [[TMP10:%.*]] = inttoptr i32 [[TMP6]] to ptr
-// CHECK-NEXT:    store ptr [[TMP10]], ptr [[TMP9]], align 4
-// CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
-// CHECK-NEXT:    store ptr [[TMP0]], ptr [[TMP11]], align 4
-// CHECK-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
-// CHECK-NEXT:    call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
-// CHECK-NEXT:    ret void
-// CHECK:       worker.exit:
-// CHECK-NEXT:    ret void
-//
-//
-// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__2
-// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[AA_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    store i32 [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
-// CHECK-NEXT:    store i32 [[ADD]], ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
-// CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP2]] to i32
-// CHECK-NEXT:    [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
-// CHECK-NEXT:    [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
-// CHECK-NEXT:    store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
-// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2
-// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-// CHECK-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1
-// CHECK-NEXT:    store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
-// CHECK-NEXT:    ret void
-//
-//
 // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27
 // CHECK-32-SAME: () #[[ATTR0:[0-9]+]] {
 // CHECK-32-NEXT:  entry:

diff  --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp
index 17140487a23cd..5b7e31ba5d3b0 100644
--- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp
+++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp
@@ -434,7 +434,7 @@ void test() {
 // CHECK1-NEXT:    [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 8
 // CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
 // CHECK1-NEXT:    [[TMP36:%.*]] = load ptr, ptr [[TMP35]], align 8
-// CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP36]], ptr align 4 [[TMP34]], i64 8, i1 false), !tbaa.struct !21
+// CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP36]], ptr align 4 [[TMP34]], i64 8, i1 false), !tbaa.struct [[TBAA_STRUCT21:![0-9]+]]
 // CHECK1-NEXT:    br label [[IFCONT6:%.*]]
 // CHECK1:       else5:
 // CHECK1-NEXT:    br label [[IFCONT6]]
@@ -939,7 +939,7 @@ void test() {
 // CHECK1-NEXT:    [[TMP41:%.*]] = load ptr, ptr [[TMP40]], align 8
 // CHECK1-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
 // CHECK1-NEXT:    [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8
-// CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP43]], ptr align 8 [[TMP41]], i64 16, i1 false), !tbaa.struct !27
+// CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP43]], ptr align 8 [[TMP41]], i64 16, i1 false), !tbaa.struct [[TBAA_STRUCT27:![0-9]+]]
 // CHECK1-NEXT:    br label [[IFCONT6:%.*]]
 // CHECK1:       else5:
 // CHECK1-NEXT:    br label [[IFCONT6]]

diff  --git a/clang/test/OpenMP/reduction_compound_op.cpp b/clang/test/OpenMP/reduction_compound_op.cpp
index 44dfb6081ecc6..e21944dfd426e 100644
--- a/clang/test/OpenMP/reduction_compound_op.cpp
+++ b/clang/test/OpenMP/reduction_compound_op.cpp
@@ -18,7 +18,6 @@
 //RUN: %clang_cc1 -no-enable-noundef-analysis -triple x86_64-unknown-linux-gnu -fopenmp-simd -DNORM -DCOMP \
 //RUN:  -emit-llvm -o - %s | FileCheck %s --check-prefix SIMD-ONLY
 
-// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
 
 struct Point {
   int x = 0;
@@ -206,10 +205,10 @@ void foo(int N, Point const *Points) {
 // NORM-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
 // NORM-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
 // NORM-NEXT:    store ptr [[RED3]], ptr [[TMP22]], align 8
-// NORM-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// NORM-NEXT:    [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
-// NORM-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// NORM-NEXT:    switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// NORM-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
+// NORM-NEXT:    [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// NORM-NEXT:    switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
 // NORM-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
 // NORM-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
 // NORM-NEXT:    ]
@@ -217,16 +216,16 @@ void foo(int N, Point const *Points) {
 // NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointplERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]])
 // NORM-NEXT:    store i64 [[CALL]], ptr [[REF_TMP]], align 4
 // NORM-NEXT:    [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]])
-// NORM-NEXT:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var)
+// NORM-NEXT:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
 // NORM-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // NORM:       .omp.reduction.case2:
-// NORM-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// NORM-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// NORM-NEXT:    call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// NORM-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
+// NORM-NEXT:    call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var)
 // NORM-NEXT:    [[CALL11:%.*]] = call i64 @_ZNK5PointplERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]])
 // NORM-NEXT:    store i64 [[CALL11]], ptr [[REF_TMP10]], align 4
 // NORM-NEXT:    [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP10]])
-// NORM-NEXT:    call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// NORM-NEXT:    call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var)
 // NORM-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // NORM:       .omp.reduction.default:
 // NORM-NEXT:    br label [[OMP_PRECOND_END]]
@@ -243,14 +242,14 @@ void foo(int N, Point const *Points) {
 // NORM-NEXT:    store ptr [[TMP0]], ptr [[DOTADDR]], align 8
 // NORM-NEXT:    store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
 // NORM-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// NORM-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// NORM-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// NORM-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// NORM-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// NORM-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// NORM-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
 // NORM-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// NORM-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// NORM-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointplERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[TMP7]])
+// NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointplERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[TMP5]])
 // NORM-NEXT:    store i64 [[CALL]], ptr [[REF_TMP]], align 4
-// NORM-NEXT:    [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]])
+// NORM-NEXT:    [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]])
 // NORM-NEXT:    ret void
 //
 //
@@ -351,10 +350,10 @@ void foo(int N, Point const *Points) {
 // NORM-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
 // NORM-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
 // NORM-NEXT:    store ptr [[RED3]], ptr [[TMP22]], align 8
-// NORM-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// NORM-NEXT:    [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
-// NORM-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var)
-// NORM-NEXT:    switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// NORM-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
+// NORM-NEXT:    [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var)
+// NORM-NEXT:    switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
 // NORM-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
 // NORM-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
 // NORM-NEXT:    ]
@@ -362,16 +361,16 @@ void foo(int N, Point const *Points) {
 // NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointplERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]])
 // NORM-NEXT:    store i64 [[CALL]], ptr [[REF_TMP]], align 4
 // NORM-NEXT:    [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]])
-// NORM-NEXT:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var)
+// NORM-NEXT:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
 // NORM-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // NORM:       .omp.reduction.case2:
-// NORM-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// NORM-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// NORM-NEXT:    call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// NORM-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
+// NORM-NEXT:    call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var)
 // NORM-NEXT:    [[CALL11:%.*]] = call i64 @_ZNK5PointplERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]])
 // NORM-NEXT:    store i64 [[CALL11]], ptr [[REF_TMP10]], align 4
 // NORM-NEXT:    [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP10]])
-// NORM-NEXT:    call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// NORM-NEXT:    call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var)
 // NORM-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // NORM:       .omp.reduction.default:
 // NORM-NEXT:    br label [[OMP_PRECOND_END]]
@@ -388,14 +387,14 @@ void foo(int N, Point const *Points) {
 // NORM-NEXT:    store ptr [[TMP0]], ptr [[DOTADDR]], align 8
 // NORM-NEXT:    store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
 // NORM-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// NORM-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// NORM-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// NORM-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// NORM-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// NORM-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// NORM-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
 // NORM-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// NORM-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// NORM-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointplERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[TMP7]])
+// NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointplERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[TMP5]])
 // NORM-NEXT:    store i64 [[CALL]], ptr [[REF_TMP]], align 4
-// NORM-NEXT:    [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]])
+// NORM-NEXT:    [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]])
 // NORM-NEXT:    ret void
 //
 //
@@ -496,10 +495,10 @@ void foo(int N, Point const *Points) {
 // NORM-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
 // NORM-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
 // NORM-NEXT:    store ptr [[RED3]], ptr [[TMP22]], align 8
-// NORM-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// NORM-NEXT:    [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
-// NORM-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.4, ptr @.gomp_critical_user_.reduction.var)
-// NORM-NEXT:    switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// NORM-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
+// NORM-NEXT:    [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.4, ptr @.gomp_critical_user_.reduction.var)
+// NORM-NEXT:    switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
 // NORM-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
 // NORM-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
 // NORM-NEXT:    ]
@@ -507,16 +506,16 @@ void foo(int N, Point const *Points) {
 // NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointmlERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]])
 // NORM-NEXT:    store i64 [[CALL]], ptr [[REF_TMP]], align 4
 // NORM-NEXT:    [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]])
-// NORM-NEXT:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var)
+// NORM-NEXT:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
 // NORM-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // NORM:       .omp.reduction.case2:
-// NORM-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// NORM-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// NORM-NEXT:    call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// NORM-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
+// NORM-NEXT:    call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var)
 // NORM-NEXT:    [[CALL11:%.*]] = call i64 @_ZNK5PointmlERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]])
 // NORM-NEXT:    store i64 [[CALL11]], ptr [[REF_TMP10]], align 4
 // NORM-NEXT:    [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP10]])
-// NORM-NEXT:    call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// NORM-NEXT:    call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var)
 // NORM-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // NORM:       .omp.reduction.default:
 // NORM-NEXT:    br label [[OMP_PRECOND_END]]
@@ -533,14 +532,14 @@ void foo(int N, Point const *Points) {
 // NORM-NEXT:    store ptr [[TMP0]], ptr [[DOTADDR]], align 8
 // NORM-NEXT:    store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
 // NORM-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// NORM-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// NORM-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// NORM-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// NORM-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// NORM-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// NORM-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
 // NORM-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// NORM-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// NORM-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointmlERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[TMP7]])
+// NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointmlERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[TMP5]])
 // NORM-NEXT:    store i64 [[CALL]], ptr [[REF_TMP]], align 4
-// NORM-NEXT:    [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]])
+// NORM-NEXT:    [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]])
 // NORM-NEXT:    ret void
 //
 //
@@ -641,10 +640,10 @@ void foo(int N, Point const *Points) {
 // NORM-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
 // NORM-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
 // NORM-NEXT:    store ptr [[RED3]], ptr [[TMP22]], align 8
-// NORM-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// NORM-NEXT:    [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
-// NORM-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var)
-// NORM-NEXT:    switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// NORM-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
+// NORM-NEXT:    [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var)
+// NORM-NEXT:    switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
 // NORM-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
 // NORM-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
 // NORM-NEXT:    ]
@@ -652,16 +651,16 @@ void foo(int N, Point const *Points) {
 // NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointanERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]])
 // NORM-NEXT:    store i64 [[CALL]], ptr [[REF_TMP]], align 4
 // NORM-NEXT:    [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]])
-// NORM-NEXT:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var)
+// NORM-NEXT:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
 // NORM-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // NORM:       .omp.reduction.case2:
-// NORM-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// NORM-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// NORM-NEXT:    call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// NORM-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
+// NORM-NEXT:    call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var)
 // NORM-NEXT:    [[CALL11:%.*]] = call i64 @_ZNK5PointanERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]])
 // NORM-NEXT:    store i64 [[CALL11]], ptr [[REF_TMP10]], align 4
 // NORM-NEXT:    [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP10]])
-// NORM-NEXT:    call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// NORM-NEXT:    call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var)
 // NORM-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // NORM:       .omp.reduction.default:
 // NORM-NEXT:    br label [[OMP_PRECOND_END]]
@@ -678,14 +677,14 @@ void foo(int N, Point const *Points) {
 // NORM-NEXT:    store ptr [[TMP0]], ptr [[DOTADDR]], align 8
 // NORM-NEXT:    store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
 // NORM-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// NORM-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// NORM-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// NORM-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// NORM-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// NORM-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// NORM-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
 // NORM-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// NORM-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// NORM-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointanERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[TMP7]])
+// NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointanERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[TMP5]])
 // NORM-NEXT:    store i64 [[CALL]], ptr [[REF_TMP]], align 4
-// NORM-NEXT:    [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]])
+// NORM-NEXT:    [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]])
 // NORM-NEXT:    ret void
 //
 //
@@ -786,10 +785,10 @@ void foo(int N, Point const *Points) {
 // NORM-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
 // NORM-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
 // NORM-NEXT:    store ptr [[RED3]], ptr [[TMP22]], align 8
-// NORM-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// NORM-NEXT:    [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
-// NORM-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.8, ptr @.gomp_critical_user_.reduction.var)
-// NORM-NEXT:    switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// NORM-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
+// NORM-NEXT:    [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.8, ptr @.gomp_critical_user_.reduction.var)
+// NORM-NEXT:    switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
 // NORM-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
 // NORM-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
 // NORM-NEXT:    ]
@@ -797,16 +796,16 @@ void foo(int N, Point const *Points) {
 // NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointorERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]])
 // NORM-NEXT:    store i64 [[CALL]], ptr [[REF_TMP]], align 4
 // NORM-NEXT:    [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]])
-// NORM-NEXT:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var)
+// NORM-NEXT:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
 // NORM-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // NORM:       .omp.reduction.case2:
-// NORM-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// NORM-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// NORM-NEXT:    call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// NORM-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
+// NORM-NEXT:    call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var)
 // NORM-NEXT:    [[CALL11:%.*]] = call i64 @_ZNK5PointorERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]])
 // NORM-NEXT:    store i64 [[CALL11]], ptr [[REF_TMP10]], align 4
 // NORM-NEXT:    [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP10]])
-// NORM-NEXT:    call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// NORM-NEXT:    call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var)
 // NORM-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // NORM:       .omp.reduction.default:
 // NORM-NEXT:    br label [[OMP_PRECOND_END]]
@@ -823,14 +822,14 @@ void foo(int N, Point const *Points) {
 // NORM-NEXT:    store ptr [[TMP0]], ptr [[DOTADDR]], align 8
 // NORM-NEXT:    store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
 // NORM-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// NORM-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// NORM-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// NORM-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// NORM-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// NORM-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// NORM-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
 // NORM-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// NORM-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// NORM-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointorERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[TMP7]])
+// NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointorERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[TMP5]])
 // NORM-NEXT:    store i64 [[CALL]], ptr [[REF_TMP]], align 4
-// NORM-NEXT:    [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]])
+// NORM-NEXT:    [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]])
 // NORM-NEXT:    ret void
 //
 //
@@ -931,10 +930,10 @@ void foo(int N, Point const *Points) {
 // NORM-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
 // NORM-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
 // NORM-NEXT:    store ptr [[RED3]], ptr [[TMP22]], align 8
-// NORM-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// NORM-NEXT:    [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
-// NORM-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.10, ptr @.gomp_critical_user_.reduction.var)
-// NORM-NEXT:    switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// NORM-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
+// NORM-NEXT:    [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.10, ptr @.gomp_critical_user_.reduction.var)
+// NORM-NEXT:    switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
 // NORM-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
 // NORM-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
 // NORM-NEXT:    ]
@@ -942,16 +941,16 @@ void foo(int N, Point const *Points) {
 // NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointeoERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]])
 // NORM-NEXT:    store i64 [[CALL]], ptr [[REF_TMP]], align 4
 // NORM-NEXT:    [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]])
-// NORM-NEXT:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var)
+// NORM-NEXT:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
 // NORM-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // NORM:       .omp.reduction.case2:
-// NORM-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// NORM-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// NORM-NEXT:    call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// NORM-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
+// NORM-NEXT:    call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var)
 // NORM-NEXT:    [[CALL11:%.*]] = call i64 @_ZNK5PointeoERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]])
 // NORM-NEXT:    store i64 [[CALL11]], ptr [[REF_TMP10]], align 4
 // NORM-NEXT:    [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP10]])
-// NORM-NEXT:    call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// NORM-NEXT:    call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var)
 // NORM-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // NORM:       .omp.reduction.default:
 // NORM-NEXT:    br label [[OMP_PRECOND_END]]
@@ -968,14 +967,14 @@ void foo(int N, Point const *Points) {
 // NORM-NEXT:    store ptr [[TMP0]], ptr [[DOTADDR]], align 8
 // NORM-NEXT:    store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
 // NORM-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// NORM-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// NORM-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// NORM-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// NORM-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// NORM-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// NORM-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
 // NORM-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// NORM-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// NORM-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointeoERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[TMP7]])
+// NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointeoERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[TMP5]])
 // NORM-NEXT:    store i64 [[CALL]], ptr [[REF_TMP]], align 4
-// NORM-NEXT:    [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]])
+// NORM-NEXT:    [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]])
 // NORM-NEXT:    ret void
 //
 //
@@ -1076,10 +1075,10 @@ void foo(int N, Point const *Points) {
 // NORM-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
 // NORM-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
 // NORM-NEXT:    store ptr [[RED3]], ptr [[TMP22]], align 8
-// NORM-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// NORM-NEXT:    [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
-// NORM-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.12, ptr @.gomp_critical_user_.reduction.var)
-// NORM-NEXT:    switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// NORM-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
+// NORM-NEXT:    [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.12, ptr @.gomp_critical_user_.reduction.var)
+// NORM-NEXT:    switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
 // NORM-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
 // NORM-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
 // NORM-NEXT:    ]
@@ -1087,16 +1086,16 @@ void foo(int N, Point const *Points) {
 // NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointaaERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]])
 // NORM-NEXT:    store i64 [[CALL]], ptr [[REF_TMP]], align 4
 // NORM-NEXT:    [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]])
-// NORM-NEXT:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var)
+// NORM-NEXT:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
 // NORM-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // NORM:       .omp.reduction.case2:
-// NORM-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// NORM-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// NORM-NEXT:    call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// NORM-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
+// NORM-NEXT:    call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var)
 // NORM-NEXT:    [[CALL11:%.*]] = call i64 @_ZNK5PointaaERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]])
 // NORM-NEXT:    store i64 [[CALL11]], ptr [[REF_TMP10]], align 4
 // NORM-NEXT:    [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP10]])
-// NORM-NEXT:    call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// NORM-NEXT:    call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var)
 // NORM-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // NORM:       .omp.reduction.default:
 // NORM-NEXT:    br label [[OMP_PRECOND_END]]
@@ -1113,14 +1112,14 @@ void foo(int N, Point const *Points) {
 // NORM-NEXT:    store ptr [[TMP0]], ptr [[DOTADDR]], align 8
 // NORM-NEXT:    store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
 // NORM-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// NORM-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// NORM-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// NORM-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// NORM-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// NORM-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// NORM-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
 // NORM-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// NORM-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// NORM-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointaaERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[TMP7]])
+// NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointaaERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[TMP5]])
 // NORM-NEXT:    store i64 [[CALL]], ptr [[REF_TMP]], align 4
-// NORM-NEXT:    [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]])
+// NORM-NEXT:    [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]])
 // NORM-NEXT:    ret void
 //
 //
@@ -1221,10 +1220,10 @@ void foo(int N, Point const *Points) {
 // NORM-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
 // NORM-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
 // NORM-NEXT:    store ptr [[RED3]], ptr [[TMP22]], align 8
-// NORM-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// NORM-NEXT:    [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
-// NORM-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.14, ptr @.gomp_critical_user_.reduction.var)
-// NORM-NEXT:    switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// NORM-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
+// NORM-NEXT:    [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.14, ptr @.gomp_critical_user_.reduction.var)
+// NORM-NEXT:    switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
 // NORM-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
 // NORM-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
 // NORM-NEXT:    ]
@@ -1232,16 +1231,16 @@ void foo(int N, Point const *Points) {
 // NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointooERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]])
 // NORM-NEXT:    store i64 [[CALL]], ptr [[REF_TMP]], align 4
 // NORM-NEXT:    [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]])
-// NORM-NEXT:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var)
+// NORM-NEXT:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
 // NORM-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // NORM:       .omp.reduction.case2:
-// NORM-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// NORM-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// NORM-NEXT:    call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// NORM-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
+// NORM-NEXT:    call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var)
 // NORM-NEXT:    [[CALL11:%.*]] = call i64 @_ZNK5PointooERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]])
 // NORM-NEXT:    store i64 [[CALL11]], ptr [[REF_TMP10]], align 4
 // NORM-NEXT:    [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP10]])
-// NORM-NEXT:    call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// NORM-NEXT:    call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var)
 // NORM-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // NORM:       .omp.reduction.default:
 // NORM-NEXT:    br label [[OMP_PRECOND_END]]
@@ -1258,14 +1257,14 @@ void foo(int N, Point const *Points) {
 // NORM-NEXT:    store ptr [[TMP0]], ptr [[DOTADDR]], align 8
 // NORM-NEXT:    store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
 // NORM-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// NORM-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// NORM-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// NORM-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// NORM-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// NORM-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// NORM-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
 // NORM-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// NORM-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// NORM-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointooERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[TMP7]])
+// NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointooERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[TMP5]])
 // NORM-NEXT:    store i64 [[CALL]], ptr [[REF_TMP]], align 4
-// NORM-NEXT:    [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]])
+// NORM-NEXT:    [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]])
 // NORM-NEXT:    ret void
 //
 //
@@ -1407,23 +1406,23 @@ void foo(int N, Point const *Points) {
 // COMP-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
 // COMP-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
 // COMP-NEXT:    store ptr [[RED3]], ptr [[TMP22]], align 8
-// COMP-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// COMP-NEXT:    [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
-// COMP-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// COMP-NEXT:    switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// COMP-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
+// COMP-NEXT:    [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// COMP-NEXT:    switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
 // COMP-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
 // COMP-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
 // COMP-NEXT:    ]
 // COMP:       .omp.reduction.case1:
 // COMP-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointpLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]])
-// COMP-NEXT:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var)
+// COMP-NEXT:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
 // COMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // COMP:       .omp.reduction.case2:
-// COMP-NEXT:    [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// COMP-NEXT:    [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4
-// COMP-NEXT:    call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP29]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// COMP-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
+// COMP-NEXT:    call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var)
 // COMP-NEXT:    [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointpLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]])
-// COMP-NEXT:    call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP29]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// COMP-NEXT:    call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var)
 // COMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // COMP:       .omp.reduction.default:
 // COMP-NEXT:    br label [[OMP_PRECOND_END]]
@@ -1439,12 +1438,12 @@ void foo(int N, Point const *Points) {
 // COMP-NEXT:    store ptr [[TMP0]], ptr [[DOTADDR]], align 8
 // COMP-NEXT:    store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
 // COMP-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// COMP-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// COMP-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// COMP-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// COMP-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// COMP-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// COMP-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
 // COMP-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// COMP-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// COMP-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// COMP-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointpLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[TMP7]])
+// COMP-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointpLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[TMP5]])
 // COMP-NEXT:    ret void
 //
 //
@@ -1543,23 +1542,23 @@ void foo(int N, Point const *Points) {
 // COMP-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
 // COMP-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
 // COMP-NEXT:    store ptr [[RED3]], ptr [[TMP22]], align 8
-// COMP-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// COMP-NEXT:    [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
-// COMP-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var)
-// COMP-NEXT:    switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// COMP-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
+// COMP-NEXT:    [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var)
+// COMP-NEXT:    switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
 // COMP-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
 // COMP-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
 // COMP-NEXT:    ]
 // COMP:       .omp.reduction.case1:
 // COMP-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointpLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]])
-// COMP-NEXT:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var)
+// COMP-NEXT:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
 // COMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // COMP:       .omp.reduction.case2:
-// COMP-NEXT:    [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// COMP-NEXT:    [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4
-// COMP-NEXT:    call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP29]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// COMP-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
+// COMP-NEXT:    call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var)
 // COMP-NEXT:    [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointpLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]])
-// COMP-NEXT:    call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP29]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// COMP-NEXT:    call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var)
 // COMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // COMP:       .omp.reduction.default:
 // COMP-NEXT:    br label [[OMP_PRECOND_END]]
@@ -1575,12 +1574,12 @@ void foo(int N, Point const *Points) {
 // COMP-NEXT:    store ptr [[TMP0]], ptr [[DOTADDR]], align 8
 // COMP-NEXT:    store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
 // COMP-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// COMP-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// COMP-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// COMP-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// COMP-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// COMP-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// COMP-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
 // COMP-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// COMP-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// COMP-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// COMP-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointpLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[TMP7]])
+// COMP-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointpLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[TMP5]])
 // COMP-NEXT:    ret void
 //
 //
@@ -1679,23 +1678,23 @@ void foo(int N, Point const *Points) {
 // COMP-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
 // COMP-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
 // COMP-NEXT:    store ptr [[RED3]], ptr [[TMP22]], align 8
-// COMP-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// COMP-NEXT:    [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
-// COMP-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.4, ptr @.gomp_critical_user_.reduction.var)
-// COMP-NEXT:    switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// COMP-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
+// COMP-NEXT:    [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.4, ptr @.gomp_critical_user_.reduction.var)
+// COMP-NEXT:    switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
 // COMP-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
 // COMP-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
 // COMP-NEXT:    ]
 // COMP:       .omp.reduction.case1:
 // COMP-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointmLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]])
-// COMP-NEXT:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var)
+// COMP-NEXT:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
 // COMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // COMP:       .omp.reduction.case2:
-// COMP-NEXT:    [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// COMP-NEXT:    [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4
-// COMP-NEXT:    call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP29]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// COMP-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
+// COMP-NEXT:    call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var)
 // COMP-NEXT:    [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointmLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]])
-// COMP-NEXT:    call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP29]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// COMP-NEXT:    call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var)
 // COMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // COMP:       .omp.reduction.default:
 // COMP-NEXT:    br label [[OMP_PRECOND_END]]
@@ -1711,12 +1710,12 @@ void foo(int N, Point const *Points) {
 // COMP-NEXT:    store ptr [[TMP0]], ptr [[DOTADDR]], align 8
 // COMP-NEXT:    store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
 // COMP-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// COMP-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// COMP-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// COMP-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// COMP-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// COMP-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// COMP-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
 // COMP-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// COMP-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// COMP-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// COMP-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointmLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[TMP7]])
+// COMP-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointmLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[TMP5]])
 // COMP-NEXT:    ret void
 //
 //
@@ -1815,23 +1814,23 @@ void foo(int N, Point const *Points) {
 // COMP-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
 // COMP-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
 // COMP-NEXT:    store ptr [[RED3]], ptr [[TMP22]], align 8
-// COMP-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// COMP-NEXT:    [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
-// COMP-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var)
-// COMP-NEXT:    switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// COMP-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
+// COMP-NEXT:    [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var)
+// COMP-NEXT:    switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
 // COMP-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
 // COMP-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
 // COMP-NEXT:    ]
 // COMP:       .omp.reduction.case1:
 // COMP-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaNERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]])
-// COMP-NEXT:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var)
+// COMP-NEXT:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
 // COMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // COMP:       .omp.reduction.case2:
-// COMP-NEXT:    [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// COMP-NEXT:    [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4
-// COMP-NEXT:    call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP29]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// COMP-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
+// COMP-NEXT:    call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var)
 // COMP-NEXT:    [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaNERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]])
-// COMP-NEXT:    call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP29]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// COMP-NEXT:    call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var)
 // COMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // COMP:       .omp.reduction.default:
 // COMP-NEXT:    br label [[OMP_PRECOND_END]]
@@ -1847,12 +1846,12 @@ void foo(int N, Point const *Points) {
 // COMP-NEXT:    store ptr [[TMP0]], ptr [[DOTADDR]], align 8
 // COMP-NEXT:    store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
 // COMP-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// COMP-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// COMP-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// COMP-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// COMP-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// COMP-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// COMP-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
 // COMP-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// COMP-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// COMP-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// COMP-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaNERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[TMP7]])
+// COMP-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaNERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[TMP5]])
 // COMP-NEXT:    ret void
 //
 //
@@ -1951,23 +1950,23 @@ void foo(int N, Point const *Points) {
 // COMP-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
 // COMP-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
 // COMP-NEXT:    store ptr [[RED3]], ptr [[TMP22]], align 8
-// COMP-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// COMP-NEXT:    [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
-// COMP-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.8, ptr @.gomp_critical_user_.reduction.var)
-// COMP-NEXT:    switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// COMP-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
+// COMP-NEXT:    [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.8, ptr @.gomp_critical_user_.reduction.var)
+// COMP-NEXT:    switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
 // COMP-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
 // COMP-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
 // COMP-NEXT:    ]
 // COMP:       .omp.reduction.case1:
 // COMP-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointoRERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]])
-// COMP-NEXT:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var)
+// COMP-NEXT:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
 // COMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // COMP:       .omp.reduction.case2:
-// COMP-NEXT:    [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// COMP-NEXT:    [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4
-// COMP-NEXT:    call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP29]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// COMP-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
+// COMP-NEXT:    call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var)
 // COMP-NEXT:    [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointoRERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]])
-// COMP-NEXT:    call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP29]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// COMP-NEXT:    call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var)
 // COMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // COMP:       .omp.reduction.default:
 // COMP-NEXT:    br label [[OMP_PRECOND_END]]
@@ -1983,12 +1982,12 @@ void foo(int N, Point const *Points) {
 // COMP-NEXT:    store ptr [[TMP0]], ptr [[DOTADDR]], align 8
 // COMP-NEXT:    store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
 // COMP-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// COMP-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// COMP-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// COMP-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// COMP-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// COMP-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// COMP-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
 // COMP-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// COMP-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// COMP-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// COMP-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointoRERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[TMP7]])
+// COMP-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointoRERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[TMP5]])
 // COMP-NEXT:    ret void
 //
 //
@@ -2087,23 +2086,23 @@ void foo(int N, Point const *Points) {
 // COMP-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
 // COMP-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
 // COMP-NEXT:    store ptr [[RED3]], ptr [[TMP22]], align 8
-// COMP-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// COMP-NEXT:    [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
-// COMP-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.10, ptr @.gomp_critical_user_.reduction.var)
-// COMP-NEXT:    switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// COMP-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
+// COMP-NEXT:    [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.10, ptr @.gomp_critical_user_.reduction.var)
+// COMP-NEXT:    switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
 // COMP-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
 // COMP-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
 // COMP-NEXT:    ]
 // COMP:       .omp.reduction.case1:
 // COMP-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointeOERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]])
-// COMP-NEXT:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var)
+// COMP-NEXT:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
 // COMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // COMP:       .omp.reduction.case2:
-// COMP-NEXT:    [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// COMP-NEXT:    [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4
-// COMP-NEXT:    call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP29]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// COMP-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
+// COMP-NEXT:    call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var)
 // COMP-NEXT:    [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointeOERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]])
-// COMP-NEXT:    call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP29]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// COMP-NEXT:    call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var)
 // COMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // COMP:       .omp.reduction.default:
 // COMP-NEXT:    br label [[OMP_PRECOND_END]]
@@ -2119,12 +2118,12 @@ void foo(int N, Point const *Points) {
 // COMP-NEXT:    store ptr [[TMP0]], ptr [[DOTADDR]], align 8
 // COMP-NEXT:    store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
 // COMP-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// COMP-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// COMP-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// COMP-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// COMP-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// COMP-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// COMP-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
 // COMP-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// COMP-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// COMP-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// COMP-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointeOERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[TMP7]])
+// COMP-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointeOERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[TMP5]])
 // COMP-NEXT:    ret void
 //
 //
@@ -2225,10 +2224,10 @@ void foo(int N, Point const *Points) {
 // COMP-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
 // COMP-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
 // COMP-NEXT:    store ptr [[RED3]], ptr [[TMP22]], align 8
-// COMP-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// COMP-NEXT:    [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
-// COMP-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.12, ptr @.gomp_critical_user_.reduction.var)
-// COMP-NEXT:    switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// COMP-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
+// COMP-NEXT:    [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.12, ptr @.gomp_critical_user_.reduction.var)
+// COMP-NEXT:    switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
 // COMP-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
 // COMP-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
 // COMP-NEXT:    ]
@@ -2236,16 +2235,16 @@ void foo(int N, Point const *Points) {
 // COMP-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointaaERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]])
 // COMP-NEXT:    store i64 [[CALL]], ptr [[REF_TMP]], align 4
 // COMP-NEXT:    [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]])
-// COMP-NEXT:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var)
+// COMP-NEXT:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
 // COMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // COMP:       .omp.reduction.case2:
-// COMP-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// COMP-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// COMP-NEXT:    call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// COMP-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
+// COMP-NEXT:    call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var)
 // COMP-NEXT:    [[CALL11:%.*]] = call i64 @_ZNK5PointaaERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]])
 // COMP-NEXT:    store i64 [[CALL11]], ptr [[REF_TMP10]], align 4
 // COMP-NEXT:    [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP10]])
-// COMP-NEXT:    call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// COMP-NEXT:    call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var)
 // COMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // COMP:       .omp.reduction.default:
 // COMP-NEXT:    br label [[OMP_PRECOND_END]]
@@ -2262,14 +2261,14 @@ void foo(int N, Point const *Points) {
 // COMP-NEXT:    store ptr [[TMP0]], ptr [[DOTADDR]], align 8
 // COMP-NEXT:    store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
 // COMP-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// COMP-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// COMP-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// COMP-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// COMP-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// COMP-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// COMP-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
 // COMP-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// COMP-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// COMP-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// COMP-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointaaERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[TMP7]])
+// COMP-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointaaERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[TMP5]])
 // COMP-NEXT:    store i64 [[CALL]], ptr [[REF_TMP]], align 4
-// COMP-NEXT:    [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]])
+// COMP-NEXT:    [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]])
 // COMP-NEXT:    ret void
 //
 //
@@ -2370,10 +2369,10 @@ void foo(int N, Point const *Points) {
 // COMP-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
 // COMP-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
 // COMP-NEXT:    store ptr [[RED3]], ptr [[TMP22]], align 8
-// COMP-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// COMP-NEXT:    [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
-// COMP-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.14, ptr @.gomp_critical_user_.reduction.var)
-// COMP-NEXT:    switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// COMP-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
+// COMP-NEXT:    [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.14, ptr @.gomp_critical_user_.reduction.var)
+// COMP-NEXT:    switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
 // COMP-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
 // COMP-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
 // COMP-NEXT:    ]
@@ -2381,16 +2380,16 @@ void foo(int N, Point const *Points) {
 // COMP-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointooERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]])
 // COMP-NEXT:    store i64 [[CALL]], ptr [[REF_TMP]], align 4
 // COMP-NEXT:    [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]])
-// COMP-NEXT:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var)
+// COMP-NEXT:    call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var)
 // COMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // COMP:       .omp.reduction.case2:
-// COMP-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// COMP-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// COMP-NEXT:    call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// COMP-NEXT:    [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
+// COMP-NEXT:    call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var)
 // COMP-NEXT:    [[CALL11:%.*]] = call i64 @_ZNK5PointooERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]])
 // COMP-NEXT:    store i64 [[CALL11]], ptr [[REF_TMP10]], align 4
 // COMP-NEXT:    [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP10]])
-// COMP-NEXT:    call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var)
+// COMP-NEXT:    call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var)
 // COMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
 // COMP:       .omp.reduction.default:
 // COMP-NEXT:    br label [[OMP_PRECOND_END]]
@@ -2407,14 +2406,14 @@ void foo(int N, Point const *Points) {
 // COMP-NEXT:    store ptr [[TMP0]], ptr [[DOTADDR]], align 8
 // COMP-NEXT:    store ptr [[TMP1]], ptr [[DOTADDR1]], align 8
 // COMP-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// COMP-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
-// COMP-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// COMP-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8
+// COMP-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// COMP-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// COMP-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
 // COMP-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// COMP-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// COMP-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// COMP-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointooERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[TMP7]])
+// COMP-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointooERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[TMP5]])
 // COMP-NEXT:    store i64 [[CALL]], ptr [[REF_TMP]], align 4
-// COMP-NEXT:    [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]])
+// COMP-NEXT:    [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]])
 // COMP-NEXT:    ret void
 //
 //
@@ -2430,3 +2429,190 @@ void foo(int N, Point const *Points) {
 // COMP-NEXT:    store i32 0, ptr [[Y]], align 4
 // COMP-NEXT:    ret void
 //
+//
+// SIMD-ONLY-LABEL: define {{[^@]+}}@_Z3fooiPK5Point
+// SIMD-ONLY-SAME: (i32 [[N:%.*]], ptr [[POINTS:%.*]]) #[[ATTR0:[0-9]+]] {
+// SIMD-ONLY-NEXT:  entry:
+// SIMD-ONLY-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
+// SIMD-ONLY-NEXT:    [[POINTS_ADDR:%.*]] = alloca ptr, align 8
+// SIMD-ONLY-NEXT:    [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4
+// SIMD-ONLY-NEXT:    [[I:%.*]] = alloca i32, align 4
+// SIMD-ONLY-NEXT:    [[I1:%.*]] = alloca i32, align 4
+// SIMD-ONLY-NEXT:    [[I8:%.*]] = alloca i32, align 4
+// SIMD-ONLY-NEXT:    [[I15:%.*]] = alloca i32, align 4
+// SIMD-ONLY-NEXT:    [[I22:%.*]] = alloca i32, align 4
+// SIMD-ONLY-NEXT:    [[I29:%.*]] = alloca i32, align 4
+// SIMD-ONLY-NEXT:    [[I36:%.*]] = alloca i32, align 4
+// SIMD-ONLY-NEXT:    [[I43:%.*]] = alloca i32, align 4
+// SIMD-ONLY-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
+// SIMD-ONLY-NEXT:    store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8
+// SIMD-ONLY-NEXT:    call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR3:[0-9]+]]
+// SIMD-ONLY-NEXT:    store i32 0, ptr [[I]], align 4
+// SIMD-ONLY-NEXT:    br label [[FOR_COND:%.*]]
+// SIMD-ONLY:       for.cond:
+// SIMD-ONLY-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I]], align 4
+// SIMD-ONLY-NEXT:    [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// SIMD-ONLY-NEXT:    [[CMP:%.*]] = icmp ult i32 [[TMP0]], [[TMP1]]
+// SIMD-ONLY-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// SIMD-ONLY:       for.body:
+// SIMD-ONLY-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I]], align 4
+// SIMD-ONLY-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8
+// SIMD-ONLY-NEXT:    call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP2]], ptr [[TMP3]])
+// SIMD-ONLY-NEXT:    br label [[FOR_INC:%.*]]
+// SIMD-ONLY:       for.inc:
+// SIMD-ONLY-NEXT:    [[TMP4:%.*]] = load i32, ptr [[I]], align 4
+// SIMD-ONLY-NEXT:    [[INC:%.*]] = add i32 [[TMP4]], 1
+// SIMD-ONLY-NEXT:    store i32 [[INC]], ptr [[I]], align 4
+// SIMD-ONLY-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]]
+// SIMD-ONLY:       for.end:
+// SIMD-ONLY-NEXT:    store i32 0, ptr [[I1]], align 4
+// SIMD-ONLY-NEXT:    br label [[FOR_COND2:%.*]]
+// SIMD-ONLY:       for.cond2:
+// SIMD-ONLY-NEXT:    [[TMP5:%.*]] = load i32, ptr [[I1]], align 4
+// SIMD-ONLY-NEXT:    [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// SIMD-ONLY-NEXT:    [[CMP3:%.*]] = icmp ult i32 [[TMP5]], [[TMP6]]
+// SIMD-ONLY-NEXT:    br i1 [[CMP3]], label [[FOR_BODY4:%.*]], label [[FOR_END7:%.*]]
+// SIMD-ONLY:       for.body4:
+// SIMD-ONLY-NEXT:    [[TMP7:%.*]] = load i32, ptr [[I1]], align 4
+// SIMD-ONLY-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8
+// SIMD-ONLY-NEXT:    call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP7]], ptr [[TMP8]])
+// SIMD-ONLY-NEXT:    br label [[FOR_INC5:%.*]]
+// SIMD-ONLY:       for.inc5:
+// SIMD-ONLY-NEXT:    [[TMP9:%.*]] = load i32, ptr [[I1]], align 4
+// SIMD-ONLY-NEXT:    [[INC6:%.*]] = add i32 [[TMP9]], 1
+// SIMD-ONLY-NEXT:    store i32 [[INC6]], ptr [[I1]], align 4
+// SIMD-ONLY-NEXT:    br label [[FOR_COND2]], !llvm.loop [[LOOP4:![0-9]+]]
+// SIMD-ONLY:       for.end7:
+// SIMD-ONLY-NEXT:    store i32 0, ptr [[I8]], align 4
+// SIMD-ONLY-NEXT:    br label [[FOR_COND9:%.*]]
+// SIMD-ONLY:       for.cond9:
+// SIMD-ONLY-NEXT:    [[TMP10:%.*]] = load i32, ptr [[I8]], align 4
+// SIMD-ONLY-NEXT:    [[TMP11:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// SIMD-ONLY-NEXT:    [[CMP10:%.*]] = icmp ult i32 [[TMP10]], [[TMP11]]
+// SIMD-ONLY-NEXT:    br i1 [[CMP10]], label [[FOR_BODY11:%.*]], label [[FOR_END14:%.*]]
+// SIMD-ONLY:       for.body11:
+// SIMD-ONLY-NEXT:    [[TMP12:%.*]] = load i32, ptr [[I8]], align 4
+// SIMD-ONLY-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8
+// SIMD-ONLY-NEXT:    call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP12]], ptr [[TMP13]])
+// SIMD-ONLY-NEXT:    br label [[FOR_INC12:%.*]]
+// SIMD-ONLY:       for.inc12:
+// SIMD-ONLY-NEXT:    [[TMP14:%.*]] = load i32, ptr [[I8]], align 4
+// SIMD-ONLY-NEXT:    [[INC13:%.*]] = add i32 [[TMP14]], 1
+// SIMD-ONLY-NEXT:    store i32 [[INC13]], ptr [[I8]], align 4
+// SIMD-ONLY-NEXT:    br label [[FOR_COND9]], !llvm.loop [[LOOP5:![0-9]+]]
+// SIMD-ONLY:       for.end14:
+// SIMD-ONLY-NEXT:    store i32 0, ptr [[I15]], align 4
+// SIMD-ONLY-NEXT:    br label [[FOR_COND16:%.*]]
+// SIMD-ONLY:       for.cond16:
+// SIMD-ONLY-NEXT:    [[TMP15:%.*]] = load i32, ptr [[I15]], align 4
+// SIMD-ONLY-NEXT:    [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// SIMD-ONLY-NEXT:    [[CMP17:%.*]] = icmp ult i32 [[TMP15]], [[TMP16]]
+// SIMD-ONLY-NEXT:    br i1 [[CMP17]], label [[FOR_BODY18:%.*]], label [[FOR_END21:%.*]]
+// SIMD-ONLY:       for.body18:
+// SIMD-ONLY-NEXT:    [[TMP17:%.*]] = load i32, ptr [[I15]], align 4
+// SIMD-ONLY-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8
+// SIMD-ONLY-NEXT:    call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP17]], ptr [[TMP18]])
+// SIMD-ONLY-NEXT:    br label [[FOR_INC19:%.*]]
+// SIMD-ONLY:       for.inc19:
+// SIMD-ONLY-NEXT:    [[TMP19:%.*]] = load i32, ptr [[I15]], align 4
+// SIMD-ONLY-NEXT:    [[INC20:%.*]] = add i32 [[TMP19]], 1
+// SIMD-ONLY-NEXT:    store i32 [[INC20]], ptr [[I15]], align 4
+// SIMD-ONLY-NEXT:    br label [[FOR_COND16]], !llvm.loop [[LOOP6:![0-9]+]]
+// SIMD-ONLY:       for.end21:
+// SIMD-ONLY-NEXT:    store i32 0, ptr [[I22]], align 4
+// SIMD-ONLY-NEXT:    br label [[FOR_COND23:%.*]]
+// SIMD-ONLY:       for.cond23:
+// SIMD-ONLY-NEXT:    [[TMP20:%.*]] = load i32, ptr [[I22]], align 4
+// SIMD-ONLY-NEXT:    [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// SIMD-ONLY-NEXT:    [[CMP24:%.*]] = icmp ult i32 [[TMP20]], [[TMP21]]
+// SIMD-ONLY-NEXT:    br i1 [[CMP24]], label [[FOR_BODY25:%.*]], label [[FOR_END28:%.*]]
+// SIMD-ONLY:       for.body25:
+// SIMD-ONLY-NEXT:    [[TMP22:%.*]] = load i32, ptr [[I22]], align 4
+// SIMD-ONLY-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8
+// SIMD-ONLY-NEXT:    call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP22]], ptr [[TMP23]])
+// SIMD-ONLY-NEXT:    br label [[FOR_INC26:%.*]]
+// SIMD-ONLY:       for.inc26:
+// SIMD-ONLY-NEXT:    [[TMP24:%.*]] = load i32, ptr [[I22]], align 4
+// SIMD-ONLY-NEXT:    [[INC27:%.*]] = add i32 [[TMP24]], 1
+// SIMD-ONLY-NEXT:    store i32 [[INC27]], ptr [[I22]], align 4
+// SIMD-ONLY-NEXT:    br label [[FOR_COND23]], !llvm.loop [[LOOP7:![0-9]+]]
+// SIMD-ONLY:       for.end28:
+// SIMD-ONLY-NEXT:    store i32 0, ptr [[I29]], align 4
+// SIMD-ONLY-NEXT:    br label [[FOR_COND30:%.*]]
+// SIMD-ONLY:       for.cond30:
+// SIMD-ONLY-NEXT:    [[TMP25:%.*]] = load i32, ptr [[I29]], align 4
+// SIMD-ONLY-NEXT:    [[TMP26:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// SIMD-ONLY-NEXT:    [[CMP31:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]]
+// SIMD-ONLY-NEXT:    br i1 [[CMP31]], label [[FOR_BODY32:%.*]], label [[FOR_END35:%.*]]
+// SIMD-ONLY:       for.body32:
+// SIMD-ONLY-NEXT:    [[TMP27:%.*]] = load i32, ptr [[I29]], align 4
+// SIMD-ONLY-NEXT:    [[TMP28:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8
+// SIMD-ONLY-NEXT:    call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP27]], ptr [[TMP28]])
+// SIMD-ONLY-NEXT:    br label [[FOR_INC33:%.*]]
+// SIMD-ONLY:       for.inc33:
+// SIMD-ONLY-NEXT:    [[TMP29:%.*]] = load i32, ptr [[I29]], align 4
+// SIMD-ONLY-NEXT:    [[INC34:%.*]] = add i32 [[TMP29]], 1
+// SIMD-ONLY-NEXT:    store i32 [[INC34]], ptr [[I29]], align 4
+// SIMD-ONLY-NEXT:    br label [[FOR_COND30]], !llvm.loop [[LOOP8:![0-9]+]]
+// SIMD-ONLY:       for.end35:
+// SIMD-ONLY-NEXT:    store i32 0, ptr [[I36]], align 4
+// SIMD-ONLY-NEXT:    br label [[FOR_COND37:%.*]]
+// SIMD-ONLY:       for.cond37:
+// SIMD-ONLY-NEXT:    [[TMP30:%.*]] = load i32, ptr [[I36]], align 4
+// SIMD-ONLY-NEXT:    [[TMP31:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// SIMD-ONLY-NEXT:    [[CMP38:%.*]] = icmp ult i32 [[TMP30]], [[TMP31]]
+// SIMD-ONLY-NEXT:    br i1 [[CMP38]], label [[FOR_BODY39:%.*]], label [[FOR_END42:%.*]]
+// SIMD-ONLY:       for.body39:
+// SIMD-ONLY-NEXT:    [[TMP32:%.*]] = load i32, ptr [[I36]], align 4
+// SIMD-ONLY-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8
+// SIMD-ONLY-NEXT:    call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP32]], ptr [[TMP33]])
+// SIMD-ONLY-NEXT:    br label [[FOR_INC40:%.*]]
+// SIMD-ONLY:       for.inc40:
+// SIMD-ONLY-NEXT:    [[TMP34:%.*]] = load i32, ptr [[I36]], align 4
+// SIMD-ONLY-NEXT:    [[INC41:%.*]] = add i32 [[TMP34]], 1
+// SIMD-ONLY-NEXT:    store i32 [[INC41]], ptr [[I36]], align 4
+// SIMD-ONLY-NEXT:    br label [[FOR_COND37]], !llvm.loop [[LOOP9:![0-9]+]]
+// SIMD-ONLY:       for.end42:
+// SIMD-ONLY-NEXT:    store i32 0, ptr [[I43]], align 4
+// SIMD-ONLY-NEXT:    br label [[FOR_COND44:%.*]]
+// SIMD-ONLY:       for.cond44:
+// SIMD-ONLY-NEXT:    [[TMP35:%.*]] = load i32, ptr [[I43]], align 4
+// SIMD-ONLY-NEXT:    [[TMP36:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// SIMD-ONLY-NEXT:    [[CMP45:%.*]] = icmp ult i32 [[TMP35]], [[TMP36]]
+// SIMD-ONLY-NEXT:    br i1 [[CMP45]], label [[FOR_BODY46:%.*]], label [[FOR_END49:%.*]]
+// SIMD-ONLY:       for.body46:
+// SIMD-ONLY-NEXT:    [[TMP37:%.*]] = load i32, ptr [[I43]], align 4
+// SIMD-ONLY-NEXT:    [[TMP38:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8
+// SIMD-ONLY-NEXT:    call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP37]], ptr [[TMP38]])
+// SIMD-ONLY-NEXT:    br label [[FOR_INC47:%.*]]
+// SIMD-ONLY:       for.inc47:
+// SIMD-ONLY-NEXT:    [[TMP39:%.*]] = load i32, ptr [[I43]], align 4
+// SIMD-ONLY-NEXT:    [[INC48:%.*]] = add i32 [[TMP39]], 1
+// SIMD-ONLY-NEXT:    store i32 [[INC48]], ptr [[I43]], align 4
+// SIMD-ONLY-NEXT:    br label [[FOR_COND44]], !llvm.loop [[LOOP10:![0-9]+]]
+// SIMD-ONLY:       for.end49:
+// SIMD-ONLY-NEXT:    ret void
+//
+//
+// SIMD-ONLY-LABEL: define {{[^@]+}}@_ZN5PointC1Ev
+// SIMD-ONLY-SAME: (ptr nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 {
+// SIMD-ONLY-NEXT:  entry:
+// SIMD-ONLY-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// SIMD-ONLY-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// SIMD-ONLY-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// SIMD-ONLY-NEXT:    call void @_ZN5PointC2Ev(ptr nonnull align 4 dereferenceable(8) [[THIS1]]) #[[ATTR3]]
+// SIMD-ONLY-NEXT:    ret void
+//
+//
+// SIMD-ONLY-LABEL: define {{[^@]+}}@_ZN5PointC2Ev
+// SIMD-ONLY-SAME: (ptr nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
+// SIMD-ONLY-NEXT:  entry:
+// SIMD-ONLY-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// SIMD-ONLY-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// SIMD-ONLY-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// SIMD-ONLY-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_POINT:%.*]], ptr [[THIS1]], i32 0, i32 0
+// SIMD-ONLY-NEXT:    store i32 0, ptr [[X]], align 4
+// SIMD-ONLY-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_POINT]], ptr [[THIS1]], i32 0, i32 1
+// SIMD-ONLY-NEXT:    store i32 0, ptr [[Y]], align 4
+// SIMD-ONLY-NEXT:    ret void
+//

diff  --git a/clang/test/OpenMP/reduction_implicit_map.cpp b/clang/test/OpenMP/reduction_implicit_map.cpp
index 610b9e737ca93..d190250b04c73 100644
--- a/clang/test/OpenMP/reduction_implicit_map.cpp
+++ b/clang/test/OpenMP/reduction_implicit_map.cpp
@@ -293,10 +293,12 @@ int main()
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[B:%.*]] = alloca [10 x [10 x [10 x double]]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %class.S2], ptr [[O]], i32 0, i32 0
 // CHECK1-NEXT:    [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[CLASS_S2:%.*]], ptr [[ARRAY_BEGIN]], i64 5
 // CHECK1-NEXT:    br label [[ARRAYCTOR_LOOP:%.*]]
@@ -316,7 +318,6 @@ int main()
 // CHECK1-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -358,7 +359,6 @@ int main()
 // CHECK1-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1
@@ -825,6 +825,7 @@ int main()
 // CHECK2-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK2-NEXT:    [[SIZE_CASTED4:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOFFLOAD_BASEPTRS7:%.*]] = alloca [3 x ptr], align 4
 // CHECK2-NEXT:    [[DOTOFFLOAD_PTRS8:%.*]] = alloca [3 x ptr], align 4
@@ -833,15 +834,18 @@ int main()
 // CHECK2-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_12:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_13:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[KERNEL_ARGS18:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK2-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
 // CHECK2-NEXT:    [[SIZE_CASTED21:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOFFLOAD_BASEPTRS23:%.*]] = alloca [2 x ptr], align 4
 // CHECK2-NEXT:    [[DOTOFFLOAD_PTRS24:%.*]] = alloca [2 x ptr], align 4
 // CHECK2-NEXT:    [[DOTOFFLOAD_MAPPERS25:%.*]] = alloca [2 x ptr], align 4
+// CHECK2-NEXT:    [[KERNEL_ARGS26:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK2-NEXT:    [[SIZE_CASTED29:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOFFLOAD_BASEPTRS31:%.*]] = alloca [2 x ptr], align 4
 // CHECK2-NEXT:    [[DOTOFFLOAD_PTRS32:%.*]] = alloca [2 x ptr], align 4
 // CHECK2-NEXT:    [[DOTOFFLOAD_MAPPERS33:%.*]] = alloca [2 x ptr], align 4
+// CHECK2-NEXT:    [[KERNEL_ARGS34:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK2-NEXT:    store ptr [[INPUT]], ptr [[INPUT_ADDR]], align 4
 // CHECK2-NEXT:    store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4
 // CHECK2-NEXT:    store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 4
@@ -893,7 +897,6 @@ int main()
 // CHECK2-NEXT:    [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP26]], 1
 // CHECK2-NEXT:    [[TMP27:%.*]] = zext i32 [[ADD]] to i64
-// CHECK2-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK2-NEXT:    store i32 2, ptr [[TMP28]], align 4
 // CHECK2-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -975,7 +978,6 @@ int main()
 // CHECK2-NEXT:    [[TMP69:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_13]], align 4
 // CHECK2-NEXT:    [[ADD17:%.*]] = add nsw i32 [[TMP69]], 1
 // CHECK2-NEXT:    [[TMP70:%.*]] = zext i32 [[ADD17]] to i64
-// CHECK2-NEXT:    [[KERNEL_ARGS18:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK2-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 0
 // CHECK2-NEXT:    store i32 2, ptr [[TMP71]], align 4
 // CHECK2-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 1
@@ -1027,7 +1029,6 @@ int main()
 // CHECK2-NEXT:    store ptr null, ptr [[TMP93]], align 4
 // CHECK2-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0
 // CHECK2-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS24]], i32 0, i32 0
-// CHECK2-NEXT:    [[KERNEL_ARGS26:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK2-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 0
 // CHECK2-NEXT:    store i32 2, ptr [[TMP96]], align 4
 // CHECK2-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 1
@@ -1079,7 +1080,6 @@ int main()
 // CHECK2-NEXT:    store ptr null, ptr [[TMP118]], align 4
 // CHECK2-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS31]], i32 0, i32 0
 // CHECK2-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS32]], i32 0, i32 0
-// CHECK2-NEXT:    [[KERNEL_ARGS34:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK2-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 0
 // CHECK2-NEXT:    store i32 2, ptr [[TMP121]], align 4
 // CHECK2-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_codegen_global_capture.cpp b/clang/test/OpenMP/target_codegen_global_capture.cpp
index 4c9556c5aa908..858ac559ccc2e 100644
--- a/clang/test/OpenMP/target_codegen_global_capture.cpp
+++ b/clang/test/OpenMP/target_codegen_global_capture.cpp
@@ -185,6 +185,7 @@ int tbar2(short a, short b, short c, short d){
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [9 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [9 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [9 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i16 [[A]], ptr [[A_ADDR]], align 2
 // CHECK1-NEXT:    store i16 [[B]], ptr [[B_ADDR]], align 2
 // CHECK1-NEXT:    store i16 [[C]], ptr [[C_ADDR]], align 2
@@ -286,7 +287,6 @@ int tbar2(short a, short b, short c, short d){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP47]], align 8
 // CHECK1-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP50]], align 4
 // CHECK1-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -506,6 +506,7 @@ int tbar2(short a, short b, short c, short d){
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [9 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [9 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [9 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
 // CHECK1-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 8
@@ -613,7 +614,6 @@ int tbar2(short a, short b, short c, short d){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP51]], align 8
 // CHECK1-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP54]], align 4
 // CHECK1-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -829,6 +829,7 @@ int tbar2(short a, short b, short c, short d){
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [9 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [9 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [9 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
 // CHECK1-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 8
@@ -936,7 +937,6 @@ int tbar2(short a, short b, short c, short d){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP51]], align 8
 // CHECK1-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP54]], align 4
 // CHECK1-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1097,6 +1097,7 @@ int tbar2(short a, short b, short c, short d){
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [9 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [9 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [9 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i16 [[A]], ptr [[A_ADDR]], align 2
 // CHECK3-NEXT:    store i16 [[B]], ptr [[B_ADDR]], align 2
 // CHECK3-NEXT:    store i16 [[C]], ptr [[C_ADDR]], align 2
@@ -1189,7 +1190,6 @@ int tbar2(short a, short b, short c, short d){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP41]], align 4
 // CHECK3-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP44]], align 4
 // CHECK3-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1418,6 +1418,7 @@ int tbar2(short a, short b, short c, short d){
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [9 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [9 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [9 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
 // CHECK3-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
@@ -1516,7 +1517,6 @@ int tbar2(short a, short b, short c, short d){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP45]], align 4
 // CHECK3-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP48]], align 4
 // CHECK3-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1741,6 +1741,7 @@ int tbar2(short a, short b, short c, short d){
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [9 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [9 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [9 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
 // CHECK3-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
@@ -1839,7 +1840,6 @@ int tbar2(short a, short b, short c, short d){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP45]], align 4
 // CHECK3-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP48]], align 4
 // CHECK3-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_has_device_addr_codegen.cpp b/clang/test/OpenMP/target_has_device_addr_codegen.cpp
index c1111b31602e5..ef6f0e79516f3 100644
--- a/clang/test/OpenMP/target_has_device_addr_codegen.cpp
+++ b/clang/test/OpenMP/target_has_device_addr_codegen.cpp
@@ -286,23 +286,29 @@ void use_template() {
 // CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
+// CHECK-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK-NEXT:    [[TMP:%.*]] = alloca ptr, align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 8
+// CHECK-NEXT:    [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS7:%.*]] = alloca [1 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_PTRS8:%.*]] = alloca [1 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_MAPPERS9:%.*]] = alloca [1 x ptr], align 8
+// CHECK-NEXT:    [[KERNEL_ARGS10:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK-NEXT:    [[_TMP13:%.*]] = alloca ptr, align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS14:%.*]] = alloca [1 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_PTRS15:%.*]] = alloca [1 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_MAPPERS16:%.*]] = alloca [1 x ptr], align 8
+// CHECK-NEXT:    [[KERNEL_ARGS17:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS20:%.*]] = alloca [1 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_PTRS21:%.*]] = alloca [1 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_MAPPERS22:%.*]] = alloca [1 x ptr], align 8
+// CHECK-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS26:%.*]] = alloca [1 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_PTRS27:%.*]] = alloca [1 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_MAPPERS28:%.*]] = alloca [1 x ptr], align 8
+// CHECK-NEXT:    [[KERNEL_ARGS29:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
@@ -321,7 +327,6 @@ void use_template() {
 // CHECK-NEXT:    store ptr null, ptr [[TMP3]], align 8
 // CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK-NEXT:    store i32 2, ptr [[TMP6]], align 4
 // CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -366,7 +371,6 @@ void use_template() {
 // CHECK-NEXT:    store ptr null, ptr [[TMP25]], align 8
 // CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK-NEXT:    [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0
 // CHECK-NEXT:    store i32 2, ptr [[TMP28]], align 4
 // CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1
@@ -408,7 +412,6 @@ void use_template() {
 // CHECK-NEXT:    store ptr null, ptr [[TMP45]], align 8
 // CHECK-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
-// CHECK-NEXT:    [[KERNEL_ARGS10:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0
 // CHECK-NEXT:    store i32 2, ptr [[TMP48]], align 4
 // CHECK-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1
@@ -453,7 +456,6 @@ void use_template() {
 // CHECK-NEXT:    store ptr null, ptr [[TMP67]], align 8
 // CHECK-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
-// CHECK-NEXT:    [[KERNEL_ARGS17:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 0
 // CHECK-NEXT:    store i32 2, ptr [[TMP70]], align 4
 // CHECK-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 1
@@ -495,7 +497,6 @@ void use_template() {
 // CHECK-NEXT:    store ptr null, ptr [[TMP87]], align 8
 // CHECK-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0
 // CHECK-NEXT:    store i32 2, ptr [[TMP90]], align 4
 // CHECK-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1
@@ -537,7 +538,6 @@ void use_template() {
 // CHECK-NEXT:    store ptr null, ptr [[TMP107]], align 8
 // CHECK-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS27]], i32 0, i32 0
-// CHECK-NEXT:    [[KERNEL_ARGS29:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 0
 // CHECK-NEXT:    store i32 2, ptr [[TMP110]], align 4
 // CHECK-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 1
@@ -675,16 +675,20 @@ void use_template() {
 // CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
+// CHECK-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK-NEXT:    [[TMP:%.*]] = alloca ptr, align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 8
+// CHECK-NEXT:    [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS7:%.*]] = alloca [1 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_PTRS8:%.*]] = alloca [1 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_MAPPERS9:%.*]] = alloca [1 x ptr], align 8
+// CHECK-NEXT:    [[KERNEL_ARGS10:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS13:%.*]] = alloca [1 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_PTRS14:%.*]] = alloca [1 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_MAPPERS15:%.*]] = alloca [1 x ptr], align 8
+// CHECK-NEXT:    [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[DA]], i8 0, i64 20, i1 false)
 // CHECK-NEXT:    store ptr [[H]], ptr [[RH]], align 8
@@ -700,7 +704,6 @@ void use_template() {
 // CHECK-NEXT:    store ptr null, ptr [[TMP3]], align 8
 // CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK-NEXT:    store i32 2, ptr [[TMP6]], align 4
 // CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -745,7 +748,6 @@ void use_template() {
 // CHECK-NEXT:    store ptr null, ptr [[TMP25]], align 8
 // CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK-NEXT:    [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0
 // CHECK-NEXT:    store i32 2, ptr [[TMP28]], align 4
 // CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1
@@ -787,7 +789,6 @@ void use_template() {
 // CHECK-NEXT:    store ptr null, ptr [[TMP45]], align 8
 // CHECK-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
-// CHECK-NEXT:    [[KERNEL_ARGS10:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0
 // CHECK-NEXT:    store i32 2, ptr [[TMP48]], align 4
 // CHECK-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1
@@ -829,7 +830,6 @@ void use_template() {
 // CHECK-NEXT:    store ptr null, ptr [[TMP65]], align 8
 // CHECK-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
-// CHECK-NEXT:    [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0
 // CHECK-NEXT:    store i32 2, ptr [[TMP68]], align 4
 // CHECK-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1
@@ -881,16 +881,20 @@ void use_template() {
 // CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
+// CHECK-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK-NEXT:    [[TMP:%.*]] = alloca ptr, align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 8
+// CHECK-NEXT:    [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS7:%.*]] = alloca [1 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_PTRS8:%.*]] = alloca [1 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_MAPPERS9:%.*]] = alloca [1 x ptr], align 8
+// CHECK-NEXT:    [[KERNEL_ARGS10:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS13:%.*]] = alloca [1 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_PTRS14:%.*]] = alloca [1 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_MAPPERS15:%.*]] = alloca [1 x ptr], align 8
+// CHECK-NEXT:    [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK-NEXT:    store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8
 // CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[DA]], i8 0, i64 40, i1 false)
 // CHECK-NEXT:    store ptr [[H]], ptr [[RH]], align 8
@@ -906,7 +910,6 @@ void use_template() {
 // CHECK-NEXT:    store ptr null, ptr [[TMP3]], align 8
 // CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK-NEXT:    store i32 2, ptr [[TMP6]], align 4
 // CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -951,7 +954,6 @@ void use_template() {
 // CHECK-NEXT:    store ptr null, ptr [[TMP25]], align 8
 // CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK-NEXT:    [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0
 // CHECK-NEXT:    store i32 2, ptr [[TMP28]], align 4
 // CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1
@@ -993,7 +995,6 @@ void use_template() {
 // CHECK-NEXT:    store ptr null, ptr [[TMP45]], align 8
 // CHECK-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
-// CHECK-NEXT:    [[KERNEL_ARGS10:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0
 // CHECK-NEXT:    store i32 2, ptr [[TMP48]], align 4
 // CHECK-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1
@@ -1035,7 +1036,6 @@ void use_template() {
 // CHECK-NEXT:    store ptr null, ptr [[TMP65]], align 8
 // CHECK-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
-// CHECK-NEXT:    [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0
 // CHECK-NEXT:    store i32 2, ptr [[TMP68]], align 4
 // CHECK-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1
@@ -1199,6 +1199,7 @@ void use_template() {
 // CHECK-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [3 x i64], align 8
+// CHECK-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK-NEXT:    [[TARGETDEV:%.*]] = getelementptr inbounds [[STRUCT_SOMEKERNEL:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -1237,7 +1238,6 @@ void use_template() {
 // CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
 // CHECK-NEXT:    [[TMP20:%.*]] = sext i32 [[TMP19]] to i64
-// CHECK-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK-NEXT:    store i32 2, ptr [[TMP21]], align 4
 // CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_has_device_addr_codegen_01.cpp b/clang/test/OpenMP/target_has_device_addr_codegen_01.cpp
index 25c60fa688e74..4a5a385869df4 100644
--- a/clang/test/OpenMP/target_has_device_addr_codegen_01.cpp
+++ b/clang/test/OpenMP/target_has_device_addr_codegen_01.cpp
@@ -45,6 +45,7 @@ int main() {
 // CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [6 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [6 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [6 x ptr], align 8
+// CHECK-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK-NEXT:    store float 0.000000e+00, ptr [[A]], align 4
 // CHECK-NEXT:    store ptr [[A]], ptr [[PTR]], align 8
@@ -100,7 +101,6 @@ int main() {
 // CHECK-NEXT:    store ptr null, ptr [[TMP23]], align 8
 // CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK-NEXT:    store i32 2, ptr [[TMP26]], align 4
 // CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -161,6 +161,7 @@ int main() {
 // CHECK-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8
+// CHECK-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -209,7 +210,6 @@ int main() {
 // CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_map_codegen_03.cpp b/clang/test/OpenMP/target_map_codegen_03.cpp
index 552cc794a39bf..03f8175619f63 100644
--- a/clang/test/OpenMP/target_map_codegen_03.cpp
+++ b/clang/test/OpenMP/target_map_codegen_03.cpp
@@ -79,6 +79,7 @@ void implicit_maps_nested_integer (int a){
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
 // CHECK1-NEXT:    store ptr [[I]], ptr [[I_ADDR]], align 8
@@ -94,7 +95,6 @@ void implicit_maps_nested_integer (int a){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP5]], align 8
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP8]], align 4
 // CHECK1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -185,6 +185,7 @@ void implicit_maps_nested_integer (int a){
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
 // CHECK3-NEXT:    store ptr [[I]], ptr [[I_ADDR]], align 4
@@ -200,7 +201,6 @@ void implicit_maps_nested_integer (int a){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP8]], align 4
 // CHECK3-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_map_codegen_hold.cpp b/clang/test/OpenMP/target_map_codegen_hold.cpp
index 7008d32d76cad..f6c3db95ec8f3 100644
--- a/clang/test/OpenMP/target_map_codegen_hold.cpp
+++ b/clang/test/OpenMP/target_map_codegen_hold.cpp
@@ -81,35 +81,6 @@
 // MEMBER_OF_1  = 0x1000000000000
 // MEMBER_OF_5  = 0x5000000000000
 
-//.
-// CHECK-USE-PPC64LE: @.offload_sizes = private unnamed_addr constant [7 x i64] [i64 0, i64 4, i64 4, i64 4, i64 0, i64 4, i64 4]
-// CHECK-USE-PPC64LE: @.offload_maptypes = private unnamed_addr constant [7 x i64] [i64 [[#0x2020]], i64 [[#0x1000000002003]], i64 [[#0x1000000002003]], i64 [[#0x2023]], i64 [[#0x2020]], i64 [[#0x5000000002003]], i64 [[#0x5000000002003]]]
-// CHECK-USE-PPC64LE: @.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 4]
-// CHECK-USE-PPC64LE: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 [[#0x2427]]]
-// CHECK-USE-PPC64LE: @.offload_sizes.3 = private unnamed_addr constant [3 x i64] [i64 0, i64 4, i64 4]
-// CHECK-USE-PPC64LE: @.offload_maptypes.4 = private unnamed_addr constant [3 x i64] [i64 [[#0x2020]], i64 [[#0x1000000002003]], i64 [[#0x1000000002003]]]
-//.
-// CHECK-USE-I386: @.offload_sizes = private unnamed_addr constant [7 x i64] [i64 0, i64 4, i64 4, i64 4, i64 0, i64 4, i64 4]
-// CHECK-USE-I386: @.offload_maptypes = private unnamed_addr constant [7 x i64] [i64 [[#0x2020]], i64 [[#0x1000000002003]], i64 [[#0x1000000002003]], i64 [[#0x2023]], i64 [[#0x2020]], i64 [[#0x5000000002003]], i64 [[#0x5000000002003]]]
-// CHECK-USE-I386: @.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 4]
-// CHECK-USE-I386: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 [[#0x2427]]]
-// CHECK-USE-I386: @.offload_sizes.3 = private unnamed_addr constant [3 x i64] [i64 0, i64 4, i64 4]
-// CHECK-USE-I386: @.offload_maptypes.4 = private unnamed_addr constant [3 x i64] [i64 [[#0x2020]], i64 [[#0x1000000002003]], i64 [[#0x1000000002003]]]
-//.
-// CHECK-NOUSE-PPC64LE: @.offload_sizes = private unnamed_addr constant [7 x i64] [i64 0, i64 4, i64 4, i64 4, i64 0, i64 4, i64 4]
-// CHECK-NOUSE-PPC64LE: @.offload_maptypes = private unnamed_addr constant [7 x i64] [i64 [[#0x2000]], i64 [[#0x1000000002003]], i64 [[#0x1000000002003]], i64 [[#0x2003]], i64 [[#0x2000]], i64 [[#0x5000000002003]], i64 [[#0x5000000002003]]]
-// CHECK-NOUSE-PPC64LE: @.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 4]
-// CHECK-NOUSE-PPC64LE: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 [[#0x2407]]]
-// CHECK-NOUSE-PPC64LE: @.offload_sizes.3 = private unnamed_addr constant [3 x i64] [i64 0, i64 4, i64 4]
-// CHECK-NOUSE-PPC64LE: @.offload_maptypes.4 = private unnamed_addr constant [3 x i64] [i64 [[#0x2000]], i64 [[#0x1000000002003]], i64 [[#0x1000000002003]]]
-//.
-// CHECK-NOUSE-I386: @.offload_sizes = private unnamed_addr constant [7 x i64] [i64 0, i64 4, i64 4, i64 4, i64 0, i64 4, i64 4]
-// CHECK-NOUSE-I386: @.offload_maptypes = private unnamed_addr constant [7 x i64] [i64 [[#0x2000]], i64 [[#0x1000000002003]], i64 [[#0x1000000002003]], i64 [[#0x2003]], i64 [[#0x2000]], i64 [[#0x5000000002003]], i64 [[#0x5000000002003]]]
-// CHECK-NOUSE-I386: @.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 4]
-// CHECK-NOUSE-I386: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 [[#0x2407]]]
-// CHECK-NOUSE-I386: @.offload_sizes.3 = private unnamed_addr constant [3 x i64] [i64 0, i64 4, i64 4]
-// CHECK-NOUSE-I386: @.offload_maptypes.4 = private unnamed_addr constant [3 x i64] [i64 [[#0x2000]], i64 [[#0x1000000002003]], i64 [[#0x1000000002003]]]
-//.
 struct ST {
   int i;
   int j;
@@ -162,6 +133,34 @@ void ST::test_present_members() {
 
 #endif
 //.
+// CHECK-USE-PPC64LE: @.offload_sizes = private unnamed_addr constant [7 x i64] [i64 0, i64 4, i64 4, i64 4, i64 0, i64 4, i64 4]
+// CHECK-USE-PPC64LE: @.offload_maptypes = private unnamed_addr constant [7 x i64] [i64 8224, i64 281474976718851, i64 281474976718851, i64 8227, i64 8224, i64 1407374883561475, i64 1407374883561475]
+// CHECK-USE-PPC64LE: @.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 4]
+// CHECK-USE-PPC64LE: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 9255]
+// CHECK-USE-PPC64LE: @.offload_sizes.3 = private unnamed_addr constant [3 x i64] [i64 0, i64 4, i64 4]
+// CHECK-USE-PPC64LE: @.offload_maptypes.4 = private unnamed_addr constant [3 x i64] [i64 8224, i64 281474976718851, i64 281474976718851]
+//.
+// CHECK-USE-I386: @.offload_sizes = private unnamed_addr constant [7 x i64] [i64 0, i64 4, i64 4, i64 4, i64 0, i64 4, i64 4]
+// CHECK-USE-I386: @.offload_maptypes = private unnamed_addr constant [7 x i64] [i64 8224, i64 281474976718851, i64 281474976718851, i64 8227, i64 8224, i64 1407374883561475, i64 1407374883561475]
+// CHECK-USE-I386: @.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 4]
+// CHECK-USE-I386: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 9255]
+// CHECK-USE-I386: @.offload_sizes.3 = private unnamed_addr constant [3 x i64] [i64 0, i64 4, i64 4]
+// CHECK-USE-I386: @.offload_maptypes.4 = private unnamed_addr constant [3 x i64] [i64 8224, i64 281474976718851, i64 281474976718851]
+//.
+// CHECK-NOUSE-PPC64LE: @.offload_sizes = private unnamed_addr constant [7 x i64] [i64 0, i64 4, i64 4, i64 4, i64 0, i64 4, i64 4]
+// CHECK-NOUSE-PPC64LE: @.offload_maptypes = private unnamed_addr constant [7 x i64] [i64 8192, i64 281474976718851, i64 281474976718851, i64 8195, i64 8192, i64 1407374883561475, i64 1407374883561475]
+// CHECK-NOUSE-PPC64LE: @.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 4]
+// CHECK-NOUSE-PPC64LE: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 9223]
+// CHECK-NOUSE-PPC64LE: @.offload_sizes.3 = private unnamed_addr constant [3 x i64] [i64 0, i64 4, i64 4]
+// CHECK-NOUSE-PPC64LE: @.offload_maptypes.4 = private unnamed_addr constant [3 x i64] [i64 8192, i64 281474976718851, i64 281474976718851]
+//.
+// CHECK-NOUSE-I386: @.offload_sizes = private unnamed_addr constant [7 x i64] [i64 0, i64 4, i64 4, i64 4, i64 0, i64 4, i64 4]
+// CHECK-NOUSE-I386: @.offload_maptypes = private unnamed_addr constant [7 x i64] [i64 8192, i64 281474976718851, i64 281474976718851, i64 8195, i64 8192, i64 1407374883561475, i64 1407374883561475]
+// CHECK-NOUSE-I386: @.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 4]
+// CHECK-NOUSE-I386: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 9223]
+// CHECK-NOUSE-I386: @.offload_sizes.3 = private unnamed_addr constant [3 x i64] [i64 0, i64 4, i64 4]
+// CHECK-NOUSE-I386: @.offload_maptypes.4 = private unnamed_addr constant [3 x i64] [i64 8192, i64 281474976718851, i64 281474976718851]
+//.
 // CHECK-USE-PPC64LE-LABEL: define {{[^@]+}}@_Z20explicit_maps_singlei
 // CHECK-USE-PPC64LE-SAME: (i32 noundef signext [[II:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-USE-PPC64LE-NEXT:  entry:
@@ -173,156 +172,156 @@ void ST::test_present_members() {
 // CHECK-USE-PPC64LE-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [7 x ptr], align 8
 // CHECK-USE-PPC64LE-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [7 x ptr], align 8
 // CHECK-USE-PPC64LE-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [7 x i64], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK-USE-PPC64LE-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK-USE-PPC64LE-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 8
 // CHECK-USE-PPC64LE-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK-USE-PPC64LE-NEXT:    store i32 [[II]], ptr [[II_ADDR]], align 4
 // CHECK-USE-PPC64LE-NEXT:    [[TMP0:%.*]] = load i32, ptr [[II_ADDR]], align 4
 // CHECK-USE-PPC64LE-NEXT:    store i32 [[TMP0]], ptr [[A]], align 4
 // CHECK-USE-PPC64LE-NEXT:    [[I:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[ST1]], i32 0, i32 0
 // CHECK-USE-PPC64LE-NEXT:    [[J:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[ST1]], i32 0, i32 1
 // CHECK-USE-PPC64LE-NEXT:    [[TMP1:%.*]] = getelementptr i32, ptr [[J]], i32 1
-// CHECK-USE-PPC64LE-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[TMP1]] to i64
-// CHECK-USE-PPC64LE-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[I]] to i64
-// CHECK-USE-PPC64LE-NEXT:    [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]]
-// CHECK-USE-PPC64LE-NEXT:    [[TMP7:%.*]] = sdiv exact i64 [[TMP6]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK-USE-PPC64LE-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
+// CHECK-USE-PPC64LE-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[I]] to i64
+// CHECK-USE-PPC64LE-NEXT:    [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]]
+// CHECK-USE-PPC64LE-NEXT:    [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
 // CHECK-USE-PPC64LE-NEXT:    [[I1:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[ST2]], i32 0, i32 0
 // CHECK-USE-PPC64LE-NEXT:    [[J2:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[ST2]], i32 0, i32 1
-// CHECK-USE-PPC64LE-NEXT:    [[TMP8:%.*]] = getelementptr i32, ptr [[J2]], i32 1
-// CHECK-USE-PPC64LE-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP8]] to i64
-// CHECK-USE-PPC64LE-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[I1]] to i64
-// CHECK-USE-PPC64LE-NEXT:    [[TMP13:%.*]] = sub i64 [[TMP11]], [[TMP12]]
-// CHECK-USE-PPC64LE-NEXT:    [[TMP14:%.*]] = sdiv exact i64 [[TMP13]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK-USE-PPC64LE-NEXT:    [[TMP6:%.*]] = getelementptr i32, ptr [[J2]], i32 1
+// CHECK-USE-PPC64LE-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[TMP6]] to i64
+// CHECK-USE-PPC64LE-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[I1]] to i64
+// CHECK-USE-PPC64LE-NEXT:    [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]]
+// CHECK-USE-PPC64LE-NEXT:    [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
 // CHECK-USE-PPC64LE-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes, i64 56, i1 false)
-// CHECK-USE-PPC64LE-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    store ptr [[ST1]], ptr [[TMP16]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    store ptr [[I]], ptr [[TMP18]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [7 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    store i64 [[TMP7]], ptr [[TMP20]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP21]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK-USE-PPC64LE-NEXT:    store ptr [[ST1]], ptr [[TMP22]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK-USE-PPC64LE-NEXT:    store ptr [[I]], ptr [[TMP24]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP26]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK-USE-PPC64LE-NEXT:    store ptr [[ST1]], ptr [[TMP27]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK-USE-PPC64LE-NEXT:    store ptr [[J]], ptr [[TMP29]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP31]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
-// CHECK-USE-PPC64LE-NEXT:    store ptr [[A]], ptr [[TMP32]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK-USE-PPC64LE-NEXT:    store ptr [[A]], ptr [[TMP34]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP36]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK-USE-PPC64LE-NEXT:    store ptr [[ST2]], ptr [[TMP37]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK-USE-PPC64LE-NEXT:    store ptr [[I1]], ptr [[TMP39]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [7 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK-USE-PPC64LE-NEXT:    store i64 [[TMP14]], ptr [[TMP41]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP42]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
-// CHECK-USE-PPC64LE-NEXT:    store ptr [[ST2]], ptr [[TMP43]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5
-// CHECK-USE-PPC64LE-NEXT:    store ptr [[I1]], ptr [[TMP45]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5
-// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP47]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
-// CHECK-USE-PPC64LE-NEXT:    store ptr [[ST2]], ptr [[TMP48]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 6
-// CHECK-USE-PPC64LE-NEXT:    store ptr [[J2]], ptr [[TMP50]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6
-// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP52]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [7 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    store i32 2, ptr [[TMP56]], align 4
-// CHECK-USE-PPC64LE-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK-USE-PPC64LE-NEXT:    store i32 7, ptr [[TMP57]], align 4
-// CHECK-USE-PPC64LE-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK-USE-PPC64LE-NEXT:    store ptr [[TMP53]], ptr [[TMP58]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK-USE-PPC64LE-NEXT:    store ptr [[TMP54]], ptr [[TMP59]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK-USE-PPC64LE-NEXT:    store ptr [[TMP55]], ptr [[TMP60]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK-USE-PPC64LE-NEXT:    store ptr @.offload_maptypes, ptr [[TMP61]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP62]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP63]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK-USE-PPC64LE-NEXT:    store i64 0, ptr [[TMP64]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
-// CHECK-USE-PPC64LE-NEXT:    store i64 0, ptr [[TMP65]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
-// CHECK-USE-PPC64LE-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP66]], align 4
-// CHECK-USE-PPC64LE-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
-// CHECK-USE-PPC64LE-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP67]], align 4
-// CHECK-USE-PPC64LE-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
-// CHECK-USE-PPC64LE-NEXT:    store i32 0, ptr [[TMP68]], align 4
-// CHECK-USE-PPC64LE-NEXT:    [[TMP69:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l{{[0-9]*}}.region_id, ptr [[KERNEL_ARGS]])
-// CHECK-USE-PPC64LE-NEXT:    [[TMP70:%.*]] = icmp ne i32 [[TMP69]], 0
-// CHECK-USE-PPC64LE-NEXT:    br i1 [[TMP70]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK-USE-PPC64LE-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    store ptr [[ST1]], ptr [[TMP11]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    store ptr [[I]], ptr [[TMP12]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [7 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    store i64 [[TMP5]], ptr [[TMP13]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP14]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK-USE-PPC64LE-NEXT:    store ptr [[ST1]], ptr [[TMP15]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK-USE-PPC64LE-NEXT:    store ptr [[I]], ptr [[TMP16]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP17]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK-USE-PPC64LE-NEXT:    store ptr [[ST1]], ptr [[TMP18]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK-USE-PPC64LE-NEXT:    store ptr [[J]], ptr [[TMP19]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP20]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK-USE-PPC64LE-NEXT:    store ptr [[A]], ptr [[TMP21]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
+// CHECK-USE-PPC64LE-NEXT:    store ptr [[A]], ptr [[TMP22]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP23]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK-USE-PPC64LE-NEXT:    store ptr [[ST2]], ptr [[TMP24]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK-USE-PPC64LE-NEXT:    store ptr [[I1]], ptr [[TMP25]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [7 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK-USE-PPC64LE-NEXT:    store i64 [[TMP10]], ptr [[TMP26]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP27]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
+// CHECK-USE-PPC64LE-NEXT:    store ptr [[ST2]], ptr [[TMP28]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5
+// CHECK-USE-PPC64LE-NEXT:    store ptr [[I1]], ptr [[TMP29]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5
+// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP30]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
+// CHECK-USE-PPC64LE-NEXT:    store ptr [[ST2]], ptr [[TMP31]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 6
+// CHECK-USE-PPC64LE-NEXT:    store ptr [[J2]], ptr [[TMP32]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6
+// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP33]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [7 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    store i32 2, ptr [[TMP37]], align 4
+// CHECK-USE-PPC64LE-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK-USE-PPC64LE-NEXT:    store i32 7, ptr [[TMP38]], align 4
+// CHECK-USE-PPC64LE-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK-USE-PPC64LE-NEXT:    store ptr [[TMP34]], ptr [[TMP39]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK-USE-PPC64LE-NEXT:    store ptr [[TMP35]], ptr [[TMP40]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK-USE-PPC64LE-NEXT:    store ptr [[TMP36]], ptr [[TMP41]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK-USE-PPC64LE-NEXT:    store ptr @.offload_maptypes, ptr [[TMP42]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP43]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP44]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK-USE-PPC64LE-NEXT:    store i64 0, ptr [[TMP45]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK-USE-PPC64LE-NEXT:    store i64 0, ptr [[TMP46]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK-USE-PPC64LE-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP47]], align 4
+// CHECK-USE-PPC64LE-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK-USE-PPC64LE-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP48]], align 4
+// CHECK-USE-PPC64LE-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK-USE-PPC64LE-NEXT:    store i32 0, ptr [[TMP49]], align 4
+// CHECK-USE-PPC64LE-NEXT:    [[TMP50:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l100.region_id, ptr [[KERNEL_ARGS]])
+// CHECK-USE-PPC64LE-NEXT:    [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0
+// CHECK-USE-PPC64LE-NEXT:    br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK-USE-PPC64LE:       omp_offload.failed:
-// CHECK-USE-PPC64LE-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l{{[0-9]*}}(ptr [[ST1]], ptr [[A]], ptr [[ST2]]) #[[ATTR3:[0-9]+]]
+// CHECK-USE-PPC64LE-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l100(ptr [[ST1]], ptr [[A]], ptr [[ST2]]) #[[ATTR3:[0-9]+]]
 // CHECK-USE-PPC64LE-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK-USE-PPC64LE:       omp_offload.cont:
-// CHECK-USE-PPC64LE-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    store ptr [[A]], ptr [[TMP71]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    store ptr [[A]], ptr [[TMP73]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 0
-// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP75]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    store i32 2, ptr [[TMP78]], align 4
-// CHECK-USE-PPC64LE-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
-// CHECK-USE-PPC64LE-NEXT:    store i32 1, ptr [[TMP79]], align 4
-// CHECK-USE-PPC64LE-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
-// CHECK-USE-PPC64LE-NEXT:    store ptr [[TMP76]], ptr [[TMP80]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
-// CHECK-USE-PPC64LE-NEXT:    store ptr [[TMP77]], ptr [[TMP81]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
-// CHECK-USE-PPC64LE-NEXT:    store ptr @.offload_sizes.1, ptr [[TMP82]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
-// CHECK-USE-PPC64LE-NEXT:    store ptr @.offload_maptypes.2, ptr [[TMP83]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
-// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP84]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
-// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP85]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
-// CHECK-USE-PPC64LE-NEXT:    store i64 0, ptr [[TMP86]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9
-// CHECK-USE-PPC64LE-NEXT:    store i64 0, ptr [[TMP87]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10
-// CHECK-USE-PPC64LE-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP88]], align 4
-// CHECK-USE-PPC64LE-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11
-// CHECK-USE-PPC64LE-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP89]], align 4
-// CHECK-USE-PPC64LE-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12
-// CHECK-USE-PPC64LE-NEXT:    store i32 0, ptr [[TMP90]], align 4
-// CHECK-USE-PPC64LE-NEXT:    [[TMP91:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l{{[0-9]*}}.region_id, ptr [[KERNEL_ARGS6]])
-// CHECK-USE-PPC64LE-NEXT:    [[TMP92:%.*]] = icmp ne i32 [[TMP91]], 0
-// CHECK-USE-PPC64LE-NEXT:    br i1 [[TMP92]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
+// CHECK-USE-PPC64LE-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    store ptr [[A]], ptr [[TMP52]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    store ptr [[A]], ptr [[TMP53]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 0
+// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP54]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    store i32 2, ptr [[TMP57]], align 4
+// CHECK-USE-PPC64LE-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
+// CHECK-USE-PPC64LE-NEXT:    store i32 1, ptr [[TMP58]], align 4
+// CHECK-USE-PPC64LE-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
+// CHECK-USE-PPC64LE-NEXT:    store ptr [[TMP55]], ptr [[TMP59]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
+// CHECK-USE-PPC64LE-NEXT:    store ptr [[TMP56]], ptr [[TMP60]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
+// CHECK-USE-PPC64LE-NEXT:    store ptr @.offload_sizes.1, ptr [[TMP61]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
+// CHECK-USE-PPC64LE-NEXT:    store ptr @.offload_maptypes.2, ptr [[TMP62]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
+// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP63]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
+// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP64]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
+// CHECK-USE-PPC64LE-NEXT:    store i64 0, ptr [[TMP65]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9
+// CHECK-USE-PPC64LE-NEXT:    store i64 0, ptr [[TMP66]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10
+// CHECK-USE-PPC64LE-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP67]], align 4
+// CHECK-USE-PPC64LE-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11
+// CHECK-USE-PPC64LE-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP68]], align 4
+// CHECK-USE-PPC64LE-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12
+// CHECK-USE-PPC64LE-NEXT:    store i32 0, ptr [[TMP69]], align 4
+// CHECK-USE-PPC64LE-NEXT:    [[TMP70:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l114.region_id, ptr [[KERNEL_ARGS6]])
+// CHECK-USE-PPC64LE-NEXT:    [[TMP71:%.*]] = icmp ne i32 [[TMP70]], 0
+// CHECK-USE-PPC64LE-NEXT:    br i1 [[TMP71]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
 // CHECK-USE-PPC64LE:       omp_offload.failed7:
-// CHECK-USE-PPC64LE-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l{{[0-9]*}}(ptr [[A]]) #[[ATTR3]]
+// CHECK-USE-PPC64LE-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l114(ptr [[A]]) #[[ATTR3]]
 // CHECK-USE-PPC64LE-NEXT:    br label [[OMP_OFFLOAD_CONT8]]
 // CHECK-USE-PPC64LE:       omp_offload.cont8:
 // CHECK-USE-PPC64LE-NEXT:    ret void
 //
 //
-// CHECK-USE-PPC64LE-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l{{[0-9]*}}
+// CHECK-USE-PPC64LE-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l100
 // CHECK-USE-PPC64LE-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[ST1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[ST2:%.*]]) #[[ATTR1:[0-9]+]] {
 // CHECK-USE-PPC64LE-NEXT:  entry:
 // CHECK-USE-PPC64LE-NEXT:    [[ST1_ADDR:%.*]] = alloca ptr, align 8
@@ -356,7 +355,7 @@ void ST::test_present_members() {
 // CHECK-USE-PPC64LE-NEXT:    ret void
 //
 //
-// CHECK-USE-PPC64LE-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l{{[0-9]*}}
+// CHECK-USE-PPC64LE-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l114
 // CHECK-USE-PPC64LE-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] {
 // CHECK-USE-PPC64LE-NEXT:  entry:
 // CHECK-USE-PPC64LE-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 8
@@ -376,77 +375,77 @@ void ST::test_present_members() {
 // CHECK-USE-PPC64LE-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK-USE-PPC64LE-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
 // CHECK-USE-PPC64LE-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [3 x i64], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK-USE-PPC64LE-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK-USE-PPC64LE-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK-USE-PPC64LE-NEXT:    [[I:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0
 // CHECK-USE-PPC64LE-NEXT:    [[J:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
 // CHECK-USE-PPC64LE-NEXT:    [[TMP0:%.*]] = getelementptr i32, ptr [[J]], i32 1
-// CHECK-USE-PPC64LE-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[TMP0]] to i64
-// CHECK-USE-PPC64LE-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[I]] to i64
-// CHECK-USE-PPC64LE-NEXT:    [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]]
-// CHECK-USE-PPC64LE-NEXT:    [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK-USE-PPC64LE-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64
+// CHECK-USE-PPC64LE-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[I]] to i64
+// CHECK-USE-PPC64LE-NEXT:    [[TMP3:%.*]] = sub i64 [[TMP1]], [[TMP2]]
+// CHECK-USE-PPC64LE-NEXT:    [[TMP4:%.*]] = sdiv exact i64 [[TMP3]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
 // CHECK-USE-PPC64LE-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes.3, i64 24, i1 false)
-// CHECK-USE-PPC64LE-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    store ptr [[THIS1]], ptr [[TMP8]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    store ptr [[THIS1]], ptr [[TMP5]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    store ptr [[I]], ptr [[TMP6]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    store i64 [[TMP4]], ptr [[TMP7]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP8]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK-USE-PPC64LE-NEXT:    store ptr [[THIS1]], ptr [[TMP9]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK-USE-PPC64LE-NEXT:    store ptr [[I]], ptr [[TMP10]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    store i64 [[TMP6]], ptr [[TMP12]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP13]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK-USE-PPC64LE-NEXT:    store ptr [[THIS1]], ptr [[TMP14]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK-USE-PPC64LE-NEXT:    store ptr [[I]], ptr [[TMP16]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP18]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK-USE-PPC64LE-NEXT:    store ptr [[THIS1]], ptr [[TMP19]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK-USE-PPC64LE-NEXT:    store ptr [[J]], ptr [[TMP21]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP23]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    store i32 2, ptr [[TMP27]], align 4
-// CHECK-USE-PPC64LE-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK-USE-PPC64LE-NEXT:    store i32 3, ptr [[TMP28]], align 4
-// CHECK-USE-PPC64LE-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK-USE-PPC64LE-NEXT:    store ptr [[TMP24]], ptr [[TMP29]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK-USE-PPC64LE-NEXT:    store ptr [[TMP25]], ptr [[TMP30]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK-USE-PPC64LE-NEXT:    store ptr [[TMP26]], ptr [[TMP31]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK-USE-PPC64LE-NEXT:    store ptr @.offload_maptypes.4, ptr [[TMP32]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP33]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP34]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK-USE-PPC64LE-NEXT:    store i64 0, ptr [[TMP35]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
-// CHECK-USE-PPC64LE-NEXT:    store i64 0, ptr [[TMP36]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
-// CHECK-USE-PPC64LE-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP37]], align 4
-// CHECK-USE-PPC64LE-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
-// CHECK-USE-PPC64LE-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP38]], align 4
-// CHECK-USE-PPC64LE-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
-// CHECK-USE-PPC64LE-NEXT:    store i32 0, ptr [[TMP39]], align 4
-// CHECK-USE-PPC64LE-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l{{[0-9]*}}.region_id, ptr [[KERNEL_ARGS]])
-// CHECK-USE-PPC64LE-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
-// CHECK-USE-PPC64LE-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK-USE-PPC64LE-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP11]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK-USE-PPC64LE-NEXT:    store ptr [[THIS1]], ptr [[TMP12]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK-USE-PPC64LE-NEXT:    store ptr [[J]], ptr [[TMP13]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP14]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    store i32 2, ptr [[TMP18]], align 4
+// CHECK-USE-PPC64LE-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK-USE-PPC64LE-NEXT:    store i32 3, ptr [[TMP19]], align 4
+// CHECK-USE-PPC64LE-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK-USE-PPC64LE-NEXT:    store ptr [[TMP15]], ptr [[TMP20]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK-USE-PPC64LE-NEXT:    store ptr [[TMP16]], ptr [[TMP21]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK-USE-PPC64LE-NEXT:    store ptr [[TMP17]], ptr [[TMP22]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK-USE-PPC64LE-NEXT:    store ptr @.offload_maptypes.4, ptr [[TMP23]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP24]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK-USE-PPC64LE-NEXT:    store ptr null, ptr [[TMP25]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK-USE-PPC64LE-NEXT:    store i64 0, ptr [[TMP26]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK-USE-PPC64LE-NEXT:    store i64 0, ptr [[TMP27]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK-USE-PPC64LE-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP28]], align 4
+// CHECK-USE-PPC64LE-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK-USE-PPC64LE-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP29]], align 4
+// CHECK-USE-PPC64LE-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK-USE-PPC64LE-NEXT:    store i32 0, ptr [[TMP30]], align 4
+// CHECK-USE-PPC64LE-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l125.region_id, ptr [[KERNEL_ARGS]])
+// CHECK-USE-PPC64LE-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
+// CHECK-USE-PPC64LE-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK-USE-PPC64LE:       omp_offload.failed:
-// CHECK-USE-PPC64LE-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l{{[0-9]*}}(ptr [[THIS1]]) #[[ATTR3]]
+// CHECK-USE-PPC64LE-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l125(ptr [[THIS1]]) #[[ATTR3]]
 // CHECK-USE-PPC64LE-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK-USE-PPC64LE:       omp_offload.cont:
 // CHECK-USE-PPC64LE-NEXT:    ret void
 //
 //
-// CHECK-USE-PPC64LE-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l{{[0-9]*}}
+// CHECK-USE-PPC64LE-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l125
 // CHECK-USE-PPC64LE-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] {
 // CHECK-USE-PPC64LE-NEXT:  entry:
 // CHECK-USE-PPC64LE-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 8
@@ -481,156 +480,156 @@ void ST::test_present_members() {
 // CHECK-USE-I386-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [7 x ptr], align 4
 // CHECK-USE-I386-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [7 x ptr], align 4
 // CHECK-USE-I386-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [7 x i64], align 4
+// CHECK-USE-I386-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK-USE-I386-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK-USE-I386-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 4
 // CHECK-USE-I386-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 4
+// CHECK-USE-I386-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK-USE-I386-NEXT:    store i32 [[II]], ptr [[II_ADDR]], align 4
 // CHECK-USE-I386-NEXT:    [[TMP0:%.*]] = load i32, ptr [[II_ADDR]], align 4
 // CHECK-USE-I386-NEXT:    store i32 [[TMP0]], ptr [[A]], align 4
 // CHECK-USE-I386-NEXT:    [[I:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[ST1]], i32 0, i32 0
 // CHECK-USE-I386-NEXT:    [[J:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[ST1]], i32 0, i32 1
 // CHECK-USE-I386-NEXT:    [[TMP1:%.*]] = getelementptr i32, ptr [[J]], i32 1
-// CHECK-USE-I386-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[TMP1]] to i64
-// CHECK-USE-I386-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[I]] to i64
-// CHECK-USE-I386-NEXT:    [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]]
-// CHECK-USE-I386-NEXT:    [[TMP7:%.*]] = sdiv exact i64 [[TMP6]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK-USE-I386-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
+// CHECK-USE-I386-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[I]] to i64
+// CHECK-USE-I386-NEXT:    [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]]
+// CHECK-USE-I386-NEXT:    [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
 // CHECK-USE-I386-NEXT:    [[I1:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[ST2]], i32 0, i32 0
 // CHECK-USE-I386-NEXT:    [[J2:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[ST2]], i32 0, i32 1
-// CHECK-USE-I386-NEXT:    [[TMP8:%.*]] = getelementptr i32, ptr [[J2]], i32 1
-// CHECK-USE-I386-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP8]] to i64
-// CHECK-USE-I386-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[I1]] to i64
-// CHECK-USE-I386-NEXT:    [[TMP13:%.*]] = sub i64 [[TMP11]], [[TMP12]]
-// CHECK-USE-I386-NEXT:    [[TMP14:%.*]] = sdiv exact i64 [[TMP13]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK-USE-I386-NEXT:    [[TMP6:%.*]] = getelementptr i32, ptr [[J2]], i32 1
+// CHECK-USE-I386-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[TMP6]] to i64
+// CHECK-USE-I386-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[I1]] to i64
+// CHECK-USE-I386-NEXT:    [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]]
+// CHECK-USE-I386-NEXT:    [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
 // CHECK-USE-I386-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes, i32 56, i1 false)
-// CHECK-USE-I386-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    store ptr [[ST1]], ptr [[TMP16]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    store ptr [[I]], ptr [[TMP18]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [7 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    store i64 [[TMP7]], ptr [[TMP20]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP21]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK-USE-I386-NEXT:    store ptr [[ST1]], ptr [[TMP22]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK-USE-I386-NEXT:    store ptr [[I]], ptr [[TMP24]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP26]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK-USE-I386-NEXT:    store ptr [[ST1]], ptr [[TMP27]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK-USE-I386-NEXT:    store ptr [[J]], ptr [[TMP29]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP31]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
-// CHECK-USE-I386-NEXT:    store ptr [[A]], ptr [[TMP32]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK-USE-I386-NEXT:    store ptr [[A]], ptr [[TMP34]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP36]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK-USE-I386-NEXT:    store ptr [[ST2]], ptr [[TMP37]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK-USE-I386-NEXT:    store ptr [[I1]], ptr [[TMP39]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [7 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK-USE-I386-NEXT:    store i64 [[TMP14]], ptr [[TMP41]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP42]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
-// CHECK-USE-I386-NEXT:    store ptr [[ST2]], ptr [[TMP43]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5
-// CHECK-USE-I386-NEXT:    store ptr [[I1]], ptr [[TMP45]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5
-// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP47]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
-// CHECK-USE-I386-NEXT:    store ptr [[ST2]], ptr [[TMP48]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 6
-// CHECK-USE-I386-NEXT:    store ptr [[J2]], ptr [[TMP50]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 6
-// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP52]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [7 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK-USE-I386-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    store i32 2, ptr [[TMP56]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK-USE-I386-NEXT:    store i32 7, ptr [[TMP57]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK-USE-I386-NEXT:    store ptr [[TMP53]], ptr [[TMP58]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK-USE-I386-NEXT:    store ptr [[TMP54]], ptr [[TMP59]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK-USE-I386-NEXT:    store ptr [[TMP55]], ptr [[TMP60]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK-USE-I386-NEXT:    store ptr @.offload_maptypes, ptr [[TMP61]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP62]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP63]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK-USE-I386-NEXT:    store i64 0, ptr [[TMP64]], align 8
-// CHECK-USE-I386-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
-// CHECK-USE-I386-NEXT:    store i64 0, ptr [[TMP65]], align 8
-// CHECK-USE-I386-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
-// CHECK-USE-I386-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP66]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
-// CHECK-USE-I386-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP67]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
-// CHECK-USE-I386-NEXT:    store i32 0, ptr [[TMP68]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP69:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l{{[0-9]*}}.region_id, ptr [[KERNEL_ARGS]])
-// CHECK-USE-I386-NEXT:    [[TMP70:%.*]] = icmp ne i32 [[TMP69]], 0
-// CHECK-USE-I386-NEXT:    br i1 [[TMP70]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK-USE-I386-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    store ptr [[ST1]], ptr [[TMP11]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    store ptr [[I]], ptr [[TMP12]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [7 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    store i64 [[TMP5]], ptr [[TMP13]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP14]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK-USE-I386-NEXT:    store ptr [[ST1]], ptr [[TMP15]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK-USE-I386-NEXT:    store ptr [[I]], ptr [[TMP16]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP17]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK-USE-I386-NEXT:    store ptr [[ST1]], ptr [[TMP18]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK-USE-I386-NEXT:    store ptr [[J]], ptr [[TMP19]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP20]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK-USE-I386-NEXT:    store ptr [[A]], ptr [[TMP21]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
+// CHECK-USE-I386-NEXT:    store ptr [[A]], ptr [[TMP22]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP23]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK-USE-I386-NEXT:    store ptr [[ST2]], ptr [[TMP24]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK-USE-I386-NEXT:    store ptr [[I1]], ptr [[TMP25]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [7 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK-USE-I386-NEXT:    store i64 [[TMP10]], ptr [[TMP26]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP27]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
+// CHECK-USE-I386-NEXT:    store ptr [[ST2]], ptr [[TMP28]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5
+// CHECK-USE-I386-NEXT:    store ptr [[I1]], ptr [[TMP29]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5
+// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP30]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
+// CHECK-USE-I386-NEXT:    store ptr [[ST2]], ptr [[TMP31]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 6
+// CHECK-USE-I386-NEXT:    store ptr [[J2]], ptr [[TMP32]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 6
+// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP33]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [7 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    store i32 2, ptr [[TMP37]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK-USE-I386-NEXT:    store i32 7, ptr [[TMP38]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK-USE-I386-NEXT:    store ptr [[TMP34]], ptr [[TMP39]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK-USE-I386-NEXT:    store ptr [[TMP35]], ptr [[TMP40]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK-USE-I386-NEXT:    store ptr [[TMP36]], ptr [[TMP41]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK-USE-I386-NEXT:    store ptr @.offload_maptypes, ptr [[TMP42]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP43]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP44]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK-USE-I386-NEXT:    store i64 0, ptr [[TMP45]], align 8
+// CHECK-USE-I386-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK-USE-I386-NEXT:    store i64 0, ptr [[TMP46]], align 8
+// CHECK-USE-I386-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK-USE-I386-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP47]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK-USE-I386-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP48]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK-USE-I386-NEXT:    store i32 0, ptr [[TMP49]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP50:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l100.region_id, ptr [[KERNEL_ARGS]])
+// CHECK-USE-I386-NEXT:    [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0
+// CHECK-USE-I386-NEXT:    br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK-USE-I386:       omp_offload.failed:
-// CHECK-USE-I386-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l{{[0-9]*}}(ptr [[ST1]], ptr [[A]], ptr [[ST2]]) #[[ATTR3:[0-9]+]]
+// CHECK-USE-I386-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l100(ptr [[ST1]], ptr [[A]], ptr [[ST2]]) #[[ATTR3:[0-9]+]]
 // CHECK-USE-I386-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK-USE-I386:       omp_offload.cont:
-// CHECK-USE-I386-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    store ptr [[A]], ptr [[TMP71]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    store ptr [[A]], ptr [[TMP73]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP75]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
-// CHECK-USE-I386-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    store i32 2, ptr [[TMP78]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
-// CHECK-USE-I386-NEXT:    store i32 1, ptr [[TMP79]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
-// CHECK-USE-I386-NEXT:    store ptr [[TMP76]], ptr [[TMP80]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
-// CHECK-USE-I386-NEXT:    store ptr [[TMP77]], ptr [[TMP81]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
-// CHECK-USE-I386-NEXT:    store ptr @.offload_sizes.1, ptr [[TMP82]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
-// CHECK-USE-I386-NEXT:    store ptr @.offload_maptypes.2, ptr [[TMP83]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
-// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP84]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
-// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP85]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
-// CHECK-USE-I386-NEXT:    store i64 0, ptr [[TMP86]], align 8
-// CHECK-USE-I386-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9
-// CHECK-USE-I386-NEXT:    store i64 0, ptr [[TMP87]], align 8
-// CHECK-USE-I386-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10
-// CHECK-USE-I386-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP88]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11
-// CHECK-USE-I386-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP89]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12
-// CHECK-USE-I386-NEXT:    store i32 0, ptr [[TMP90]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP91:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l{{[0-9]*}}.region_id, ptr [[KERNEL_ARGS6]])
-// CHECK-USE-I386-NEXT:    [[TMP92:%.*]] = icmp ne i32 [[TMP91]], 0
-// CHECK-USE-I386-NEXT:    br i1 [[TMP92]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
+// CHECK-USE-I386-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    store ptr [[A]], ptr [[TMP52]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    store ptr [[A]], ptr [[TMP53]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP54]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    store i32 2, ptr [[TMP57]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
+// CHECK-USE-I386-NEXT:    store i32 1, ptr [[TMP58]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
+// CHECK-USE-I386-NEXT:    store ptr [[TMP55]], ptr [[TMP59]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
+// CHECK-USE-I386-NEXT:    store ptr [[TMP56]], ptr [[TMP60]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
+// CHECK-USE-I386-NEXT:    store ptr @.offload_sizes.1, ptr [[TMP61]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
+// CHECK-USE-I386-NEXT:    store ptr @.offload_maptypes.2, ptr [[TMP62]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
+// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP63]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
+// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP64]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
+// CHECK-USE-I386-NEXT:    store i64 0, ptr [[TMP65]], align 8
+// CHECK-USE-I386-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9
+// CHECK-USE-I386-NEXT:    store i64 0, ptr [[TMP66]], align 8
+// CHECK-USE-I386-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10
+// CHECK-USE-I386-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP67]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11
+// CHECK-USE-I386-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP68]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12
+// CHECK-USE-I386-NEXT:    store i32 0, ptr [[TMP69]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP70:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l114.region_id, ptr [[KERNEL_ARGS6]])
+// CHECK-USE-I386-NEXT:    [[TMP71:%.*]] = icmp ne i32 [[TMP70]], 0
+// CHECK-USE-I386-NEXT:    br i1 [[TMP71]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
 // CHECK-USE-I386:       omp_offload.failed7:
-// CHECK-USE-I386-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l{{[0-9]*}}(ptr [[A]]) #[[ATTR3]]
+// CHECK-USE-I386-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l114(ptr [[A]]) #[[ATTR3]]
 // CHECK-USE-I386-NEXT:    br label [[OMP_OFFLOAD_CONT8]]
 // CHECK-USE-I386:       omp_offload.cont8:
 // CHECK-USE-I386-NEXT:    ret void
 //
 //
-// CHECK-USE-I386-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l{{[0-9]*}}
+// CHECK-USE-I386-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l100
 // CHECK-USE-I386-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[ST1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[ST2:%.*]]) #[[ATTR1:[0-9]+]] {
 // CHECK-USE-I386-NEXT:  entry:
 // CHECK-USE-I386-NEXT:    [[ST1_ADDR:%.*]] = alloca ptr, align 4
@@ -664,7 +663,7 @@ void ST::test_present_members() {
 // CHECK-USE-I386-NEXT:    ret void
 //
 //
-// CHECK-USE-I386-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l{{[0-9]*}}
+// CHECK-USE-I386-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l114
 // CHECK-USE-I386-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] {
 // CHECK-USE-I386-NEXT:  entry:
 // CHECK-USE-I386-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
@@ -684,77 +683,77 @@ void ST::test_present_members() {
 // CHECK-USE-I386-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK-USE-I386-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
 // CHECK-USE-I386-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [3 x i64], align 4
+// CHECK-USE-I386-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK-USE-I386-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK-USE-I386-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CHECK-USE-I386-NEXT:    [[I:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0
 // CHECK-USE-I386-NEXT:    [[J:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
 // CHECK-USE-I386-NEXT:    [[TMP0:%.*]] = getelementptr i32, ptr [[J]], i32 1
-// CHECK-USE-I386-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[TMP0]] to i64
-// CHECK-USE-I386-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[I]] to i64
-// CHECK-USE-I386-NEXT:    [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]]
-// CHECK-USE-I386-NEXT:    [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK-USE-I386-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64
+// CHECK-USE-I386-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[I]] to i64
+// CHECK-USE-I386-NEXT:    [[TMP3:%.*]] = sub i64 [[TMP1]], [[TMP2]]
+// CHECK-USE-I386-NEXT:    [[TMP4:%.*]] = sdiv exact i64 [[TMP3]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
 // CHECK-USE-I386-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes.3, i32 24, i1 false)
-// CHECK-USE-I386-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    store ptr [[THIS1]], ptr [[TMP8]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    store ptr [[THIS1]], ptr [[TMP5]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    store ptr [[I]], ptr [[TMP6]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    store i64 [[TMP4]], ptr [[TMP7]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP8]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK-USE-I386-NEXT:    store ptr [[THIS1]], ptr [[TMP9]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK-USE-I386-NEXT:    store ptr [[I]], ptr [[TMP10]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    store i64 [[TMP6]], ptr [[TMP12]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP13]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK-USE-I386-NEXT:    store ptr [[THIS1]], ptr [[TMP14]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK-USE-I386-NEXT:    store ptr [[I]], ptr [[TMP16]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP18]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK-USE-I386-NEXT:    store ptr [[THIS1]], ptr [[TMP19]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK-USE-I386-NEXT:    store ptr [[J]], ptr [[TMP21]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP23]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK-USE-I386-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    store i32 2, ptr [[TMP27]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK-USE-I386-NEXT:    store i32 3, ptr [[TMP28]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK-USE-I386-NEXT:    store ptr [[TMP24]], ptr [[TMP29]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK-USE-I386-NEXT:    store ptr [[TMP25]], ptr [[TMP30]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK-USE-I386-NEXT:    store ptr [[TMP26]], ptr [[TMP31]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK-USE-I386-NEXT:    store ptr @.offload_maptypes.4, ptr [[TMP32]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP33]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP34]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK-USE-I386-NEXT:    store i64 0, ptr [[TMP35]], align 8
-// CHECK-USE-I386-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
-// CHECK-USE-I386-NEXT:    store i64 0, ptr [[TMP36]], align 8
-// CHECK-USE-I386-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
-// CHECK-USE-I386-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP37]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
-// CHECK-USE-I386-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP38]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
-// CHECK-USE-I386-NEXT:    store i32 0, ptr [[TMP39]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l{{[0-9]*}}.region_id, ptr [[KERNEL_ARGS]])
-// CHECK-USE-I386-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
-// CHECK-USE-I386-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK-USE-I386-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP11]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK-USE-I386-NEXT:    store ptr [[THIS1]], ptr [[TMP12]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK-USE-I386-NEXT:    store ptr [[J]], ptr [[TMP13]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP14]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    store i32 2, ptr [[TMP18]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK-USE-I386-NEXT:    store i32 3, ptr [[TMP19]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK-USE-I386-NEXT:    store ptr [[TMP15]], ptr [[TMP20]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK-USE-I386-NEXT:    store ptr [[TMP16]], ptr [[TMP21]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK-USE-I386-NEXT:    store ptr [[TMP17]], ptr [[TMP22]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK-USE-I386-NEXT:    store ptr @.offload_maptypes.4, ptr [[TMP23]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP24]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK-USE-I386-NEXT:    store ptr null, ptr [[TMP25]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK-USE-I386-NEXT:    store i64 0, ptr [[TMP26]], align 8
+// CHECK-USE-I386-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK-USE-I386-NEXT:    store i64 0, ptr [[TMP27]], align 8
+// CHECK-USE-I386-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK-USE-I386-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP28]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK-USE-I386-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP29]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK-USE-I386-NEXT:    store i32 0, ptr [[TMP30]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l125.region_id, ptr [[KERNEL_ARGS]])
+// CHECK-USE-I386-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
+// CHECK-USE-I386-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK-USE-I386:       omp_offload.failed:
-// CHECK-USE-I386-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l{{[0-9]*}}(ptr [[THIS1]]) #[[ATTR3]]
+// CHECK-USE-I386-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l125(ptr [[THIS1]]) #[[ATTR3]]
 // CHECK-USE-I386-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK-USE-I386:       omp_offload.cont:
 // CHECK-USE-I386-NEXT:    ret void
 //
 //
-// CHECK-USE-I386-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l{{[0-9]*}}
+// CHECK-USE-I386-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l125
 // CHECK-USE-I386-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] {
 // CHECK-USE-I386-NEXT:  entry:
 // CHECK-USE-I386-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 4
@@ -789,162 +788,162 @@ void ST::test_present_members() {
 // CHECK-NOUSE-PPC64LE-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [7 x ptr], align 8
 // CHECK-NOUSE-PPC64LE-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [7 x ptr], align 8
 // CHECK-NOUSE-PPC64LE-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [7 x i64], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK-NOUSE-PPC64LE-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK-NOUSE-PPC64LE-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 8
 // CHECK-NOUSE-PPC64LE-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK-NOUSE-PPC64LE-NEXT:    store i32 [[II]], ptr [[II_ADDR]], align 4
 // CHECK-NOUSE-PPC64LE-NEXT:    [[TMP0:%.*]] = load i32, ptr [[II_ADDR]], align 4
 // CHECK-NOUSE-PPC64LE-NEXT:    store i32 [[TMP0]], ptr [[A]], align 4
 // CHECK-NOUSE-PPC64LE-NEXT:    [[I:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[ST1]], i32 0, i32 0
 // CHECK-NOUSE-PPC64LE-NEXT:    [[J:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[ST1]], i32 0, i32 1
 // CHECK-NOUSE-PPC64LE-NEXT:    [[TMP1:%.*]] = getelementptr i32, ptr [[J]], i32 1
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[TMP1]] to i64
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[I]] to i64
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]]
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP7:%.*]] = sdiv exact i64 [[TMP6]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[I]] to i64
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]]
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
 // CHECK-NOUSE-PPC64LE-NEXT:    [[I1:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[ST2]], i32 0, i32 0
 // CHECK-NOUSE-PPC64LE-NEXT:    [[J2:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[ST2]], i32 0, i32 1
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP8:%.*]] = getelementptr i32, ptr [[J2]], i32 1
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP8]] to i64
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[I1]] to i64
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP13:%.*]] = sub i64 [[TMP11]], [[TMP12]]
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP14:%.*]] = sdiv exact i64 [[TMP13]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP6:%.*]] = getelementptr i32, ptr [[J2]], i32 1
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[TMP6]] to i64
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[I1]] to i64
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]]
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
 // CHECK-NOUSE-PPC64LE-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes, i64 56, i1 false)
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[ST1]], ptr [[TMP16]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[I]], ptr [[TMP18]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [7 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    store i64 [[TMP7]], ptr [[TMP20]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP21]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[ST1]], ptr [[TMP22]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[I]], ptr [[TMP24]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP26]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[ST1]], ptr [[TMP27]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[J]], ptr [[TMP29]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP31]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[A]], ptr [[TMP32]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[A]], ptr [[TMP34]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP36]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[ST2]], ptr [[TMP37]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[I1]], ptr [[TMP39]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [7 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK-NOUSE-PPC64LE-NEXT:    store i64 [[TMP14]], ptr [[TMP41]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP42]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[ST2]], ptr [[TMP43]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[I1]], ptr [[TMP45]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP47]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[ST2]], ptr [[TMP48]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 6
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[J2]], ptr [[TMP50]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP52]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [7 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    store i32 2, ptr [[TMP56]], align 4
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK-NOUSE-PPC64LE-NEXT:    store i32 7, ptr [[TMP57]], align 4
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[TMP53]], ptr [[TMP58]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[TMP54]], ptr [[TMP59]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[TMP55]], ptr [[TMP60]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr @.offload_maptypes, ptr [[TMP61]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP62]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP63]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK-NOUSE-PPC64LE-NEXT:    store i64 0, ptr [[TMP64]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
-// CHECK-NOUSE-PPC64LE-NEXT:    store i64 0, ptr [[TMP65]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
-// CHECK-NOUSE-PPC64LE-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP66]], align 4
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
-// CHECK-NOUSE-PPC64LE-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP67]], align 4
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
-// CHECK-NOUSE-PPC64LE-NEXT:    store i32 0, ptr [[TMP68]], align 4
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP69:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l{{[0-9]*}}.region_id, ptr [[KERNEL_ARGS]])
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP70:%.*]] = icmp ne i32 [[TMP69]], 0
-// CHECK-NOUSE-PPC64LE-NEXT:    br i1 [[TMP70]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[ST1]], ptr [[TMP11]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[I]], ptr [[TMP12]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [7 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    store i64 [[TMP5]], ptr [[TMP13]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP14]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[ST1]], ptr [[TMP15]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[I]], ptr [[TMP16]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP17]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[ST1]], ptr [[TMP18]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[J]], ptr [[TMP19]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP20]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[A]], ptr [[TMP21]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[A]], ptr [[TMP22]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP23]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[ST2]], ptr [[TMP24]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[I1]], ptr [[TMP25]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [7 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK-NOUSE-PPC64LE-NEXT:    store i64 [[TMP10]], ptr [[TMP26]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP27]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[ST2]], ptr [[TMP28]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[I1]], ptr [[TMP29]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP30]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[ST2]], ptr [[TMP31]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 6
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[J2]], ptr [[TMP32]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP33]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [7 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    store i32 2, ptr [[TMP37]], align 4
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK-NOUSE-PPC64LE-NEXT:    store i32 7, ptr [[TMP38]], align 4
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[TMP34]], ptr [[TMP39]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[TMP35]], ptr [[TMP40]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[TMP36]], ptr [[TMP41]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr @.offload_maptypes, ptr [[TMP42]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP43]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP44]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK-NOUSE-PPC64LE-NEXT:    store i64 0, ptr [[TMP45]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK-NOUSE-PPC64LE-NEXT:    store i64 0, ptr [[TMP46]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK-NOUSE-PPC64LE-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP47]], align 4
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK-NOUSE-PPC64LE-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP48]], align 4
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK-NOUSE-PPC64LE-NEXT:    store i32 0, ptr [[TMP49]], align 4
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP50:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l100.region_id, ptr [[KERNEL_ARGS]])
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0
+// CHECK-NOUSE-PPC64LE-NEXT:    br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK-NOUSE-PPC64LE:       omp_offload.failed:
-// CHECK-NOUSE-PPC64LE-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l{{[0-9]*}}() #[[ATTR3:[0-9]+]]
+// CHECK-NOUSE-PPC64LE-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l100() #[[ATTR3:[0-9]+]]
 // CHECK-NOUSE-PPC64LE-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK-NOUSE-PPC64LE:       omp_offload.cont:
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[A]], ptr [[TMP71]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[A]], ptr [[TMP73]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 0
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP75]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    store i32 2, ptr [[TMP78]], align 4
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
-// CHECK-NOUSE-PPC64LE-NEXT:    store i32 1, ptr [[TMP79]], align 4
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[TMP76]], ptr [[TMP80]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[TMP77]], ptr [[TMP81]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr @.offload_sizes.1, ptr [[TMP82]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr @.offload_maptypes.2, ptr [[TMP83]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP84]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP85]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
-// CHECK-NOUSE-PPC64LE-NEXT:    store i64 0, ptr [[TMP86]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9
-// CHECK-NOUSE-PPC64LE-NEXT:    store i64 0, ptr [[TMP87]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10
-// CHECK-NOUSE-PPC64LE-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP88]], align 4
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11
-// CHECK-NOUSE-PPC64LE-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP89]], align 4
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12
-// CHECK-NOUSE-PPC64LE-NEXT:    store i32 0, ptr [[TMP90]], align 4
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP91:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l{{[0-9]*}}.region_id, ptr [[KERNEL_ARGS6]])
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP92:%.*]] = icmp ne i32 [[TMP91]], 0
-// CHECK-NOUSE-PPC64LE-NEXT:    br i1 [[TMP92]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[A]], ptr [[TMP52]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[A]], ptr [[TMP53]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 0
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP54]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    store i32 2, ptr [[TMP57]], align 4
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
+// CHECK-NOUSE-PPC64LE-NEXT:    store i32 1, ptr [[TMP58]], align 4
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[TMP55]], ptr [[TMP59]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[TMP56]], ptr [[TMP60]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr @.offload_sizes.1, ptr [[TMP61]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr @.offload_maptypes.2, ptr [[TMP62]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP63]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP64]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
+// CHECK-NOUSE-PPC64LE-NEXT:    store i64 0, ptr [[TMP65]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9
+// CHECK-NOUSE-PPC64LE-NEXT:    store i64 0, ptr [[TMP66]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10
+// CHECK-NOUSE-PPC64LE-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP67]], align 4
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11
+// CHECK-NOUSE-PPC64LE-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP68]], align 4
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12
+// CHECK-NOUSE-PPC64LE-NEXT:    store i32 0, ptr [[TMP69]], align 4
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP70:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l114.region_id, ptr [[KERNEL_ARGS6]])
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP71:%.*]] = icmp ne i32 [[TMP70]], 0
+// CHECK-NOUSE-PPC64LE-NEXT:    br i1 [[TMP71]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
 // CHECK-NOUSE-PPC64LE:       omp_offload.failed7:
-// CHECK-NOUSE-PPC64LE-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l{{[0-9]*}}() #[[ATTR3]]
+// CHECK-NOUSE-PPC64LE-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l114() #[[ATTR3]]
 // CHECK-NOUSE-PPC64LE-NEXT:    br label [[OMP_OFFLOAD_CONT8]]
 // CHECK-NOUSE-PPC64LE:       omp_offload.cont8:
 // CHECK-NOUSE-PPC64LE-NEXT:    ret void
 //
 //
-// CHECK-NOUSE-PPC64LE-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l{{[0-9]*}}
+// CHECK-NOUSE-PPC64LE-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l100
 // CHECK-NOUSE-PPC64LE-SAME: () #[[ATTR1:[0-9]+]] {
 // CHECK-NOUSE-PPC64LE-NEXT:  entry:
 // CHECK-NOUSE-PPC64LE-NEXT:    ret void
 //
 //
-// CHECK-NOUSE-PPC64LE-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l{{[0-9]*}}
+// CHECK-NOUSE-PPC64LE-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l114
 // CHECK-NOUSE-PPC64LE-SAME: () #[[ATTR1]] {
 // CHECK-NOUSE-PPC64LE-NEXT:  entry:
 // CHECK-NOUSE-PPC64LE-NEXT:    ret void
@@ -958,77 +957,77 @@ void ST::test_present_members() {
 // CHECK-NOUSE-PPC64LE-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK-NOUSE-PPC64LE-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
 // CHECK-NOUSE-PPC64LE-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [3 x i64], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK-NOUSE-PPC64LE-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK-NOUSE-PPC64LE-NEXT:    [[I:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0
 // CHECK-NOUSE-PPC64LE-NEXT:    [[J:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
 // CHECK-NOUSE-PPC64LE-NEXT:    [[TMP0:%.*]] = getelementptr i32, ptr [[J]], i32 1
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[TMP0]] to i64
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[I]] to i64
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]]
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[I]] to i64
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP3:%.*]] = sub i64 [[TMP1]], [[TMP2]]
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP4:%.*]] = sdiv exact i64 [[TMP3]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
 // CHECK-NOUSE-PPC64LE-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes.3, i64 24, i1 false)
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[THIS1]], ptr [[TMP8]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[THIS1]], ptr [[TMP5]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[I]], ptr [[TMP6]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    store i64 [[TMP4]], ptr [[TMP7]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP8]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[THIS1]], ptr [[TMP9]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[I]], ptr [[TMP10]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    store i64 [[TMP6]], ptr [[TMP12]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP13]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[THIS1]], ptr [[TMP14]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[I]], ptr [[TMP16]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP18]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[THIS1]], ptr [[TMP19]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[J]], ptr [[TMP21]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP23]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    store i32 2, ptr [[TMP27]], align 4
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK-NOUSE-PPC64LE-NEXT:    store i32 3, ptr [[TMP28]], align 4
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[TMP24]], ptr [[TMP29]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[TMP25]], ptr [[TMP30]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[TMP26]], ptr [[TMP31]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr @.offload_maptypes.4, ptr [[TMP32]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP33]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP34]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK-NOUSE-PPC64LE-NEXT:    store i64 0, ptr [[TMP35]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
-// CHECK-NOUSE-PPC64LE-NEXT:    store i64 0, ptr [[TMP36]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
-// CHECK-NOUSE-PPC64LE-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP37]], align 4
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
-// CHECK-NOUSE-PPC64LE-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP38]], align 4
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
-// CHECK-NOUSE-PPC64LE-NEXT:    store i32 0, ptr [[TMP39]], align 4
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l{{[0-9]*}}.region_id, ptr [[KERNEL_ARGS]])
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
-// CHECK-NOUSE-PPC64LE-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP11]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[THIS1]], ptr [[TMP12]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[J]], ptr [[TMP13]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP14]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    store i32 2, ptr [[TMP18]], align 4
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK-NOUSE-PPC64LE-NEXT:    store i32 3, ptr [[TMP19]], align 4
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[TMP15]], ptr [[TMP20]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[TMP16]], ptr [[TMP21]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr [[TMP17]], ptr [[TMP22]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr @.offload_maptypes.4, ptr [[TMP23]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP24]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK-NOUSE-PPC64LE-NEXT:    store ptr null, ptr [[TMP25]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK-NOUSE-PPC64LE-NEXT:    store i64 0, ptr [[TMP26]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK-NOUSE-PPC64LE-NEXT:    store i64 0, ptr [[TMP27]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK-NOUSE-PPC64LE-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP28]], align 4
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK-NOUSE-PPC64LE-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP29]], align 4
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK-NOUSE-PPC64LE-NEXT:    store i32 0, ptr [[TMP30]], align 4
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l125.region_id, ptr [[KERNEL_ARGS]])
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
+// CHECK-NOUSE-PPC64LE-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK-NOUSE-PPC64LE:       omp_offload.failed:
-// CHECK-NOUSE-PPC64LE-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l{{[0-9]*}}() #[[ATTR3]]
+// CHECK-NOUSE-PPC64LE-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l125() #[[ATTR3]]
 // CHECK-NOUSE-PPC64LE-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK-NOUSE-PPC64LE:       omp_offload.cont:
 // CHECK-NOUSE-PPC64LE-NEXT:    ret void
 //
 //
-// CHECK-NOUSE-PPC64LE-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l{{[0-9]*}}
+// CHECK-NOUSE-PPC64LE-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l125
 // CHECK-NOUSE-PPC64LE-SAME: () #[[ATTR1]] {
 // CHECK-NOUSE-PPC64LE-NEXT:  entry:
 // CHECK-NOUSE-PPC64LE-NEXT:    ret void
@@ -1052,162 +1051,162 @@ void ST::test_present_members() {
 // CHECK-NOUSE-I386-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [7 x ptr], align 4
 // CHECK-NOUSE-I386-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [7 x ptr], align 4
 // CHECK-NOUSE-I386-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [7 x i64], align 4
+// CHECK-NOUSE-I386-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK-NOUSE-I386-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK-NOUSE-I386-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 4
 // CHECK-NOUSE-I386-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 4
+// CHECK-NOUSE-I386-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK-NOUSE-I386-NEXT:    store i32 [[II]], ptr [[II_ADDR]], align 4
 // CHECK-NOUSE-I386-NEXT:    [[TMP0:%.*]] = load i32, ptr [[II_ADDR]], align 4
 // CHECK-NOUSE-I386-NEXT:    store i32 [[TMP0]], ptr [[A]], align 4
 // CHECK-NOUSE-I386-NEXT:    [[I:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[ST1]], i32 0, i32 0
 // CHECK-NOUSE-I386-NEXT:    [[J:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[ST1]], i32 0, i32 1
 // CHECK-NOUSE-I386-NEXT:    [[TMP1:%.*]] = getelementptr i32, ptr [[J]], i32 1
-// CHECK-NOUSE-I386-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[TMP1]] to i64
-// CHECK-NOUSE-I386-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[I]] to i64
-// CHECK-NOUSE-I386-NEXT:    [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]]
-// CHECK-NOUSE-I386-NEXT:    [[TMP7:%.*]] = sdiv exact i64 [[TMP6]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK-NOUSE-I386-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
+// CHECK-NOUSE-I386-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[I]] to i64
+// CHECK-NOUSE-I386-NEXT:    [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]]
+// CHECK-NOUSE-I386-NEXT:    [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
 // CHECK-NOUSE-I386-NEXT:    [[I1:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[ST2]], i32 0, i32 0
 // CHECK-NOUSE-I386-NEXT:    [[J2:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[ST2]], i32 0, i32 1
-// CHECK-NOUSE-I386-NEXT:    [[TMP8:%.*]] = getelementptr i32, ptr [[J2]], i32 1
-// CHECK-NOUSE-I386-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP8]] to i64
-// CHECK-NOUSE-I386-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[I1]] to i64
-// CHECK-NOUSE-I386-NEXT:    [[TMP13:%.*]] = sub i64 [[TMP11]], [[TMP12]]
-// CHECK-NOUSE-I386-NEXT:    [[TMP14:%.*]] = sdiv exact i64 [[TMP13]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK-NOUSE-I386-NEXT:    [[TMP6:%.*]] = getelementptr i32, ptr [[J2]], i32 1
+// CHECK-NOUSE-I386-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[TMP6]] to i64
+// CHECK-NOUSE-I386-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[I1]] to i64
+// CHECK-NOUSE-I386-NEXT:    [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]]
+// CHECK-NOUSE-I386-NEXT:    [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
 // CHECK-NOUSE-I386-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes, i32 56, i1 false)
-// CHECK-NOUSE-I386-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    store ptr [[ST1]], ptr [[TMP16]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    store ptr [[I]], ptr [[TMP18]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [7 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    store i64 [[TMP7]], ptr [[TMP20]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP21]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK-NOUSE-I386-NEXT:    store ptr [[ST1]], ptr [[TMP22]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK-NOUSE-I386-NEXT:    store ptr [[I]], ptr [[TMP24]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP26]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK-NOUSE-I386-NEXT:    store ptr [[ST1]], ptr [[TMP27]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK-NOUSE-I386-NEXT:    store ptr [[J]], ptr [[TMP29]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP31]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
-// CHECK-NOUSE-I386-NEXT:    store ptr [[A]], ptr [[TMP32]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK-NOUSE-I386-NEXT:    store ptr [[A]], ptr [[TMP34]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP36]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK-NOUSE-I386-NEXT:    store ptr [[ST2]], ptr [[TMP37]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK-NOUSE-I386-NEXT:    store ptr [[I1]], ptr [[TMP39]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [7 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK-NOUSE-I386-NEXT:    store i64 [[TMP14]], ptr [[TMP41]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP42]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
-// CHECK-NOUSE-I386-NEXT:    store ptr [[ST2]], ptr [[TMP43]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5
-// CHECK-NOUSE-I386-NEXT:    store ptr [[I1]], ptr [[TMP45]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5
-// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP47]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
-// CHECK-NOUSE-I386-NEXT:    store ptr [[ST2]], ptr [[TMP48]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 6
-// CHECK-NOUSE-I386-NEXT:    store ptr [[J2]], ptr [[TMP50]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 6
-// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP52]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [7 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK-NOUSE-I386-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    store i32 2, ptr [[TMP56]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK-NOUSE-I386-NEXT:    store i32 7, ptr [[TMP57]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK-NOUSE-I386-NEXT:    store ptr [[TMP53]], ptr [[TMP58]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK-NOUSE-I386-NEXT:    store ptr [[TMP54]], ptr [[TMP59]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK-NOUSE-I386-NEXT:    store ptr [[TMP55]], ptr [[TMP60]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK-NOUSE-I386-NEXT:    store ptr @.offload_maptypes, ptr [[TMP61]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP62]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP63]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK-NOUSE-I386-NEXT:    store i64 0, ptr [[TMP64]], align 8
-// CHECK-NOUSE-I386-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
-// CHECK-NOUSE-I386-NEXT:    store i64 0, ptr [[TMP65]], align 8
-// CHECK-NOUSE-I386-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
-// CHECK-NOUSE-I386-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP66]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
-// CHECK-NOUSE-I386-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP67]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
-// CHECK-NOUSE-I386-NEXT:    store i32 0, ptr [[TMP68]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP69:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l{{[0-9]*}}.region_id, ptr [[KERNEL_ARGS]])
-// CHECK-NOUSE-I386-NEXT:    [[TMP70:%.*]] = icmp ne i32 [[TMP69]], 0
-// CHECK-NOUSE-I386-NEXT:    br i1 [[TMP70]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK-NOUSE-I386-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    store ptr [[ST1]], ptr [[TMP11]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    store ptr [[I]], ptr [[TMP12]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [7 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    store i64 [[TMP5]], ptr [[TMP13]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP14]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK-NOUSE-I386-NEXT:    store ptr [[ST1]], ptr [[TMP15]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK-NOUSE-I386-NEXT:    store ptr [[I]], ptr [[TMP16]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP17]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK-NOUSE-I386-NEXT:    store ptr [[ST1]], ptr [[TMP18]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK-NOUSE-I386-NEXT:    store ptr [[J]], ptr [[TMP19]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP20]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK-NOUSE-I386-NEXT:    store ptr [[A]], ptr [[TMP21]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3
+// CHECK-NOUSE-I386-NEXT:    store ptr [[A]], ptr [[TMP22]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP23]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK-NOUSE-I386-NEXT:    store ptr [[ST2]], ptr [[TMP24]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK-NOUSE-I386-NEXT:    store ptr [[I1]], ptr [[TMP25]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [7 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK-NOUSE-I386-NEXT:    store i64 [[TMP10]], ptr [[TMP26]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP27]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
+// CHECK-NOUSE-I386-NEXT:    store ptr [[ST2]], ptr [[TMP28]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5
+// CHECK-NOUSE-I386-NEXT:    store ptr [[I1]], ptr [[TMP29]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5
+// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP30]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
+// CHECK-NOUSE-I386-NEXT:    store ptr [[ST2]], ptr [[TMP31]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 6
+// CHECK-NOUSE-I386-NEXT:    store ptr [[J2]], ptr [[TMP32]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 6
+// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP33]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [7 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    store i32 2, ptr [[TMP37]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK-NOUSE-I386-NEXT:    store i32 7, ptr [[TMP38]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK-NOUSE-I386-NEXT:    store ptr [[TMP34]], ptr [[TMP39]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK-NOUSE-I386-NEXT:    store ptr [[TMP35]], ptr [[TMP40]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK-NOUSE-I386-NEXT:    store ptr [[TMP36]], ptr [[TMP41]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK-NOUSE-I386-NEXT:    store ptr @.offload_maptypes, ptr [[TMP42]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP43]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP44]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK-NOUSE-I386-NEXT:    store i64 0, ptr [[TMP45]], align 8
+// CHECK-NOUSE-I386-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK-NOUSE-I386-NEXT:    store i64 0, ptr [[TMP46]], align 8
+// CHECK-NOUSE-I386-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK-NOUSE-I386-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP47]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK-NOUSE-I386-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP48]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK-NOUSE-I386-NEXT:    store i32 0, ptr [[TMP49]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP50:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l100.region_id, ptr [[KERNEL_ARGS]])
+// CHECK-NOUSE-I386-NEXT:    [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0
+// CHECK-NOUSE-I386-NEXT:    br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK-NOUSE-I386:       omp_offload.failed:
-// CHECK-NOUSE-I386-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l{{[0-9]*}}() #[[ATTR3:[0-9]+]]
+// CHECK-NOUSE-I386-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l100() #[[ATTR3:[0-9]+]]
 // CHECK-NOUSE-I386-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK-NOUSE-I386:       omp_offload.cont:
-// CHECK-NOUSE-I386-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    store ptr [[A]], ptr [[TMP71]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    store ptr [[A]], ptr [[TMP73]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP75]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
-// CHECK-NOUSE-I386-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    store i32 2, ptr [[TMP78]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
-// CHECK-NOUSE-I386-NEXT:    store i32 1, ptr [[TMP79]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
-// CHECK-NOUSE-I386-NEXT:    store ptr [[TMP76]], ptr [[TMP80]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
-// CHECK-NOUSE-I386-NEXT:    store ptr [[TMP77]], ptr [[TMP81]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
-// CHECK-NOUSE-I386-NEXT:    store ptr @.offload_sizes.1, ptr [[TMP82]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
-// CHECK-NOUSE-I386-NEXT:    store ptr @.offload_maptypes.2, ptr [[TMP83]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
-// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP84]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
-// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP85]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
-// CHECK-NOUSE-I386-NEXT:    store i64 0, ptr [[TMP86]], align 8
-// CHECK-NOUSE-I386-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9
-// CHECK-NOUSE-I386-NEXT:    store i64 0, ptr [[TMP87]], align 8
-// CHECK-NOUSE-I386-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10
-// CHECK-NOUSE-I386-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP88]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11
-// CHECK-NOUSE-I386-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP89]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12
-// CHECK-NOUSE-I386-NEXT:    store i32 0, ptr [[TMP90]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP91:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l{{[0-9]*}}.region_id, ptr [[KERNEL_ARGS6]])
-// CHECK-NOUSE-I386-NEXT:    [[TMP92:%.*]] = icmp ne i32 [[TMP91]], 0
-// CHECK-NOUSE-I386-NEXT:    br i1 [[TMP92]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
+// CHECK-NOUSE-I386-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    store ptr [[A]], ptr [[TMP52]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    store ptr [[A]], ptr [[TMP53]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP54]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    store i32 2, ptr [[TMP57]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
+// CHECK-NOUSE-I386-NEXT:    store i32 1, ptr [[TMP58]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
+// CHECK-NOUSE-I386-NEXT:    store ptr [[TMP55]], ptr [[TMP59]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
+// CHECK-NOUSE-I386-NEXT:    store ptr [[TMP56]], ptr [[TMP60]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
+// CHECK-NOUSE-I386-NEXT:    store ptr @.offload_sizes.1, ptr [[TMP61]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
+// CHECK-NOUSE-I386-NEXT:    store ptr @.offload_maptypes.2, ptr [[TMP62]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
+// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP63]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
+// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP64]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
+// CHECK-NOUSE-I386-NEXT:    store i64 0, ptr [[TMP65]], align 8
+// CHECK-NOUSE-I386-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9
+// CHECK-NOUSE-I386-NEXT:    store i64 0, ptr [[TMP66]], align 8
+// CHECK-NOUSE-I386-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10
+// CHECK-NOUSE-I386-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP67]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11
+// CHECK-NOUSE-I386-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP68]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12
+// CHECK-NOUSE-I386-NEXT:    store i32 0, ptr [[TMP69]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP70:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l114.region_id, ptr [[KERNEL_ARGS6]])
+// CHECK-NOUSE-I386-NEXT:    [[TMP71:%.*]] = icmp ne i32 [[TMP70]], 0
+// CHECK-NOUSE-I386-NEXT:    br i1 [[TMP71]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
 // CHECK-NOUSE-I386:       omp_offload.failed7:
-// CHECK-NOUSE-I386-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l{{[0-9]*}}() #[[ATTR3]]
+// CHECK-NOUSE-I386-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l114() #[[ATTR3]]
 // CHECK-NOUSE-I386-NEXT:    br label [[OMP_OFFLOAD_CONT8]]
 // CHECK-NOUSE-I386:       omp_offload.cont8:
 // CHECK-NOUSE-I386-NEXT:    ret void
 //
 //
-// CHECK-NOUSE-I386-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l{{[0-9]*}}
+// CHECK-NOUSE-I386-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l100
 // CHECK-NOUSE-I386-SAME: () #[[ATTR1:[0-9]+]] {
 // CHECK-NOUSE-I386-NEXT:  entry:
 // CHECK-NOUSE-I386-NEXT:    ret void
 //
 //
-// CHECK-NOUSE-I386-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l{{[0-9]*}}
+// CHECK-NOUSE-I386-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l114
 // CHECK-NOUSE-I386-SAME: () #[[ATTR1]] {
 // CHECK-NOUSE-I386-NEXT:  entry:
 // CHECK-NOUSE-I386-NEXT:    ret void
@@ -1221,77 +1220,77 @@ void ST::test_present_members() {
 // CHECK-NOUSE-I386-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK-NOUSE-I386-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
 // CHECK-NOUSE-I386-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [3 x i64], align 4
+// CHECK-NOUSE-I386-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK-NOUSE-I386-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK-NOUSE-I386-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CHECK-NOUSE-I386-NEXT:    [[I:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0
 // CHECK-NOUSE-I386-NEXT:    [[J:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
 // CHECK-NOUSE-I386-NEXT:    [[TMP0:%.*]] = getelementptr i32, ptr [[J]], i32 1
-// CHECK-NOUSE-I386-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[TMP0]] to i64
-// CHECK-NOUSE-I386-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[I]] to i64
-// CHECK-NOUSE-I386-NEXT:    [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]]
-// CHECK-NOUSE-I386-NEXT:    [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK-NOUSE-I386-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64
+// CHECK-NOUSE-I386-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[I]] to i64
+// CHECK-NOUSE-I386-NEXT:    [[TMP3:%.*]] = sub i64 [[TMP1]], [[TMP2]]
+// CHECK-NOUSE-I386-NEXT:    [[TMP4:%.*]] = sdiv exact i64 [[TMP3]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
 // CHECK-NOUSE-I386-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes.3, i32 24, i1 false)
-// CHECK-NOUSE-I386-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    store ptr [[THIS1]], ptr [[TMP8]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    store ptr [[THIS1]], ptr [[TMP5]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    store ptr [[I]], ptr [[TMP6]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    store i64 [[TMP4]], ptr [[TMP7]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP8]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK-NOUSE-I386-NEXT:    store ptr [[THIS1]], ptr [[TMP9]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK-NOUSE-I386-NEXT:    store ptr [[I]], ptr [[TMP10]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    store i64 [[TMP6]], ptr [[TMP12]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP13]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK-NOUSE-I386-NEXT:    store ptr [[THIS1]], ptr [[TMP14]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK-NOUSE-I386-NEXT:    store ptr [[I]], ptr [[TMP16]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP18]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK-NOUSE-I386-NEXT:    store ptr [[THIS1]], ptr [[TMP19]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK-NOUSE-I386-NEXT:    store ptr [[J]], ptr [[TMP21]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP23]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK-NOUSE-I386-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    store i32 2, ptr [[TMP27]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK-NOUSE-I386-NEXT:    store i32 3, ptr [[TMP28]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK-NOUSE-I386-NEXT:    store ptr [[TMP24]], ptr [[TMP29]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK-NOUSE-I386-NEXT:    store ptr [[TMP25]], ptr [[TMP30]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK-NOUSE-I386-NEXT:    store ptr [[TMP26]], ptr [[TMP31]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK-NOUSE-I386-NEXT:    store ptr @.offload_maptypes.4, ptr [[TMP32]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP33]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP34]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK-NOUSE-I386-NEXT:    store i64 0, ptr [[TMP35]], align 8
-// CHECK-NOUSE-I386-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
-// CHECK-NOUSE-I386-NEXT:    store i64 0, ptr [[TMP36]], align 8
-// CHECK-NOUSE-I386-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
-// CHECK-NOUSE-I386-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP37]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
-// CHECK-NOUSE-I386-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP38]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
-// CHECK-NOUSE-I386-NEXT:    store i32 0, ptr [[TMP39]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l{{[0-9]*}}.region_id, ptr [[KERNEL_ARGS]])
-// CHECK-NOUSE-I386-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
-// CHECK-NOUSE-I386-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK-NOUSE-I386-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP11]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK-NOUSE-I386-NEXT:    store ptr [[THIS1]], ptr [[TMP12]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK-NOUSE-I386-NEXT:    store ptr [[J]], ptr [[TMP13]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP14]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    store i32 2, ptr [[TMP18]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK-NOUSE-I386-NEXT:    store i32 3, ptr [[TMP19]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK-NOUSE-I386-NEXT:    store ptr [[TMP15]], ptr [[TMP20]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK-NOUSE-I386-NEXT:    store ptr [[TMP16]], ptr [[TMP21]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK-NOUSE-I386-NEXT:    store ptr [[TMP17]], ptr [[TMP22]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK-NOUSE-I386-NEXT:    store ptr @.offload_maptypes.4, ptr [[TMP23]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP24]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK-NOUSE-I386-NEXT:    store ptr null, ptr [[TMP25]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK-NOUSE-I386-NEXT:    store i64 0, ptr [[TMP26]], align 8
+// CHECK-NOUSE-I386-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK-NOUSE-I386-NEXT:    store i64 0, ptr [[TMP27]], align 8
+// CHECK-NOUSE-I386-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK-NOUSE-I386-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP28]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK-NOUSE-I386-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP29]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK-NOUSE-I386-NEXT:    store i32 0, ptr [[TMP30]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l125.region_id, ptr [[KERNEL_ARGS]])
+// CHECK-NOUSE-I386-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
+// CHECK-NOUSE-I386-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK-NOUSE-I386:       omp_offload.failed:
-// CHECK-NOUSE-I386-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l{{[0-9]*}}() #[[ATTR3]]
+// CHECK-NOUSE-I386-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l125() #[[ATTR3]]
 // CHECK-NOUSE-I386-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK-NOUSE-I386:       omp_offload.cont:
 // CHECK-NOUSE-I386-NEXT:    ret void
 //
 //
-// CHECK-NOUSE-I386-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l{{[0-9]*}}
+// CHECK-NOUSE-I386-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l125
 // CHECK-NOUSE-I386-SAME: () #[[ATTR1]] {
 // CHECK-NOUSE-I386-NEXT:  entry:
 // CHECK-NOUSE-I386-NEXT:    ret void

diff  --git a/clang/test/OpenMP/target_map_deref_array_codegen.cpp b/clang/test/OpenMP/target_map_deref_array_codegen.cpp
index 40b3b5f1a6f2c..11ff8125a0a99 100644
--- a/clang/test/OpenMP/target_map_deref_array_codegen.cpp
+++ b/clang/test/OpenMP/target_map_deref_array_codegen.cpp
@@ -25,9 +25,6 @@ void foo(int **t1d)
 
 #endif
 
-// CHECK: @.offload_maptypes = private unnamed_addr constant [2 x i64] [i64 33, i64 17]
-// CHECK: @.offload_maptypes.2 = private unnamed_addr constant [2 x i64] [i64 35, i64 19]
-// CHECK: @.offload_maptypes.4 = private unnamed_addr constant [4 x i64] [i64 35, i64 19, i64 800, i64 800]
 // CHECK-LABEL: define {{[^@]+}}@_Z3fooPPi
 // CHECK-SAME: (ptr noundef [[T1D:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  entry:
@@ -36,9 +33,11 @@ void foo(int **t1d)
 // CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8
+// CHECK-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [2 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_PTRS3:%.*]] = alloca [2 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [2 x ptr], align 8
+// CHECK-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
@@ -46,6 +45,7 @@ void foo(int **t1d)
 // CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS12:%.*]] = alloca [4 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_PTRS13:%.*]] = alloca [4 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_MAPPERS14:%.*]] = alloca [4 x ptr], align 8
+// CHECK-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK-NEXT:    store ptr [[T1D]], ptr [[T1D_ADDR]], align 8
 // CHECK-NEXT:    [[CALL:%.*]] = call noalias noundef ptr @_Z6malloci(i32 noundef signext 12) #[[ATTR3:[0-9]+]]
 // CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[T1D_ADDR]], align 8
@@ -90,7 +90,6 @@ void foo(int **t1d)
 // CHECK-NEXT:    store ptr null, ptr [[TMP16]], align 8
 // CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK-NEXT:    store i32 2, ptr [[TMP19]], align 4
 // CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -143,7 +142,6 @@ void foo(int **t1d)
 // CHECK-NEXT:    store ptr null, ptr [[TMP44]], align 8
 // CHECK-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
-// CHECK-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK-NEXT:    store i32 2, ptr [[TMP47]], align 4
 // CHECK-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -225,7 +223,6 @@ void foo(int **t1d)
 // CHECK-NEXT:    store ptr null, ptr [[TMP85]], align 8
 // CHECK-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS12]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS13]], i32 0, i32 0
-// CHECK-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK-NEXT:    store i32 2, ptr [[TMP88]], align 4
 // CHECK-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_map_member_expr_codegen.cpp b/clang/test/OpenMP/target_map_member_expr_codegen.cpp
index 26676ad7c3832..dc2b73638eda6 100644
--- a/clang/test/OpenMP/target_map_member_expr_codegen.cpp
+++ b/clang/test/OpenMP/target_map_member_expr_codegen.cpp
@@ -70,10 +70,6 @@ void foo() {
   c.bar(d);
 }
 
-// CHECK: @.offload_sizes = private unnamed_addr constant [4 x i64] [i64 12, i64 4, i64 4, i64 4]
-// CHECK-NOT: @.offload_sizes = private unnamed_addr constant [4 x i64] [i64 0, i64 4, i64 4, i64 4]
-// CHECK: @.offload_sizes.4 = private unnamed_addr constant [3 x i64] [i64 4, i64 0, i64 0]
-// CHECK-NOT: @.offload_sizes.4 = private unnamed_addr constant [3 x i64] [i64 4, i64 1, i64 0]
 // CHECK-LABEL: define {{[^@]+}}@_Z3foov
 // CHECK-SAME: () #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  entry:
@@ -109,6 +105,7 @@ void foo() {
 // CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
+// CHECK-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK-NEXT:    [[RES:%.*]] = getelementptr inbounds [[CLASS_B:%.*]], ptr [[THIS1]], i32 0, i32 1
@@ -140,7 +137,6 @@ void foo() {
 // CHECK-NEXT:    store ptr null, ptr [[TMP11]], align 8
 // CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK-NEXT:    store i32 2, ptr [[TMP14]], align 4
 // CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -194,12 +190,14 @@ void foo() {
 // CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS9:%.*]] = alloca [2 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_PTRS10:%.*]] = alloca [2 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_MAPPERS11:%.*]] = alloca [2 x ptr], align 8
+// CHECK-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK-NEXT:    [[_TMP12:%.*]] = alloca ptr, align 8
 // CHECK-NEXT:    [[CSIZE_CASTED13:%.*]] = alloca i64, align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS18:%.*]] = alloca [3 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_PTRS19:%.*]] = alloca [3 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_MAPPERS20:%.*]] = alloca [3 x ptr], align 8
 // CHECK-NEXT:    [[DOTOFFLOAD_SIZES21:%.*]] = alloca [3 x i64], align 8
+// CHECK-NEXT:    [[KERNEL_ARGS22:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK-NEXT:    store ptr [[D]], ptr [[D_ADDR]], align 8
 // CHECK-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
@@ -296,7 +294,6 @@ void foo() {
 // CHECK-NEXT:    store ptr null, ptr [[TMP52]], align 8
 // CHECK-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS9]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS10]], i32 0, i32 0
-// CHECK-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK-NEXT:    store i32 2, ptr [[TMP55]], align 4
 // CHECK-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -381,7 +378,6 @@ void foo() {
 // CHECK-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
-// CHECK-NEXT:    [[KERNEL_ARGS22:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS22]], i32 0, i32 0
 // CHECK-NEXT:    store i32 2, ptr [[TMP102]], align 4
 // CHECK-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS22]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_offload_mandatory_codegen.cpp b/clang/test/OpenMP/target_offload_mandatory_codegen.cpp
index 2062e2e390fd4..5803a81c4b1a5 100644
--- a/clang/test/OpenMP/target_offload_mandatory_codegen.cpp
+++ b/clang/test/OpenMP/target_offload_mandatory_codegen.cpp
@@ -71,13 +71,13 @@ void host_dev(int device) {
 // MANDATORY-SAME: (i1 noundef zeroext [[COND:%.*]]) #[[ATTR0]] {
 // MANDATORY-NEXT:  entry:
 // MANDATORY-NEXT:    [[COND_ADDR:%.*]] = alloca i8, align 1
+// MANDATORY-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // MANDATORY-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[COND]] to i8
 // MANDATORY-NEXT:    store i8 [[FROMBOOL]], ptr [[COND_ADDR]], align 1
 // MANDATORY-NEXT:    [[TMP0:%.*]] = load i8, ptr [[COND_ADDR]], align 1
 // MANDATORY-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1
 // MANDATORY-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // MANDATORY:       omp_if.then:
-// MANDATORY-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // MANDATORY-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // MANDATORY-NEXT:    store i32 2, ptr [[TMP1]], align 4
 // MANDATORY-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -122,12 +122,12 @@ void host_dev(int device) {
 // MANDATORY-NEXT:  entry:
 // MANDATORY-NEXT:    [[DEVICE_ADDR:%.*]] = alloca i32, align 4
 // MANDATORY-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// MANDATORY-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // MANDATORY-NEXT:    store i32 [[DEVICE]], ptr [[DEVICE_ADDR]], align 4
 // MANDATORY-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DEVICE_ADDR]], align 4
 // MANDATORY-NEXT:    store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
 // MANDATORY-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
 // MANDATORY-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
-// MANDATORY-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // MANDATORY-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // MANDATORY-NEXT:    store i32 2, ptr [[TMP3]], align 4
 // MANDATORY-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_ompx_dyn_cgroup_mem_codegen.cpp b/clang/test/OpenMP/target_ompx_dyn_cgroup_mem_codegen.cpp
index a023946f431db..b38a4273d4724 100644
--- a/clang/test/OpenMP/target_ompx_dyn_cgroup_mem_codegen.cpp
+++ b/clang/test/OpenMP/target_ompx_dyn_cgroup_mem_codegen.cpp
@@ -214,9 +214,11 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
@@ -253,7 +255,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -296,7 +297,6 @@ int bar(int n){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP35]], align 8
 // CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP38]], align 4
 // CHECK1-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
@@ -351,6 +351,7 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR__CASTED8:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS9:%.*]] = alloca [1 x ptr], align 8
@@ -406,7 +407,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP24:%.*]] = zext i32 [[ADD]] to i64
 // CHECK1-NEXT:    [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK1-NEXT:    [[TMP26:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP20]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP27]], align 4
 // CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -480,6 +480,7 @@ int bar(int n){
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[A:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[B:%.*]] = alloca i16, align 2
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i16, align 2
 // CHECK1-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
@@ -488,9 +489,9 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS1:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[A]], align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -559,7 +560,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP33:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2
 // CHECK1-NEXT:    [[TMP34:%.*]] = sext i16 [[TMP33]] to i32
 // CHECK1-NEXT:    [[TMP35:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP34]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS1:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP36]], align 4
 // CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 1
@@ -927,8 +927,8 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT:    [[DOTCAPTURE_EXPR__CASTED_I:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR__CASTED_I:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    store i32 [[TMP0]], ptr [[DOTADDR]], align 4
@@ -1103,9 +1103,11 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
@@ -1142,7 +1144,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1185,7 +1186,6 @@ int bar(int n){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP35]], align 4
 // CHECK3-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP38]], align 4
 // CHECK3-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
@@ -1240,6 +1240,7 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR__CASTED8:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS9:%.*]] = alloca [1 x ptr], align 4
@@ -1295,7 +1296,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP24:%.*]] = zext i32 [[ADD]] to i64
 // CHECK3-NEXT:    [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK3-NEXT:    [[TMP26:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP20]], 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP27]], align 4
 // CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1369,6 +1369,7 @@ int bar(int n){
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[A:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[B:%.*]] = alloca i16, align 2
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i16, align 2
 // CHECK3-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
@@ -1377,9 +1378,9 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS1:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    store i32 0, ptr [[A]], align 4
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1448,7 +1449,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP33:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2
 // CHECK3-NEXT:    [[TMP34:%.*]] = sext i16 [[TMP33]] to i32
 // CHECK3-NEXT:    [[TMP35:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP34]], 0
-// CHECK3-NEXT:    [[KERNEL_ARGS1:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP36]], align 4
 // CHECK3-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 1
@@ -1812,8 +1812,8 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca ptr, align 4
 // CHECK3-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca ptr, align 4
 // CHECK3-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT:    [[DOTCAPTURE_EXPR__CASTED_I:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK3-NEXT:    [[DOTCAPTURE_EXPR__CASTED_I:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTADDR:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTADDR1:%.*]] = alloca ptr, align 4
 // CHECK3-NEXT:    store i32 [[TMP0]], ptr [[DOTADDR]], align 4

diff  --git a/clang/test/OpenMP/target_parallel_codegen.cpp b/clang/test/OpenMP/target_parallel_codegen.cpp
index 02788b69e2680..a35e64d7c8e77 100644
--- a/clang/test/OpenMP/target_parallel_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_codegen.cpp
@@ -309,16 +309,19 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[A_CASTED2:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[AA_CASTED3:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS6:%.*]] = alloca [2 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[A_CASTED10:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS13:%.*]] = alloca [9 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS14:%.*]] = alloca [9 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS15:%.*]] = alloca [9 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[A]], align 4
@@ -352,7 +355,6 @@ int bar(int n){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP16]], align 8
 // CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP19]], align 4
 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -410,7 +412,6 @@ int bar(int n){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP44]], align 8
 // CHECK1-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP47]], align 4
 // CHECK1-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -521,7 +522,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP100]], align 4
 // CHECK1-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1
@@ -911,6 +911,7 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
@@ -970,7 +971,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP29]], align 4
 // CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1035,6 +1035,7 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK1-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -1078,7 +1079,6 @@ int bar(int n){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP18]], align 8
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP21]], align 4
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1133,6 +1133,7 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK1-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -1166,7 +1167,6 @@ int bar(int n){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP13]], align 8
 // CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP16]], align 4
 // CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1412,16 +1412,19 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[A_CASTED2:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[AA_CASTED3:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS6:%.*]] = alloca [2 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[A_CASTED10:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS13:%.*]] = alloca [9 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS14:%.*]] = alloca [9 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS15:%.*]] = alloca [9 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    store i32 0, ptr [[A]], align 4
@@ -1453,7 +1456,6 @@ int bar(int n){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP14]], align 4
 // CHECK3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP17]], align 4
 // CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1511,7 +1513,6 @@ int bar(int n){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP42]], align 4
 // CHECK3-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK3-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -1624,7 +1625,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP100]], align 4
 // CHECK3-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1
@@ -2014,6 +2014,7 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
@@ -2073,7 +2074,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP29]], align 4
 // CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2138,6 +2138,7 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK3-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -2181,7 +2182,6 @@ int bar(int n){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP18]], align 4
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP21]], align 4
 // CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2236,6 +2236,7 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK3-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -2269,7 +2270,6 @@ int bar(int n){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP13]], align 4
 // CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP16]], align 4
 // CHECK3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_parallel_for_codegen.cpp b/clang/test/OpenMP/target_parallel_for_codegen.cpp
index 2d14c963b0577..405eaadc4f264 100644
--- a/clang/test/OpenMP/target_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_for_codegen.cpp
@@ -324,6 +324,7 @@ int bar(int n){
 // CHECK1-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
 // CHECK1-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[K:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[K_CASTED:%.*]] = alloca i64, align 8
@@ -340,6 +341,7 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS6:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS7:%.*]] = alloca [2 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS8:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[A_CASTED11:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
@@ -347,6 +349,7 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS15:%.*]] = alloca [10 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS16:%.*]] = alloca [10 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [10 x i64], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS17:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[A]], align 4
@@ -362,7 +365,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP6:%.*]] = mul nuw i64 5, [[TMP5]]
 // CHECK1-NEXT:    [[VLA1:%.*]] = alloca double, i64 [[TMP6]], align 8
 // CHECK1-NEXT:    store i64 [[TMP5]], ptr [[__VLA_EXPR1]], align 8
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -484,7 +486,6 @@ int bar(int n){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP70]], align 8
 // CHECK1-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS8:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP73]], align 4
 // CHECK1-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 1
@@ -606,7 +607,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [10 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS17:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP132]], align 4
 // CHECK1-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 1
@@ -1012,10 +1012,10 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR3_I:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[AA_CASTED_I:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[LIN_CASTED_I:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[A_CASTED_I:%.*]] = alloca i64, align 8
-// CHECK1-NEXT:    [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    store i32 [[TMP0]], ptr [[DOTADDR]], align 4
@@ -1414,6 +1414,7 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
@@ -1473,7 +1474,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP29]], align 4
 // CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1538,6 +1538,7 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK1-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -1581,7 +1582,6 @@ int bar(int n){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP18]], align 8
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP21]], align 4
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1636,6 +1636,7 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK1-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -1669,7 +1670,6 @@ int bar(int n){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP13]], align 8
 // CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP16]], align 4
 // CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2000,6 +2000,7 @@ int bar(int n){
 // CHECK3-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
 // CHECK3-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[K:%.*]] = alloca i64, align 8
 // CHECK3-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[LIN:%.*]] = alloca i32, align 4
@@ -2015,6 +2016,7 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS6:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS7:%.*]] = alloca [2 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS8:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[A_CASTED11:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
@@ -2022,6 +2024,7 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS15:%.*]] = alloca [10 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS16:%.*]] = alloca [10 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [10 x i64], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS17:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    store i32 0, ptr [[A]], align 4
@@ -2035,7 +2038,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP4:%.*]] = mul nuw i32 5, [[TMP3]]
 // CHECK3-NEXT:    [[VLA1:%.*]] = alloca double, i32 [[TMP4]], align 8
 // CHECK3-NEXT:    store i32 [[TMP3]], ptr [[__VLA_EXPR1]], align 4
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2154,7 +2156,6 @@ int bar(int n){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP66]], align 4
 // CHECK3-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS8:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP69]], align 4
 // CHECK3-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 1
@@ -2278,7 +2279,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [10 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS17:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP130]], align 4
 // CHECK3-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 1
@@ -2682,10 +2682,10 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca ptr, align 4
 // CHECK3-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca ptr, align 4
 // CHECK3-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR3_I:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[AA_CASTED_I:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[LIN_CASTED_I:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[A_CASTED_I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[DOTADDR:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTADDR1:%.*]] = alloca ptr, align 4
 // CHECK3-NEXT:    store i32 [[TMP0]], ptr [[DOTADDR]], align 4
@@ -3084,6 +3084,7 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
@@ -3143,7 +3144,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP29]], align 4
 // CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3208,6 +3208,7 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK3-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -3251,7 +3252,6 @@ int bar(int n){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP18]], align 4
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP21]], align 4
 // CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3306,6 +3306,7 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK3-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -3339,7 +3340,6 @@ int bar(int n){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP13]], align 4
 // CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP16]], align 4
 // CHECK3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -5200,6 +5200,7 @@ int bar(int n){
 // CHECK17-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
 // CHECK17-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i64, align 8
 // CHECK17-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
+// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    [[K:%.*]] = alloca i64, align 8
 // CHECK17-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
 // CHECK17-NEXT:    [[K_CASTED:%.*]] = alloca i64, align 8
@@ -5216,6 +5217,7 @@ int bar(int n){
 // CHECK17-NEXT:    [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [2 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS6:%.*]] = alloca [2 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_MAPPERS7:%.*]] = alloca [2 x ptr], align 8
+// CHECK17-NEXT:    [[KERNEL_ARGS8:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[A_CASTED11:%.*]] = alloca i64, align 8
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
@@ -5223,6 +5225,7 @@ int bar(int n){
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS15:%.*]] = alloca [10 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_MAPPERS16:%.*]] = alloca [10 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [10 x i64], align 8
+// CHECK17-NEXT:    [[KERNEL_ARGS17:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
 // CHECK17-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK17-NEXT:    store i32 0, ptr [[A]], align 4
@@ -5238,7 +5241,6 @@ int bar(int n){
 // CHECK17-NEXT:    [[TMP6:%.*]] = mul nuw i64 5, [[TMP5]]
 // CHECK17-NEXT:    [[VLA1:%.*]] = alloca double, i64 [[TMP6]], align 8
 // CHECK17-NEXT:    store i64 [[TMP5]], ptr [[__VLA_EXPR1]], align 8
-// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK17-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -5360,7 +5362,6 @@ int bar(int n){
 // CHECK17-NEXT:    store ptr null, ptr [[TMP70]], align 8
 // CHECK17-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK17-NEXT:    [[KERNEL_ARGS8:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP73]], align 4
 // CHECK17-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 1
@@ -5482,7 +5483,6 @@ int bar(int n){
 // CHECK17-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [10 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK17-NEXT:    [[KERNEL_ARGS17:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP132]], align 4
 // CHECK17-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 1
@@ -5888,10 +5888,10 @@ int bar(int n){
 // CHECK17-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca ptr, align 8
 // CHECK17-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca ptr, align 8
 // CHECK17-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR3_I:%.*]] = alloca ptr, align 8
+// CHECK17-NEXT:    [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    [[AA_CASTED_I:%.*]] = alloca i64, align 8
 // CHECK17-NEXT:    [[LIN_CASTED_I:%.*]] = alloca i64, align 8
 // CHECK17-NEXT:    [[A_CASTED_I:%.*]] = alloca i64, align 8
-// CHECK17-NEXT:    [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    [[DOTADDR:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTADDR1:%.*]] = alloca ptr, align 8
 // CHECK17-NEXT:    store i32 [[TMP0]], ptr [[DOTADDR]], align 4
@@ -6290,6 +6290,7 @@ int bar(int n){
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8
+// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK17-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK17-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
@@ -6349,7 +6350,6 @@ int bar(int n){
 // CHECK17-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP29]], align 4
 // CHECK17-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -6414,6 +6414,7 @@ int bar(int n){
 // CHECK17-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
+// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK17-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK17-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -6457,7 +6458,6 @@ int bar(int n){
 // CHECK17-NEXT:    store ptr null, ptr [[TMP18]], align 8
 // CHECK17-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP21]], align 4
 // CHECK17-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -6512,6 +6512,7 @@ int bar(int n){
 // CHECK17-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
+// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK17-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK17-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -6545,7 +6546,6 @@ int bar(int n){
 // CHECK17-NEXT:    store ptr null, ptr [[TMP13]], align 8
 // CHECK17-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP16]], align 4
 // CHECK17-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -6876,6 +6876,7 @@ int bar(int n){
 // CHECK19-NEXT:    [[C:%.*]] = alloca [5 x [10 x double]], align 8
 // CHECK19-NEXT:    [[__VLA_EXPR1:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    [[K:%.*]] = alloca i64, align 8
 // CHECK19-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[LIN:%.*]] = alloca i32, align 4
@@ -6891,6 +6892,7 @@ int bar(int n){
 // CHECK19-NEXT:    [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [2 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS6:%.*]] = alloca [2 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_MAPPERS7:%.*]] = alloca [2 x ptr], align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS8:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[A_CASTED11:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
@@ -6898,6 +6900,7 @@ int bar(int n){
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS15:%.*]] = alloca [10 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_MAPPERS16:%.*]] = alloca [10 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [10 x i64], align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS17:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
 // CHECK19-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK19-NEXT:    store i32 0, ptr [[A]], align 4
@@ -6911,7 +6914,6 @@ int bar(int n){
 // CHECK19-NEXT:    [[TMP4:%.*]] = mul nuw i32 5, [[TMP3]]
 // CHECK19-NEXT:    [[VLA1:%.*]] = alloca double, i32 [[TMP4]], align 8
 // CHECK19-NEXT:    store i32 [[TMP3]], ptr [[__VLA_EXPR1]], align 4
-// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK19-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -7030,7 +7032,6 @@ int bar(int n){
 // CHECK19-NEXT:    store ptr null, ptr [[TMP66]], align 4
 // CHECK19-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK19-NEXT:    [[KERNEL_ARGS8:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP69]], align 4
 // CHECK19-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 1
@@ -7154,7 +7155,6 @@ int bar(int n){
 // CHECK19-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [10 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK19-NEXT:    [[KERNEL_ARGS17:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP130]], align 4
 // CHECK19-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 1
@@ -7558,10 +7558,10 @@ int bar(int n){
 // CHECK19-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca ptr, align 4
 // CHECK19-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca ptr, align 4
 // CHECK19-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR3_I:%.*]] = alloca ptr, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    [[AA_CASTED_I:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[LIN_CASTED_I:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[A_CASTED_I:%.*]] = alloca i32, align 4
-// CHECK19-NEXT:    [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    [[DOTADDR:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTADDR1:%.*]] = alloca ptr, align 4
 // CHECK19-NEXT:    store i32 [[TMP0]], ptr [[DOTADDR]], align 4
@@ -7960,6 +7960,7 @@ int bar(int n){
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK19-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK19-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
@@ -8019,7 +8020,6 @@ int bar(int n){
 // CHECK19-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP29]], align 4
 // CHECK19-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -8084,6 +8084,7 @@ int bar(int n){
 // CHECK19-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK19-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK19-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -8127,7 +8128,6 @@ int bar(int n){
 // CHECK19-NEXT:    store ptr null, ptr [[TMP18]], align 4
 // CHECK19-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP21]], align 4
 // CHECK19-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -8182,6 +8182,7 @@ int bar(int n){
 // CHECK19-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK19-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK19-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -8215,7 +8216,6 @@ int bar(int n){
 // CHECK19-NEXT:    store ptr null, ptr [[TMP13]], align 4
 // CHECK19-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP16]], align 4
 // CHECK19-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp
index 387c93cd35647..af3a6d8e4db24 100644
--- a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp
@@ -322,11 +322,13 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[A_CASTED3:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[AA_CASTED4:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS6:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS7:%.*]] = alloca [2 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS8:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[A_CASTED11:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
@@ -334,6 +336,7 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS15:%.*]] = alloca [10 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS16:%.*]] = alloca [10 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [10 x i64], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS17:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[A]], align 4
@@ -391,7 +394,6 @@ int bar(int n){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP28]], align 8
 // CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP31]], align 4
 // CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -449,7 +451,6 @@ int bar(int n){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP56]], align 8
 // CHECK1-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS8:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP59]], align 4
 // CHECK1-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 1
@@ -571,7 +572,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [10 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS17:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP118]], align 4
 // CHECK1-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 1
@@ -1342,6 +1342,7 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
@@ -1401,7 +1402,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP29]], align 4
 // CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1466,6 +1466,7 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK1-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -1509,7 +1510,6 @@ int bar(int n){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP18]], align 8
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP21]], align 4
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1564,6 +1564,7 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK1-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -1597,7 +1598,6 @@ int bar(int n){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP13]], align 8
 // CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP16]], align 4
 // CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1952,11 +1952,13 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[A_CASTED3:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[AA_CASTED4:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS6:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS7:%.*]] = alloca [2 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS8:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[A_CASTED11:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
@@ -1964,6 +1966,7 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS15:%.*]] = alloca [10 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS16:%.*]] = alloca [10 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [10 x i64], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS17:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    store i32 0, ptr [[A]], align 4
@@ -2016,7 +2019,6 @@ int bar(int n){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP24]], align 4
 // CHECK3-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP27]], align 4
 // CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2074,7 +2076,6 @@ int bar(int n){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP52]], align 4
 // CHECK3-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS8:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP55]], align 4
 // CHECK3-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 1
@@ -2198,7 +2199,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [10 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS17:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP116]], align 4
 // CHECK3-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 1
@@ -2967,6 +2967,7 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
@@ -3026,7 +3027,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP29]], align 4
 // CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3091,6 +3091,7 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK3-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -3134,7 +3135,6 @@ int bar(int n){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP18]], align 4
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP21]], align 4
 // CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3189,6 +3189,7 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK3-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -3222,7 +3223,6 @@ int bar(int n){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP13]], align 4
 // CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP16]], align 4
 // CHECK3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3578,11 +3578,13 @@ int bar(int n){
 // CHECK5-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
+// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[A_CASTED3:%.*]] = alloca i64, align 8
 // CHECK5-NEXT:    [[AA_CASTED4:%.*]] = alloca i64, align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [2 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS6:%.*]] = alloca [2 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS7:%.*]] = alloca [2 x ptr], align 8
+// CHECK5-NEXT:    [[KERNEL_ARGS8:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK5-NEXT:    [[A_CASTED11:%.*]] = alloca i64, align 8
 // CHECK5-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
@@ -3590,6 +3592,7 @@ int bar(int n){
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS15:%.*]] = alloca [10 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS16:%.*]] = alloca [10 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [10 x i64], align 8
+// CHECK5-NEXT:    [[KERNEL_ARGS17:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
 // CHECK5-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK5-NEXT:    store i32 0, ptr [[A]], align 4
@@ -3647,7 +3650,6 @@ int bar(int n){
 // CHECK5-NEXT:    store ptr null, ptr [[TMP28]], align 8
 // CHECK5-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP31]], align 4
 // CHECK5-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3705,7 +3707,6 @@ int bar(int n){
 // CHECK5-NEXT:    store ptr null, ptr [[TMP56]], align 8
 // CHECK5-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS8:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP59]], align 4
 // CHECK5-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 1
@@ -3827,7 +3828,6 @@ int bar(int n){
 // CHECK5-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [10 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS17:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP118]], align 4
 // CHECK5-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 1
@@ -4600,6 +4600,7 @@ int bar(int n){
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [6 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [6 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [6 x i64], align 8
+// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK5-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK5-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
@@ -4678,7 +4679,6 @@ int bar(int n){
 // CHECK5-NEXT:    [[TOBOOL4:%.*]] = trunc i8 [[TMP35]] to i1
 // CHECK5-NEXT:    [[TMP36:%.*]] = select i1 [[TOBOOL4]], i32 0, i32 1
 // CHECK5-NEXT:    [[TMP37:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP36]], 0
-// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP38]], align 4
 // CHECK5-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -4743,6 +4743,7 @@ int bar(int n){
 // CHECK5-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
+// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK5-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK5-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -4786,7 +4787,6 @@ int bar(int n){
 // CHECK5-NEXT:    store ptr null, ptr [[TMP18]], align 8
 // CHECK5-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP21]], align 4
 // CHECK5-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -4841,6 +4841,7 @@ int bar(int n){
 // CHECK5-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
+// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK5-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK5-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -4874,7 +4875,6 @@ int bar(int n){
 // CHECK5-NEXT:    store ptr null, ptr [[TMP13]], align 8
 // CHECK5-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP16]], align 4
 // CHECK5-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -5315,11 +5315,13 @@ int bar(int n){
 // CHECK7-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    [[A_CASTED3:%.*]] = alloca i32, align 4
 // CHECK7-NEXT:    [[AA_CASTED4:%.*]] = alloca i32, align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [2 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_PTRS6:%.*]] = alloca [2 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS7:%.*]] = alloca [2 x ptr], align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS8:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK7-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK7-NEXT:    [[A_CASTED11:%.*]] = alloca i32, align 4
 // CHECK7-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
@@ -5327,6 +5329,7 @@ int bar(int n){
 // CHECK7-NEXT:    [[DOTOFFLOAD_PTRS15:%.*]] = alloca [10 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS16:%.*]] = alloca [10 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [10 x i64], align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS17:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK7-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
 // CHECK7-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK7-NEXT:    store i32 0, ptr [[A]], align 4
@@ -5379,7 +5382,6 @@ int bar(int n){
 // CHECK7-NEXT:    store ptr null, ptr [[TMP24]], align 4
 // CHECK7-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP27]], align 4
 // CHECK7-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -5437,7 +5439,6 @@ int bar(int n){
 // CHECK7-NEXT:    store ptr null, ptr [[TMP52]], align 4
 // CHECK7-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK7-NEXT:    [[KERNEL_ARGS8:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK7-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP55]], align 4
 // CHECK7-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 1
@@ -5561,7 +5562,6 @@ int bar(int n){
 // CHECK7-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [10 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK7-NEXT:    [[KERNEL_ARGS17:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK7-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP116]], align 4
 // CHECK7-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 1
@@ -6332,6 +6332,7 @@ int bar(int n){
 // CHECK7-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [6 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [6 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [6 x i64], align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK7-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK7-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
@@ -6410,7 +6411,6 @@ int bar(int n){
 // CHECK7-NEXT:    [[TOBOOL4:%.*]] = trunc i8 [[TMP35]] to i1
 // CHECK7-NEXT:    [[TMP36:%.*]] = select i1 [[TOBOOL4]], i32 0, i32 1
 // CHECK7-NEXT:    [[TMP37:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP36]], 0
-// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP38]], align 4
 // CHECK7-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -6475,6 +6475,7 @@ int bar(int n){
 // CHECK7-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK7-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK7-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -6518,7 +6519,6 @@ int bar(int n){
 // CHECK7-NEXT:    store ptr null, ptr [[TMP18]], align 4
 // CHECK7-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP21]], align 4
 // CHECK7-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -6573,6 +6573,7 @@ int bar(int n){
 // CHECK7-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK7-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK7-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -6606,7 +6607,6 @@ int bar(int n){
 // CHECK7-NEXT:    store ptr null, ptr [[TMP13]], align 4
 // CHECK7-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP16]], align 4
 // CHECK7-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_parallel_if_codegen.cpp b/clang/test/OpenMP/target_parallel_if_codegen.cpp
index 8796862623c4e..e1fc1d82aff3f 100644
--- a/clang/test/OpenMP/target_parallel_if_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_if_codegen.cpp
@@ -221,11 +221,13 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_4:%.*]] = alloca i8, align 1
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR__CASTED8:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS12:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS13:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS14:%.*]] = alloca [2 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
@@ -267,7 +269,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[TOBOOL3:%.*]] = trunc i8 [[TMP16]] to i1
 // CHECK1-NEXT:    [[TMP17:%.*]] = select i1 [[TOBOOL3]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP18:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP17]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP19]], align 4
 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -333,7 +334,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[TOBOOL15:%.*]] = trunc i8 [[TMP46]] to i1
 // CHECK1-NEXT:    [[TMP47:%.*]] = select i1 [[TOBOOL15]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP48:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP47]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP49]], align 4
 // CHECK1-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1
@@ -387,6 +387,8 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 1
@@ -413,7 +415,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[TOBOOL3:%.*]] = trunc i8 [[TMP9]] to i1
 // CHECK1-NEXT:    [[TMP10:%.*]] = select i1 [[TOBOOL3]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP11:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP10]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP12]], align 4
 // CHECK1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -457,7 +458,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[SUB]], 2
 // CHECK1-NEXT:    br i1 [[CMP4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE9:%.*]]
 // CHECK1:       omp_if.then5:
-// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP28]], align 4
 // CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
@@ -510,12 +510,14 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[B:%.*]] = alloca i16, align 2
 // CHECK1-NEXT:    [[A_CASTED1:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[B_CASTED:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS3:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [2 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
@@ -529,7 +531,6 @@ int bar(int n){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP4]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -584,7 +585,6 @@ int bar(int n){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP31]], align 8
 // CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP34]], align 4
 // CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -890,11 +890,13 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_4:%.*]] = alloca i8, align 1
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR__CASTED8:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS12:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS13:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS14:%.*]] = alloca [2 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
@@ -936,7 +938,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[TOBOOL3:%.*]] = trunc i8 [[TMP16]] to i1
 // CHECK3-NEXT:    [[TMP17:%.*]] = select i1 [[TOBOOL3]], i32 0, i32 1
 // CHECK3-NEXT:    [[TMP18:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP17]], 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP19]], align 4
 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1002,7 +1003,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[TOBOOL15:%.*]] = trunc i8 [[TMP46]] to i1
 // CHECK3-NEXT:    [[TMP47:%.*]] = select i1 [[TOBOOL15]], i32 0, i32 1
 // CHECK3-NEXT:    [[TMP48:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP47]], 0
-// CHECK3-NEXT:    [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP49]], align 4
 // CHECK3-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1
@@ -1056,6 +1056,8 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK3-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 1
@@ -1082,7 +1084,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[TOBOOL3:%.*]] = trunc i8 [[TMP9]] to i1
 // CHECK3-NEXT:    [[TMP10:%.*]] = select i1 [[TOBOOL3]], i32 0, i32 1
 // CHECK3-NEXT:    [[TMP11:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP10]], 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP12]], align 4
 // CHECK3-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1126,7 +1127,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[SUB]], 2
 // CHECK3-NEXT:    br i1 [[CMP4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE9:%.*]]
 // CHECK3:       omp_if.then5:
-// CHECK3-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP28]], align 4
 // CHECK3-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
@@ -1179,12 +1179,14 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[B:%.*]] = alloca i16, align 2
 // CHECK3-NEXT:    [[A_CASTED1:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[B_CASTED:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS3:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [2 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
@@ -1198,7 +1200,6 @@ int bar(int n){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP4]], align 4
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK3-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1253,7 +1254,6 @@ int bar(int n){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP31]], align 4
 // CHECK3-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP34]], align 4
 // CHECK3-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp
index f8be5d02d39c8..5d9f0af3dbed1 100644
--- a/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp
@@ -228,9 +228,11 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
@@ -268,7 +270,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
 // CHECK1-NEXT:    [[TMP18:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP17]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP19]], align 4
 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -311,7 +312,6 @@ int bar(int n){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP36]], align 8
 // CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP39]], align 4
 // CHECK1-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
@@ -360,11 +360,13 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR__CASTED2:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
@@ -381,7 +383,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
 // CHECK1-NEXT:    [[TMP9:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP8]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP10]], align 4
 // CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -431,7 +432,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK1-NEXT:    [[TMP34:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP33]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP35]], align 4
 // CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
@@ -475,6 +475,7 @@ int bar(int n){
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[A:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[B:%.*]] = alloca i16, align 2
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i16, align 2
 // CHECK1-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
@@ -483,9 +484,9 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS1:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[A]], align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -554,7 +555,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP33:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2
 // CHECK1-NEXT:    [[TMP34:%.*]] = zext i16 [[TMP33]] to i32
 // CHECK1-NEXT:    [[TMP35:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP34]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS1:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP36]], align 4
 // CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 1
@@ -813,9 +813,11 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
@@ -853,7 +855,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
 // CHECK3-NEXT:    [[TMP18:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP17]], 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP19]], align 4
 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -896,7 +897,6 @@ int bar(int n){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP36]], align 4
 // CHECK3-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP39]], align 4
 // CHECK3-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
@@ -945,11 +945,13 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR__CASTED2:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
@@ -966,7 +968,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
 // CHECK3-NEXT:    [[TMP9:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP8]], 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP10]], align 4
 // CHECK3-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1016,7 +1017,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK3-NEXT:    [[TMP34:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP33]], 0
-// CHECK3-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP35]], align 4
 // CHECK3-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
@@ -1060,6 +1060,7 @@ int bar(int n){
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[A:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[B:%.*]] = alloca i16, align 2
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i16, align 2
 // CHECK3-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
@@ -1068,9 +1069,9 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS1:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    store i32 0, ptr [[A]], align 4
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1139,7 +1140,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP33:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2
 // CHECK3-NEXT:    [[TMP34:%.*]] = zext i16 [[TMP33]] to i32
 // CHECK3-NEXT:    [[TMP35:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP34]], 0
-// CHECK3-NEXT:    [[KERNEL_ARGS1:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP36]], align 4
 // CHECK3-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_task_affinity_codegen.cpp b/clang/test/OpenMP/target_task_affinity_codegen.cpp
index c0ce4ceacd842..3e5bd7cf803ed 100644
--- a/clang/test/OpenMP/target_task_affinity_codegen.cpp
+++ b/clang/test/OpenMP/target_task_affinity_codegen.cpp
@@ -72,6 +72,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS6:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS7:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS8:%.*]] = alloca [2 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[A]], align 8
 // CHECK1-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A]], align 8
@@ -116,7 +117,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP23]], align 8
 // CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS6]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS7]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP26]], align 4
 // CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -302,6 +302,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS6:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS7:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS8:%.*]] = alloca [2 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[A]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A]], align 4
@@ -346,7 +347,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP23]], align 4
 // CHECK3-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS6]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS7]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP26]], align 4
 // CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_codegen.cpp b/clang/test/OpenMP/target_teams_codegen.cpp
index bad0795e5c965..c18294e1f007d 100644
--- a/clang/test/OpenMP/target_teams_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_codegen.cpp
@@ -328,25 +328,30 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS6:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS7:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[A_CASTED8:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[AA_CASTED9:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS11:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [2 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS13:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[A_CASTED16:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [9 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS20:%.*]] = alloca [9 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS21:%.*]] = alloca [9 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS22:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[NN:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[NN_CASTED:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS27:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS28:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS29:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[NN_CASTED33:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS34:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS35:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS36:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS37:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[A]], align 4
@@ -435,7 +440,6 @@ int bar(int n){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP49]], align 8
 // CHECK1-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP52]], align 4
 // CHECK1-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -493,7 +497,6 @@ int bar(int n){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP77]], align 8
 // CHECK1-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS13:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS13]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP80]], align 4
 // CHECK1-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS13]], i32 0, i32 1
@@ -604,7 +607,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS22:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS22]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP133]], align 4
 // CHECK1-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS22]], i32 0, i32 1
@@ -655,7 +657,6 @@ int bar(int n){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP152]], align 8
 // CHECK1-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP155]], align 4
 // CHECK1-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1
@@ -700,7 +701,6 @@ int bar(int n){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP174]], align 8
 // CHECK1-NEXT:    [[TMP175:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS34]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP176:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS35]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS37:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP177:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP177]], align 4
 // CHECK1-NEXT:    [[TMP178:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 1
@@ -815,10 +815,10 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR3_I:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[AA_CASTED_I:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR__CASTED_I:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR__CASTED4_I:%.*]] = alloca i64, align 8
-// CHECK1-NEXT:    [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    store i32 [[TMP0]], ptr [[DOTADDR]], align 4
@@ -1195,6 +1195,7 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    store ptr [[F]], ptr [[F_ADDR]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
@@ -1207,7 +1208,6 @@ int bar(int n){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP4]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1313,6 +1313,7 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
@@ -1372,7 +1373,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP29]], align 4
 // CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1437,6 +1437,7 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK1-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -1480,7 +1481,6 @@ int bar(int n){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP18]], align 8
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP21]], align 4
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1535,6 +1535,7 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK1-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -1568,7 +1569,6 @@ int bar(int n){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP13]], align 8
 // CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP16]], align 4
 // CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1822,25 +1822,30 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS6:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS7:%.*]] = alloca [1 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[A_CASTED8:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[AA_CASTED9:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS11:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [2 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS13:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[A_CASTED16:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [9 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS20:%.*]] = alloca [9 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS21:%.*]] = alloca [9 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS22:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[NN:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[NN_CASTED:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS27:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS28:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS29:%.*]] = alloca [1 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[NN_CASTED33:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS34:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS35:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS36:%.*]] = alloca [1 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS37:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    store i32 0, ptr [[A]], align 4
@@ -1927,7 +1932,6 @@ int bar(int n){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP47]], align 4
 // CHECK3-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP50]], align 4
 // CHECK3-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1985,7 +1989,6 @@ int bar(int n){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP75]], align 4
 // CHECK3-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS13:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS13]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP78]], align 4
 // CHECK3-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS13]], i32 0, i32 1
@@ -2098,7 +2101,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS22:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS22]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP133]], align 4
 // CHECK3-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS22]], i32 0, i32 1
@@ -2149,7 +2151,6 @@ int bar(int n){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP152]], align 4
 // CHECK3-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP155]], align 4
 // CHECK3-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1
@@ -2194,7 +2195,6 @@ int bar(int n){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP174]], align 4
 // CHECK3-NEXT:    [[TMP175:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS34]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP176:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS35]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS37:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP177:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP177]], align 4
 // CHECK3-NEXT:    [[TMP178:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 1
@@ -2309,10 +2309,10 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca ptr, align 4
 // CHECK3-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca ptr, align 4
 // CHECK3-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR3_I:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[AA_CASTED_I:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR__CASTED_I:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR__CASTED4_I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[DOTADDR:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTADDR1:%.*]] = alloca ptr, align 4
 // CHECK3-NEXT:    store i32 [[TMP0]], ptr [[DOTADDR]], align 4
@@ -2689,6 +2689,7 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    store ptr [[F]], ptr [[F_ADDR]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
@@ -2700,7 +2701,6 @@ int bar(int n){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP3]], align 4
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP6]], align 4
 // CHECK3-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2806,6 +2806,7 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
@@ -2865,7 +2866,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP29]], align 4
 // CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2930,6 +2930,7 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK3-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -2973,7 +2974,6 @@ int bar(int n){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP18]], align 4
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP21]], align 4
 // CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3028,6 +3028,7 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK3-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -3061,7 +3062,6 @@ int bar(int n){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP13]], align 4
 // CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP16]], align 4
 // CHECK3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_codegen.cpp
index a37039588afbd..e8ea162bec009 100644
--- a/clang/test/OpenMP/target_teams_distribute_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_codegen.cpp
@@ -321,12 +321,14 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS6:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS7:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[A_CASTED8:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[AA_CASTED9:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS11:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP13:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_17:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[A_CASTED18:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR__CASTED19:%.*]] = alloca i64, align 8
@@ -335,6 +337,7 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS24:%.*]] = alloca [10 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [10 x i64], align 8
 // CHECK1-NEXT:    [[_TMP25:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS26:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[A]], align 4
@@ -423,7 +426,6 @@ int bar(int n){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP49]], align 8
 // CHECK1-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP52]], align 4
 // CHECK1-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -481,7 +483,6 @@ int bar(int n){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP77]], align 8
 // CHECK1-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP80]], align 4
 // CHECK1-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1
@@ -603,7 +604,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [10 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS26:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP139]], align 4
 // CHECK1-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 1
@@ -773,10 +773,10 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR3_I:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[AA_CASTED_I:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR__CASTED_I:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR__CASTED4_I:%.*]] = alloca i64, align 8
-// CHECK1-NEXT:    [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    store i32 [[TMP0]], ptr [[DOTADDR]], align 4
@@ -1336,6 +1336,7 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
@@ -1395,7 +1396,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP29]], align 4
 // CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1465,6 +1465,7 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK1-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -1532,7 +1533,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
 // CHECK1-NEXT:    [[ADD5:%.*]] = add i32 [[TMP30]], 1
 // CHECK1-NEXT:    [[TMP31:%.*]] = zext i32 [[ADD5]] to i64
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP32]], align 4
 // CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1588,6 +1588,7 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK1-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -1621,7 +1622,6 @@ int bar(int n){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP13]], align 8
 // CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP16]], align 4
 // CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2066,12 +2066,14 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS6:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS7:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[A_CASTED8:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[AA_CASTED9:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS11:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP13:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_17:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[A_CASTED18:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR__CASTED19:%.*]] = alloca i32, align 4
@@ -2080,6 +2082,7 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS24:%.*]] = alloca [10 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [10 x i64], align 4
 // CHECK3-NEXT:    [[_TMP25:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS26:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    store i32 0, ptr [[A]], align 4
@@ -2166,7 +2169,6 @@ int bar(int n){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP47]], align 4
 // CHECK3-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP50]], align 4
 // CHECK3-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2224,7 +2226,6 @@ int bar(int n){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP75]], align 4
 // CHECK3-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP78]], align 4
 // CHECK3-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1
@@ -2348,7 +2349,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [10 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS26:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP139]], align 4
 // CHECK3-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 1
@@ -2518,10 +2518,10 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca ptr, align 4
 // CHECK3-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca ptr, align 4
 // CHECK3-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR3_I:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[AA_CASTED_I:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR__CASTED_I:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR__CASTED4_I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[DOTADDR:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTADDR1:%.*]] = alloca ptr, align 4
 // CHECK3-NEXT:    store i32 [[TMP0]], ptr [[DOTADDR]], align 4
@@ -3081,6 +3081,7 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
@@ -3140,7 +3141,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP29]], align 4
 // CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3210,6 +3210,7 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK3-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -3277,7 +3278,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
 // CHECK3-NEXT:    [[ADD5:%.*]] = add i32 [[TMP30]], 1
 // CHECK3-NEXT:    [[TMP31:%.*]] = zext i32 [[ADD5]] to i64
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP32]], align 4
 // CHECK3-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3333,6 +3333,7 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK3-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -3366,7 +3367,6 @@ int bar(int n){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP13]], align 4
 // CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP16]], align 4
 // CHECK3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp
index 67b2acf3fa485..dcc103b9ce3cc 100644
--- a/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp
@@ -111,6 +111,7 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[_TMP2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -122,7 +123,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -279,6 +279,7 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[_TMP2:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -290,7 +291,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -450,6 +450,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
@@ -526,7 +527,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
 // CHECK9-NEXT:    [[TMP35:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
 // CHECK9-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP35]], 1
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP36]], align 4
 // CHECK9-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -753,6 +753,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    store ptr [[A]], ptr [[TMP0]], align 8
@@ -762,7 +763,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK9-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -919,6 +919,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK11-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 4
@@ -994,7 +995,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
 // CHECK11-NEXT:    [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
 // CHECK11-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP34]], 1
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP35]], align 4
 // CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1219,6 +1219,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    store ptr [[A]], ptr [[TMP0]], align 4
@@ -1228,7 +1229,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK11-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp
index 4c4dab839a365..f4d3de7f2b925 100644
--- a/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp
@@ -138,14 +138,17 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS12:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP14:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -157,7 +160,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -200,7 +202,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -243,7 +244,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP42]], align 8
 // CHECK1-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK1-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -557,14 +557,17 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS12:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP14:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -576,7 +579,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -619,7 +621,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -662,7 +663,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP42]], align 4
 // CHECK3-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK3-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -974,6 +974,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[N_CASTED3:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [3 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [3 x ptr], align 8
@@ -982,6 +983,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_18:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[N_CASTED19:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
@@ -992,6 +994,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[_TMP24:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_26:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
@@ -1040,7 +1043,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK9-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1112,7 +1114,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP56:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK9-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP56]], 1
 // CHECK9-NEXT:    [[TMP57:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP58]], align 4
 // CHECK9-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -1195,7 +1196,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP97:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4
 // CHECK9-NEXT:    [[ADD30:%.*]] = add nsw i32 [[TMP97]], 1
 // CHECK9-NEXT:    [[TMP98:%.*]] = zext i32 [[ADD30]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP99]], align 4
 // CHECK9-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1
@@ -1612,14 +1612,17 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    store ptr [[A]], ptr [[TMP0]], align 8
@@ -1629,7 +1632,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK9-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1671,7 +1673,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -1713,7 +1714,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP42]], align 8
 // CHECK9-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK9-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1
@@ -2022,6 +2022,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[N_CASTED3:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [3 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [3 x ptr], align 4
@@ -2030,6 +2031,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_18:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[N_CASTED19:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
@@ -2040,6 +2042,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[_TMP24:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_26:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK11-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 4
@@ -2088,7 +2091,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK11-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2161,7 +2163,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP57:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK11-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP57]], 1
 // CHECK11-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP59]], align 4
 // CHECK11-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -2245,7 +2246,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP99:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4
 // CHECK11-NEXT:    [[ADD30:%.*]] = add nsw i32 [[TMP99]], 1
 // CHECK11-NEXT:    [[TMP100:%.*]] = zext i32 [[ADD30]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP101]], align 4
 // CHECK11-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1
@@ -2659,14 +2659,17 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    store ptr [[A]], ptr [[TMP0]], align 4
@@ -2676,7 +2679,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK11-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2718,7 +2720,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -2760,7 +2761,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP42]], align 4
 // CHECK11-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK11-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp
index c19e2d4d35c5c..cf8c4af9c1fce 100644
--- a/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp
@@ -262,6 +262,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr @t_var, align 4
 // CHECK1-NEXT:    store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4
@@ -301,7 +302,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP18]], align 8
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP21]], align 4
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -542,6 +542,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK1-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -582,7 +583,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP15]], align 8
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1071,6 +1071,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr @t_var, align 4
 // CHECK3-NEXT:    store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4
@@ -1110,7 +1111,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP18]], align 4
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP21]], align 4
 // CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1349,6 +1349,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK3-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -1389,7 +1390,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP15]], align 4
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp
index 03bf0ec637c62..e3bbe2601dd5d 100644
--- a/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp
@@ -482,6 +482,7 @@ int main() {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store ptr [[G]], ptr [[G1]], align 8
@@ -534,7 +535,6 @@ int main() {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP20]], align 8
 // CHECK9-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -809,6 +809,7 @@ int main() {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK9-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK9-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -849,7 +850,6 @@ int main() {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP15]], align 8
 // CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK9-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1190,6 +1190,7 @@ int main() {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store ptr [[G]], ptr [[G1]], align 4
@@ -1242,7 +1243,6 @@ int main() {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP20]], align 4
 // CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1515,6 +1515,7 @@ int main() {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK11-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK11-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -1555,7 +1556,6 @@ int main() {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP15]], align 4
 // CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp
index 1a09e0e93be6e..a9a5c43ed208c 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp
@@ -584,6 +584,7 @@ int target_teams_fun(int *g){
 // CHECK2-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK2-NEXT:    [[N_CASTED7:%.*]] = alloca i64, align 8
 // CHECK2-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [3 x ptr], align 8
 // CHECK2-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [3 x ptr], align 8
@@ -591,6 +592,7 @@ int target_teams_fun(int *g){
 // CHECK2-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_12:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_13:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[KERNEL_ARGS18:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK2-NEXT:    store ptr [[G]], ptr [[G_ADDR]], align 8
 // CHECK2-NEXT:    store i32 1000, ptr [[N]], align 4
 // CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N]], align 4
@@ -650,7 +652,6 @@ int target_teams_fun(int *g){
 // CHECK2-NEXT:    [[TMP28:%.*]] = zext i32 [[ADD]] to i64
 // CHECK2-NEXT:    [[TMP29:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP23]], 0
 // CHECK2-NEXT:    [[TMP30:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP24]], 0
-// CHECK2-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK2-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK2-NEXT:    store i32 2, ptr [[TMP31]], align 4
 // CHECK2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -718,7 +719,6 @@ int target_teams_fun(int *g){
 // CHECK2-NEXT:    [[TMP62:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_13]], align 4
 // CHECK2-NEXT:    [[ADD17:%.*]] = add nsw i32 [[TMP62]], 1
 // CHECK2-NEXT:    [[TMP63:%.*]] = zext i32 [[ADD17]] to i64
-// CHECK2-NEXT:    [[KERNEL_ARGS18:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK2-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 0
 // CHECK2-NEXT:    store i32 2, ptr [[TMP64]], align 4
 // CHECK2-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 1
@@ -1233,6 +1233,7 @@ int target_teams_fun(int *g){
 // CHECK4-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
+// CHECK4-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK4-NEXT:    [[N_CASTED7:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [3 x ptr], align 4
 // CHECK4-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [3 x ptr], align 4
@@ -1240,6 +1241,7 @@ int target_teams_fun(int *g){
 // CHECK4-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[DOTCAPTURE_EXPR_12:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[DOTCAPTURE_EXPR_13:%.*]] = alloca i32, align 4
+// CHECK4-NEXT:    [[KERNEL_ARGS18:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK4-NEXT:    store ptr [[G]], ptr [[G_ADDR]], align 4
 // CHECK4-NEXT:    store i32 1000, ptr [[N]], align 4
 // CHECK4-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N]], align 4
@@ -1299,7 +1301,6 @@ int target_teams_fun(int *g){
 // CHECK4-NEXT:    [[TMP28:%.*]] = zext i32 [[ADD]] to i64
 // CHECK4-NEXT:    [[TMP29:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP23]], 0
 // CHECK4-NEXT:    [[TMP30:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP24]], 0
-// CHECK4-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK4-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK4-NEXT:    store i32 2, ptr [[TMP31]], align 4
 // CHECK4-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1367,7 +1368,6 @@ int target_teams_fun(int *g){
 // CHECK4-NEXT:    [[TMP62:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_13]], align 4
 // CHECK4-NEXT:    [[ADD17:%.*]] = add nsw i32 [[TMP62]], 1
 // CHECK4-NEXT:    [[TMP63:%.*]] = zext i32 [[ADD17]] to i64
-// CHECK4-NEXT:    [[KERNEL_ARGS18:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK4-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 0
 // CHECK4-NEXT:    store i32 2, ptr [[TMP64]], align 4
 // CHECK4-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp
index a6b2cfba59584..09916a9c62186 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp
@@ -116,6 +116,7 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[_TMP2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -127,7 +128,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -359,6 +359,7 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[_TMP2:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -370,7 +371,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -601,6 +601,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
@@ -677,7 +678,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
 // CHECK9-NEXT:    [[TMP35:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
 // CHECK9-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP35]], 1
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP36]], align 4
 // CHECK9-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1030,6 +1030,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    store ptr [[A]], ptr [[TMP0]], align 8
@@ -1039,7 +1040,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK9-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1271,6 +1271,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK11-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 4
@@ -1346,7 +1347,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
 // CHECK11-NEXT:    [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
 // CHECK11-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP34]], 1
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP35]], align 4
 // CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1701,6 +1701,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    store ptr [[A]], ptr [[TMP0]], align 4
@@ -1710,7 +1711,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK11-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp
index a35a2fd44c554..018e78a3960f6 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp
@@ -150,14 +150,17 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS12:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP14:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -169,7 +172,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -212,7 +214,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -255,7 +256,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP42]], align 8
 // CHECK1-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK1-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -791,14 +791,17 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS12:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP14:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -810,7 +813,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -853,7 +855,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -896,7 +897,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP42]], align 4
 // CHECK3-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK3-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -1419,6 +1419,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[N_CASTED3:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [3 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [3 x ptr], align 8
@@ -1427,6 +1428,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_18:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[N_CASTED19:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
@@ -1437,6 +1439,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[_TMP24:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_26:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
@@ -1486,7 +1489,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK9-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1558,7 +1560,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP56:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK9-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP56]], 1
 // CHECK9-NEXT:    [[TMP57:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP58]], align 4
 // CHECK9-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -1641,7 +1642,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP97:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4
 // CHECK9-NEXT:    [[ADD30:%.*]] = add nsw i32 [[TMP97]], 1
 // CHECK9-NEXT:    [[TMP98:%.*]] = zext i32 [[ADD30]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP99]], align 4
 // CHECK9-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1
@@ -2384,16 +2384,19 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    store i32 10, ptr [[M]], align 4
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
@@ -2404,7 +2407,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK9-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2446,7 +2448,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -2499,7 +2500,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP48]], align 8
 // CHECK9-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP51]], align 4
 // CHECK9-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1
@@ -3046,6 +3046,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[N_CASTED3:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [3 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [3 x ptr], align 4
@@ -3054,6 +3055,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_18:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[N_CASTED19:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
@@ -3064,6 +3066,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[_TMP24:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_26:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK11-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 4
@@ -3113,7 +3116,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK11-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3186,7 +3188,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP57:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK11-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP57]], 1
 // CHECK11-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP59]], align 4
 // CHECK11-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -3270,7 +3271,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP99:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4
 // CHECK11-NEXT:    [[ADD30:%.*]] = add nsw i32 [[TMP99]], 1
 // CHECK11-NEXT:    [[TMP100:%.*]] = zext i32 [[ADD30]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP101]], align 4
 // CHECK11-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1
@@ -3998,16 +3998,19 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK11-NEXT:    store i32 10, ptr [[M]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
@@ -4018,7 +4021,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK11-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -4060,7 +4062,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -4113,7 +4114,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP48]], align 4
 // CHECK11-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP51]], align 4
 // CHECK11-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp
index c9eee578f579f..79d13e0f38219 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp
@@ -323,6 +323,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr @t_var, align 4
 // CHECK1-NEXT:    store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4
@@ -362,7 +363,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP18]], align 8
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP21]], align 4
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -733,6 +733,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK1-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -773,7 +774,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP15]], align 8
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1392,6 +1392,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr @t_var, align 4
 // CHECK3-NEXT:    store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4
@@ -1431,7 +1432,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP18]], align 4
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP21]], align 4
 // CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1796,6 +1796,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK3-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -1836,7 +1837,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP15]], align 4
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp
index 0cdb8bc0f5b19..460edea1e3ea0 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp
@@ -109,8 +109,9 @@ int main() {
 // CHECK1-SAME: () #[[ATTR0:[0-9]+]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -144,7 +145,6 @@ int main() {
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l48() #[[ATTR2:[0-9]+]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       omp_offload.cont:
-// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -466,14 +466,15 @@ int main() {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP5:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -533,7 +534,6 @@ int main() {
 // CHECK1-NEXT:    [[TOBOOL4:%.*]] = trunc i8 [[TMP24]] to i1
 // CHECK1-NEXT:    [[TMP25:%.*]] = select i1 [[TOBOOL4]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP26:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP25]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP27]], align 4
 // CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
@@ -1025,14 +1025,15 @@ int main() {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[ARG_ADDR:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 [[ARG]], ptr [[ARG_ADDR]], align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1088,7 +1089,6 @@ int main() {
 // CHECK1-NEXT:    [[TOBOOL3:%.*]] = trunc i8 [[TMP23]] to i1
 // CHECK1-NEXT:    [[TMP24:%.*]] = select i1 [[TOBOOL3]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP25:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP24]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP26]], align 4
 // CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp
index cc3d28be30ce7..986659e8b49ad 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp
@@ -743,6 +743,7 @@ int main() {
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
 // CHECK5-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK5-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK5-NEXT:    store ptr [[G]], ptr [[G1]], align 8
@@ -795,7 +796,6 @@ int main() {
 // CHECK5-NEXT:    store ptr null, ptr [[TMP20]], align 8
 // CHECK5-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK5-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1224,6 +1224,7 @@ int main() {
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
 // CHECK5-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK5-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK5-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -1264,7 +1265,6 @@ int main() {
 // CHECK5-NEXT:    store ptr null, ptr [[TMP15]], align 8
 // CHECK5-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK5-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1750,6 +1750,7 @@ int main() {
 // CHECK7-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
 // CHECK7-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK7-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK7-NEXT:    store ptr [[G]], ptr [[G1]], align 4
@@ -1802,7 +1803,6 @@ int main() {
 // CHECK7-NEXT:    store ptr null, ptr [[TMP20]], align 4
 // CHECK7-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK7-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2225,6 +2225,7 @@ int main() {
 // CHECK7-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
 // CHECK7-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK7-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK7-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -2265,7 +2266,6 @@ int main() {
 // CHECK7-NEXT:    store ptr null, ptr [[TMP15]], align 4
 // CHECK7-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK7-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp
index 375e79d0736a0..6d29d29e626a2 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp
@@ -298,8 +298,8 @@ int main() {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -560,6 +560,7 @@ int main() {
 // CHECK1-NEXT:    [[VAR:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK1-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -569,7 +570,6 @@ int main() {
 // CHECK1-NEXT:    call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2)
 // CHECK1-NEXT:    store ptr [[TEST]], ptr [[VAR]], align 8
 // CHECK1-NEXT:    store ptr undef, ptr [[_TMP1]], align 8
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1038,8 +1038,8 @@ int main() {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1294,6 +1294,7 @@ int main() {
 // CHECK3-NEXT:    [[VAR:%.*]] = alloca ptr, align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[_TMP1:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK3-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -1303,7 +1304,6 @@ int main() {
 // CHECK3-NEXT:    call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2)
 // CHECK3-NEXT:    store ptr [[TEST]], ptr [[VAR]], align 4
 // CHECK3-NEXT:    store ptr undef, ptr [[_TMP1]], align 4
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp
index f1bc5360d465b..54c7d23a6442d 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp
@@ -53,9 +53,10 @@ int main() {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -89,7 +90,6 @@ int main() {
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l36() #[[ATTR2:[0-9]+]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       omp_offload.cont:
-// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp
index 2dc01b18aac20..68b3d7fd524b6 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp
@@ -98,6 +98,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 8
@@ -107,7 +108,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -388,6 +388,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
@@ -398,7 +399,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -684,6 +684,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 4
@@ -693,7 +694,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -970,6 +970,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
@@ -980,7 +981,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp
index 7f663d81ffe59..15fc87d0961d9 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp
@@ -210,22 +210,27 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS12:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP14:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS20:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS21:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP22:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS27:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS28:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS29:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP30:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -237,7 +242,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -280,7 +284,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -323,7 +326,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP42]], align 8
 // CHECK1-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK1-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -366,7 +368,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP62]], align 8
 // CHECK1-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP65]], align 4
 // CHECK1-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1
@@ -409,7 +410,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP82]], align 8
 // CHECK1-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP85]], align 4
 // CHECK1-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1
@@ -1244,22 +1244,27 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS12:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP14:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS20:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS21:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP22:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS27:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS28:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS29:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP30:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -1271,7 +1276,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1314,7 +1318,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -1357,7 +1360,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP42]], align 4
 // CHECK3-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK3-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -1400,7 +1402,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP62]], align 4
 // CHECK3-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP65]], align 4
 // CHECK3-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1
@@ -1443,7 +1444,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP82]], align 4
 // CHECK3-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP85]], align 4
 // CHECK3-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1
@@ -2251,22 +2251,27 @@ int main (int argc, char **argv) {
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS12:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[_TMP14:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS20:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS21:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[_TMP22:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_BASEPTRS27:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS28:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS29:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[_TMP30:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK5-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK5-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -2278,7 +2283,6 @@ int main (int argc, char **argv) {
 // CHECK5-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK5-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK5-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2321,7 +2325,6 @@ int main (int argc, char **argv) {
 // CHECK5-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK5-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK5-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -2364,7 +2367,6 @@ int main (int argc, char **argv) {
 // CHECK5-NEXT:    store ptr null, ptr [[TMP42]], align 8
 // CHECK5-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK5-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -2407,7 +2409,6 @@ int main (int argc, char **argv) {
 // CHECK5-NEXT:    store ptr null, ptr [[TMP62]], align 8
 // CHECK5-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP65]], align 4
 // CHECK5-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1
@@ -2450,7 +2451,6 @@ int main (int argc, char **argv) {
 // CHECK5-NEXT:    store ptr null, ptr [[TMP82]], align 8
 // CHECK5-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP85]], align 4
 // CHECK5-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1
@@ -3285,22 +3285,27 @@ int main (int argc, char **argv) {
 // CHECK7-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK7-NEXT:    [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_PTRS12:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[_TMP14:%.*]] = alloca i32, align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK7-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_PTRS20:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS21:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[_TMP22:%.*]] = alloca i32, align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK7-NEXT:    [[DOTOFFLOAD_BASEPTRS27:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_PTRS28:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS29:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[_TMP30:%.*]] = alloca i32, align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK7-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK7-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CHECK7-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -3312,7 +3317,6 @@ int main (int argc, char **argv) {
 // CHECK7-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK7-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK7-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3355,7 +3359,6 @@ int main (int argc, char **argv) {
 // CHECK7-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK7-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK7-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK7-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK7-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -3398,7 +3401,6 @@ int main (int argc, char **argv) {
 // CHECK7-NEXT:    store ptr null, ptr [[TMP42]], align 4
 // CHECK7-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
-// CHECK7-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK7-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK7-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -3441,7 +3443,6 @@ int main (int argc, char **argv) {
 // CHECK7-NEXT:    store ptr null, ptr [[TMP62]], align 4
 // CHECK7-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK7-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK7-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP65]], align 4
 // CHECK7-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1
@@ -3484,7 +3485,6 @@ int main (int argc, char **argv) {
 // CHECK7-NEXT:    store ptr null, ptr [[TMP82]], align 4
 // CHECK7-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
-// CHECK7-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK7-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP85]], align 4
 // CHECK7-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1
@@ -4294,6 +4294,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK13-NEXT:    [[N_CASTED3:%.*]] = alloca i64, align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [3 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [3 x ptr], align 8
@@ -4302,6 +4303,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_18:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[N_CASTED19:%.*]] = alloca i64, align 8
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
@@ -4312,6 +4314,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[_TMP24:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_26:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[N_CASTED34:%.*]] = alloca i64, align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_BASEPTRS35:%.*]] = alloca [3 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_PTRS36:%.*]] = alloca [3 x ptr], align 8
@@ -4320,6 +4323,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[_TMP39:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_40:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_41:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS46:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_49:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[N_CASTED50:%.*]] = alloca i64, align 8
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR__CASTED51:%.*]] = alloca i64, align 8
@@ -4330,6 +4334,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[_TMP56:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_57:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_58:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS63:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK13-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK13-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
@@ -4379,7 +4384,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK13-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK13-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK13-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK13-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK13-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -4451,7 +4455,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP56:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK13-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP56]], 1
 // CHECK13-NEXT:    [[TMP57:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK13-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP58]], align 4
 // CHECK13-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -4534,7 +4537,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP97:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4
 // CHECK13-NEXT:    [[ADD30:%.*]] = add nsw i32 [[TMP97]], 1
 // CHECK13-NEXT:    [[TMP98:%.*]] = zext i32 [[ADD30]] to i64
-// CHECK13-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP99]], align 4
 // CHECK13-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1
@@ -4606,7 +4608,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP132:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_41]], align 4
 // CHECK13-NEXT:    [[ADD45:%.*]] = add nsw i32 [[TMP132]], 1
 // CHECK13-NEXT:    [[TMP133:%.*]] = zext i32 [[ADD45]] to i64
-// CHECK13-NEXT:    [[KERNEL_ARGS46:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP134]], align 4
 // CHECK13-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 1
@@ -4689,7 +4690,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP173:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_58]], align 4
 // CHECK13-NEXT:    [[ADD62:%.*]] = add nsw i32 [[TMP173]], 1
 // CHECK13-NEXT:    [[TMP174:%.*]] = zext i32 [[ADD62]] to i64
-// CHECK13-NEXT:    [[KERNEL_ARGS63:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP175:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP175]], align 4
 // CHECK13-NEXT:    [[TMP176:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 1
@@ -5873,26 +5873,31 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 8
 // CHECK13-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_BASEPTRS15:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_PTRS16:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_MAPPERS17:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[_TMP18:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR__CASTED23:%.*]] = alloca i64, align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_BASEPTRS24:%.*]] = alloca [2 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_PTRS25:%.*]] = alloca [2 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_MAPPERS26:%.*]] = alloca [2 x ptr], align 8
 // CHECK13-NEXT:    [[_TMP27:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK13-NEXT:    store i32 10, ptr [[M]], align 4
 // CHECK13-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
@@ -5903,7 +5908,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK13-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK13-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK13-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK13-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -5945,7 +5949,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK13-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK13-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK13-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -5998,7 +6001,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    store ptr null, ptr [[TMP48]], align 8
 // CHECK13-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK13-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP51]], align 4
 // CHECK13-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1
@@ -6040,7 +6042,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    store ptr null, ptr [[TMP68]], align 8
 // CHECK13-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0
-// CHECK13-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP71]], align 4
 // CHECK13-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1
@@ -6093,7 +6094,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    store ptr null, ptr [[TMP94]], align 8
 // CHECK13-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS24]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS25]], i32 0, i32 0
-// CHECK13-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP97]], align 4
 // CHECK13-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1
@@ -6952,6 +6952,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK15-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK15-NEXT:    [[N_CASTED3:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [3 x ptr], align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [3 x ptr], align 4
@@ -6960,6 +6961,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK15-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_18:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[N_CASTED19:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
@@ -6970,6 +6972,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[_TMP24:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_26:%.*]] = alloca i32, align 4
+// CHECK15-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[N_CASTED34:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_BASEPTRS35:%.*]] = alloca [3 x ptr], align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_PTRS36:%.*]] = alloca [3 x ptr], align 4
@@ -6978,6 +6981,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[_TMP39:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_40:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_41:%.*]] = alloca i32, align 4
+// CHECK15-NEXT:    [[KERNEL_ARGS46:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_49:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[N_CASTED50:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR__CASTED51:%.*]] = alloca i32, align 4
@@ -6988,6 +6992,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[_TMP56:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_57:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_58:%.*]] = alloca i32, align 4
+// CHECK15-NEXT:    [[KERNEL_ARGS63:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK15-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK15-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 4
@@ -7037,7 +7042,6 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK15-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK15-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK15-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK15-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK15-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK15-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -7110,7 +7114,6 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP57:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK15-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP57]], 1
 // CHECK15-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK15-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK15-NEXT:    store i32 2, ptr [[TMP59]], align 4
 // CHECK15-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -7194,7 +7197,6 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP99:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4
 // CHECK15-NEXT:    [[ADD30:%.*]] = add nsw i32 [[TMP99]], 1
 // CHECK15-NEXT:    [[TMP100:%.*]] = zext i32 [[ADD30]] to i64
-// CHECK15-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0
 // CHECK15-NEXT:    store i32 2, ptr [[TMP101]], align 4
 // CHECK15-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1
@@ -7267,7 +7269,6 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP135:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_41]], align 4
 // CHECK15-NEXT:    [[ADD45:%.*]] = add nsw i32 [[TMP135]], 1
 // CHECK15-NEXT:    [[TMP136:%.*]] = zext i32 [[ADD45]] to i64
-// CHECK15-NEXT:    [[KERNEL_ARGS46:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 0
 // CHECK15-NEXT:    store i32 2, ptr [[TMP137]], align 4
 // CHECK15-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 1
@@ -7351,7 +7352,6 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP177:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_58]], align 4
 // CHECK15-NEXT:    [[ADD62:%.*]] = add nsw i32 [[TMP177]], 1
 // CHECK15-NEXT:    [[TMP178:%.*]] = zext i32 [[ADD62]] to i64
-// CHECK15-NEXT:    [[KERNEL_ARGS63:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[TMP179:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 0
 // CHECK15-NEXT:    store i32 2, ptr [[TMP179]], align 4
 // CHECK15-NEXT:    [[TMP180:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 1
@@ -8510,26 +8510,31 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK15-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK15-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK15-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK15-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK15-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 4
 // CHECK15-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK15-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[DOTOFFLOAD_BASEPTRS15:%.*]] = alloca [1 x ptr], align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_PTRS16:%.*]] = alloca [1 x ptr], align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_MAPPERS17:%.*]] = alloca [1 x ptr], align 4
 // CHECK15-NEXT:    [[_TMP18:%.*]] = alloca i32, align 4
+// CHECK15-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR__CASTED23:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_BASEPTRS24:%.*]] = alloca [2 x ptr], align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_PTRS25:%.*]] = alloca [2 x ptr], align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_MAPPERS26:%.*]] = alloca [2 x ptr], align 4
 // CHECK15-NEXT:    [[_TMP27:%.*]] = alloca i32, align 4
+// CHECK15-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK15-NEXT:    store i32 10, ptr [[M]], align 4
 // CHECK15-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
@@ -8540,7 +8545,6 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK15-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK15-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK15-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK15-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK15-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -8582,7 +8586,6 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK15-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK15-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK15-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK15-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -8635,7 +8638,6 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    store ptr null, ptr [[TMP48]], align 4
 // CHECK15-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK15-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK15-NEXT:    store i32 2, ptr [[TMP51]], align 4
 // CHECK15-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1
@@ -8677,7 +8679,6 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    store ptr null, ptr [[TMP68]], align 4
 // CHECK15-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0
-// CHECK15-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0
 // CHECK15-NEXT:    store i32 2, ptr [[TMP71]], align 4
 // CHECK15-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1
@@ -8730,7 +8731,6 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    store ptr null, ptr [[TMP94]], align 4
 // CHECK15-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS24]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS25]], i32 0, i32 0
-// CHECK15-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0
 // CHECK15-NEXT:    store i32 2, ptr [[TMP97]], align 4
 // CHECK15-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1
@@ -9562,6 +9562,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    [[N_CASTED3:%.*]] = alloca i64, align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [3 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [3 x ptr], align 8
@@ -9570,6 +9571,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_18:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[N_CASTED19:%.*]] = alloca i64, align 8
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
@@ -9580,6 +9582,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[_TMP24:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_26:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[N_CASTED34:%.*]] = alloca i64, align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_BASEPTRS35:%.*]] = alloca [3 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS36:%.*]] = alloca [3 x ptr], align 8
@@ -9588,6 +9591,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[_TMP39:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_40:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_41:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS46:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_49:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[N_CASTED50:%.*]] = alloca i64, align 8
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR__CASTED51:%.*]] = alloca i64, align 8
@@ -9598,6 +9602,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[_TMP56:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_57:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_58:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS63:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK17-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK17-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
@@ -9647,7 +9652,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK17-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK17-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK17-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -9719,7 +9723,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP56:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK17-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP56]], 1
 // CHECK17-NEXT:    [[TMP57:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK17-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP58]], align 4
 // CHECK17-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -9802,7 +9805,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP97:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4
 // CHECK17-NEXT:    [[ADD30:%.*]] = add nsw i32 [[TMP97]], 1
 // CHECK17-NEXT:    [[TMP98:%.*]] = zext i32 [[ADD30]] to i64
-// CHECK17-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP99]], align 4
 // CHECK17-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1
@@ -9874,7 +9876,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP132:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_41]], align 4
 // CHECK17-NEXT:    [[ADD45:%.*]] = add nsw i32 [[TMP132]], 1
 // CHECK17-NEXT:    [[TMP133:%.*]] = zext i32 [[ADD45]] to i64
-// CHECK17-NEXT:    [[KERNEL_ARGS46:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP134]], align 4
 // CHECK17-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 1
@@ -9957,7 +9958,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP173:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_58]], align 4
 // CHECK17-NEXT:    [[ADD62:%.*]] = add nsw i32 [[TMP173]], 1
 // CHECK17-NEXT:    [[TMP174:%.*]] = zext i32 [[ADD62]] to i64
-// CHECK17-NEXT:    [[KERNEL_ARGS63:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP175:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP175]], align 4
 // CHECK17-NEXT:    [[TMP176:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 1
@@ -11141,26 +11141,31 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK17-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK17-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 8
 // CHECK17-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_BASEPTRS15:%.*]] = alloca [1 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS16:%.*]] = alloca [1 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_MAPPERS17:%.*]] = alloca [1 x ptr], align 8
 // CHECK17-NEXT:    [[_TMP18:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR__CASTED23:%.*]] = alloca i64, align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_BASEPTRS24:%.*]] = alloca [2 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS25:%.*]] = alloca [2 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_MAPPERS26:%.*]] = alloca [2 x ptr], align 8
 // CHECK17-NEXT:    [[_TMP27:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK17-NEXT:    store i32 10, ptr [[M]], align 4
 // CHECK17-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
@@ -11171,7 +11176,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK17-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK17-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -11213,7 +11217,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK17-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK17-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK17-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -11266,7 +11269,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    store ptr null, ptr [[TMP48]], align 8
 // CHECK17-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK17-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP51]], align 4
 // CHECK17-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1
@@ -11308,7 +11310,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    store ptr null, ptr [[TMP68]], align 8
 // CHECK17-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0
-// CHECK17-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP71]], align 4
 // CHECK17-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1
@@ -11361,7 +11362,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    store ptr null, ptr [[TMP94]], align 8
 // CHECK17-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS24]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS25]], i32 0, i32 0
-// CHECK17-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP97]], align 4
 // CHECK17-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1
@@ -12220,6 +12220,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    [[N_CASTED3:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [3 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [3 x ptr], align 4
@@ -12228,6 +12229,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_18:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[N_CASTED19:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
@@ -12238,6 +12240,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[_TMP24:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_26:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[N_CASTED34:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_BASEPTRS35:%.*]] = alloca [3 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS36:%.*]] = alloca [3 x ptr], align 4
@@ -12246,6 +12249,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[_TMP39:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_40:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_41:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS46:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_49:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[N_CASTED50:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR__CASTED51:%.*]] = alloca i32, align 4
@@ -12256,6 +12260,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[_TMP56:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_57:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_58:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS63:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK19-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK19-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 4
@@ -12305,7 +12310,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK19-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK19-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK19-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -12378,7 +12382,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP57:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK19-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP57]], 1
 // CHECK19-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK19-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP59]], align 4
 // CHECK19-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -12462,7 +12465,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP99:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4
 // CHECK19-NEXT:    [[ADD30:%.*]] = add nsw i32 [[TMP99]], 1
 // CHECK19-NEXT:    [[TMP100:%.*]] = zext i32 [[ADD30]] to i64
-// CHECK19-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP101]], align 4
 // CHECK19-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1
@@ -12535,7 +12537,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP135:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_41]], align 4
 // CHECK19-NEXT:    [[ADD45:%.*]] = add nsw i32 [[TMP135]], 1
 // CHECK19-NEXT:    [[TMP136:%.*]] = zext i32 [[ADD45]] to i64
-// CHECK19-NEXT:    [[KERNEL_ARGS46:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP137]], align 4
 // CHECK19-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 1
@@ -12619,7 +12620,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP177:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_58]], align 4
 // CHECK19-NEXT:    [[ADD62:%.*]] = add nsw i32 [[TMP177]], 1
 // CHECK19-NEXT:    [[TMP178:%.*]] = zext i32 [[ADD62]] to i64
-// CHECK19-NEXT:    [[KERNEL_ARGS63:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP179:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP179]], align 4
 // CHECK19-NEXT:    [[TMP180:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 1
@@ -13778,26 +13778,31 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 4
 // CHECK19-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[DOTOFFLOAD_BASEPTRS15:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS16:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_MAPPERS17:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[_TMP18:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR__CASTED23:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_BASEPTRS24:%.*]] = alloca [2 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS25:%.*]] = alloca [2 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_MAPPERS26:%.*]] = alloca [2 x ptr], align 4
 // CHECK19-NEXT:    [[_TMP27:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK19-NEXT:    store i32 10, ptr [[M]], align 4
 // CHECK19-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
@@ -13808,7 +13813,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK19-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK19-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -13850,7 +13854,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK19-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK19-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK19-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -13903,7 +13906,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    store ptr null, ptr [[TMP48]], align 4
 // CHECK19-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK19-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP51]], align 4
 // CHECK19-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1
@@ -13945,7 +13947,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    store ptr null, ptr [[TMP68]], align 4
 // CHECK19-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0
-// CHECK19-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP71]], align 4
 // CHECK19-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1
@@ -13998,7 +13999,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    store ptr null, ptr [[TMP94]], align 4
 // CHECK19-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS24]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS25]], i32 0, i32 0
-// CHECK19-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP97]], align 4
 // CHECK19-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp
index 80301d6b2e50a..928af33b10426 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp
@@ -103,6 +103,7 @@ void test_target_teams_atomic() {
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[N_CASTED7:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [3 x ptr], align 8
@@ -110,6 +111,7 @@ void test_target_teams_atomic() {
 // CHECK1-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_12:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_13:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS18:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store ptr [[G]], ptr [[G_ADDR]], align 8
 // CHECK1-NEXT:    store i32 1000, ptr [[N]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N]], align 4
@@ -178,7 +180,6 @@ void test_target_teams_atomic() {
 // CHECK1-NEXT:    [[TMP33:%.*]] = zext i32 [[ADD]] to i64
 // CHECK1-NEXT:    [[TMP34:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP28]], 0
 // CHECK1-NEXT:    [[TMP35:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP29]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP36]], align 4
 // CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -246,7 +247,6 @@ void test_target_teams_atomic() {
 // CHECK1-NEXT:    [[TMP67:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_13]], align 4
 // CHECK1-NEXT:    [[ADD17:%.*]] = add nsw i32 [[TMP67]], 1
 // CHECK1-NEXT:    [[TMP68:%.*]] = zext i32 [[ADD17]] to i64
-// CHECK1-NEXT:    [[KERNEL_ARGS18:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP69]], align 4
 // CHECK1-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 1
@@ -810,6 +810,7 @@ void test_target_teams_atomic() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[X]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[X]], ptr [[TMP0]], align 8
@@ -819,7 +820,6 @@ void test_target_teams_atomic() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1045,6 +1045,7 @@ void test_target_teams_atomic() {
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[N_CASTED7:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [3 x ptr], align 4
@@ -1052,6 +1053,7 @@ void test_target_teams_atomic() {
 // CHECK3-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_12:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_13:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS18:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store ptr [[G]], ptr [[G_ADDR]], align 4
 // CHECK3-NEXT:    store i32 1000, ptr [[N]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N]], align 4
@@ -1120,7 +1122,6 @@ void test_target_teams_atomic() {
 // CHECK3-NEXT:    [[TMP33:%.*]] = zext i32 [[ADD]] to i64
 // CHECK3-NEXT:    [[TMP34:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP28]], 0
 // CHECK3-NEXT:    [[TMP35:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP29]], 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP36]], align 4
 // CHECK3-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1188,7 +1189,6 @@ void test_target_teams_atomic() {
 // CHECK3-NEXT:    [[TMP67:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_13]], align 4
 // CHECK3-NEXT:    [[ADD17:%.*]] = add nsw i32 [[TMP67]], 1
 // CHECK3-NEXT:    [[TMP68:%.*]] = zext i32 [[ADD17]] to i64
-// CHECK3-NEXT:    [[KERNEL_ARGS18:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP69]], align 4
 // CHECK3-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 1
@@ -1742,6 +1742,7 @@ void test_target_teams_atomic() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[X]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    store ptr [[X]], ptr [[TMP0]], align 4
@@ -1751,7 +1752,6 @@ void test_target_teams_atomic() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp
index 6b0847203d835..8ee4e4fc14ca7 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp
@@ -116,6 +116,7 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[_TMP2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -127,7 +128,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -375,6 +375,7 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[_TMP2:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -386,7 +387,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -773,6 +773,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
@@ -849,7 +850,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
 // CHECK9-NEXT:    [[TMP35:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
 // CHECK9-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP35]], 1
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP36]], align 4
 // CHECK9-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1238,6 +1238,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    store ptr [[A]], ptr [[TMP0]], align 8
@@ -1247,7 +1248,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK9-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1495,6 +1495,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK11-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 4
@@ -1570,7 +1571,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
 // CHECK11-NEXT:    [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
 // CHECK11-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP34]], 1
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP35]], align 4
 // CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1961,6 +1961,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    store ptr [[A]], ptr [[TMP0]], align 4
@@ -1970,7 +1971,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK11-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
index 1ea96dfdce7df..f43962e1192db 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
@@ -150,14 +150,17 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS12:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP14:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -169,7 +172,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -212,7 +214,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -255,7 +256,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP42]], align 8
 // CHECK1-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK1-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -833,14 +833,17 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS12:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP14:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -852,7 +855,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -895,7 +897,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -938,7 +939,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP42]], align 4
 // CHECK3-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK3-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -1750,6 +1750,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[N_CASTED3:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [3 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [3 x ptr], align 8
@@ -1758,6 +1759,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_18:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[N_CASTED19:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
@@ -1768,6 +1770,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[_TMP24:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_26:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
@@ -1817,7 +1820,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK9-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1889,7 +1891,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP56:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK9-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP56]], 1
 // CHECK9-NEXT:    [[TMP57:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP58]], align 4
 // CHECK9-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -1972,7 +1973,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP97:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4
 // CHECK9-NEXT:    [[ADD30:%.*]] = add nsw i32 [[TMP97]], 1
 // CHECK9-NEXT:    [[TMP98:%.*]] = zext i32 [[ADD30]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP99]], align 4
 // CHECK9-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1
@@ -2787,16 +2787,19 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    store i32 10, ptr [[M]], align 4
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
@@ -2807,7 +2810,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK9-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2849,7 +2851,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -2902,7 +2903,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP48]], align 8
 // CHECK9-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP51]], align 4
 // CHECK9-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1
@@ -3491,6 +3491,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[N_CASTED3:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [3 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [3 x ptr], align 4
@@ -3499,6 +3500,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_18:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[N_CASTED19:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
@@ -3509,6 +3511,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[_TMP24:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_26:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK11-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 4
@@ -3558,7 +3561,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK11-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3631,7 +3633,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP57:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK11-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP57]], 1
 // CHECK11-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP59]], align 4
 // CHECK11-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -3715,7 +3716,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP99:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4
 // CHECK11-NEXT:    [[ADD30:%.*]] = add nsw i32 [[TMP99]], 1
 // CHECK11-NEXT:    [[TMP100:%.*]] = zext i32 [[ADD30]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP101]], align 4
 // CHECK11-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1
@@ -4515,16 +4515,19 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK11-NEXT:    store i32 10, ptr [[M]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
@@ -4535,7 +4538,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK11-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -4577,7 +4579,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -4630,7 +4631,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP48]], align 4
 // CHECK11-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP51]], align 4
 // CHECK11-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
index bc272f8d5e939..ad133966d929d 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
@@ -321,6 +321,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr @t_var, align 4
 // CHECK1-NEXT:    store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4
@@ -360,7 +361,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP18]], align 8
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP21]], align 4
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -745,6 +745,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK1-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -785,7 +786,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP15]], align 8
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1418,6 +1418,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr @t_var, align 4
 // CHECK3-NEXT:    store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4
@@ -1457,7 +1458,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP18]], align 4
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP21]], align 4
 // CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1836,6 +1836,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK3-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -1876,7 +1877,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP15]], align 4
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp
index ee3efd3820572..7cd64ab5a59da 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp
@@ -116,7 +116,9 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr @Arg, align 4
 // CHECK1-NEXT:    store i32 [[TMP0]], ptr [[ARG_CASTED]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = load i64, ptr [[ARG_CASTED]], align 8
@@ -128,7 +130,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP4]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -162,7 +163,6 @@ int main() {
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l47(i64 [[TMP1]]) #[[ATTR2:[0-9]+]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       omp_offload.cont:
-// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP22]], align 4
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -527,14 +527,15 @@ int main() {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP5:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -594,7 +595,6 @@ int main() {
 // CHECK1-NEXT:    [[TOBOOL4:%.*]] = trunc i8 [[TMP24]] to i1
 // CHECK1-NEXT:    [[TMP25:%.*]] = select i1 [[TOBOOL4]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP26:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP25]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP27]], align 4
 // CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
@@ -1128,14 +1128,15 @@ int main() {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[ARG_ADDR:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 [[ARG]], ptr [[ARG_ADDR]], align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1191,7 +1192,6 @@ int main() {
 // CHECK1-NEXT:    [[TOBOOL3:%.*]] = trunc i8 [[TMP23]] to i1
 // CHECK1-NEXT:    [[TMP24:%.*]] = select i1 [[TOBOOL3]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP25:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP24]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP26]], align 4
 // CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -1728,7 +1728,9 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr @Arg, align 4
 // CHECK3-NEXT:    store i32 [[TMP0]], ptr [[ARG_CASTED]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = load i64, ptr [[ARG_CASTED]], align 8
@@ -1740,7 +1742,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP4]], align 8
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK3-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1774,7 +1775,6 @@ int main() {
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l45(i64 [[TMP1]]) #[[ATTR2:[0-9]+]]
 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK3:       omp_offload.cont:
-// CHECK3-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP22]], align 4
 // CHECK3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -2139,14 +2139,15 @@ int main() {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK3-NEXT:    [[_TMP5:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2206,7 +2207,6 @@ int main() {
 // CHECK3-NEXT:    [[TOBOOL4:%.*]] = trunc i8 [[TMP24]] to i1
 // CHECK3-NEXT:    [[TMP25:%.*]] = select i1 [[TOBOOL4]], i32 0, i32 1
 // CHECK3-NEXT:    [[TMP26:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP25]], 0
-// CHECK3-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP27]], align 4
 // CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
@@ -2970,14 +2970,15 @@ int main() {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[ARG_ADDR:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK3-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store i32 [[ARG]], ptr [[ARG_ADDR]], align 4
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3033,7 +3034,6 @@ int main() {
 // CHECK3-NEXT:    [[TOBOOL3:%.*]] = trunc i8 [[TMP23]] to i1
 // CHECK3-NEXT:    [[TMP24:%.*]] = select i1 [[TOBOOL3]], i32 0, i32 1
 // CHECK3-NEXT:    [[TMP25:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP24]], 0
-// CHECK3-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP26]], align 4
 // CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -4163,7 +4163,9 @@ int main() {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = load i32, ptr @Arg, align 4
 // CHECK9-NEXT:    store i32 [[TMP0]], ptr [[ARG_CASTED]], align 4
 // CHECK9-NEXT:    [[TMP1:%.*]] = load i64, ptr [[ARG_CASTED]], align 8
@@ -4175,7 +4177,6 @@ int main() {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP4]], align 8
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK9-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -4209,7 +4210,6 @@ int main() {
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l47(i64 [[TMP1]]) #[[ATTR2:[0-9]+]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
-// CHECK9-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP22]], align 4
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -4574,14 +4574,15 @@ int main() {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP5:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK9-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -4641,7 +4642,6 @@ int main() {
 // CHECK9-NEXT:    [[TOBOOL4:%.*]] = trunc i8 [[TMP24]] to i1
 // CHECK9-NEXT:    [[TMP25:%.*]] = select i1 [[TOBOOL4]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP26:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP25]], 0
-// CHECK9-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP27]], align 4
 // CHECK9-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
@@ -5175,14 +5175,15 @@ int main() {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[ARG_ADDR:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 [[ARG]], ptr [[ARG_ADDR]], align 4
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK9-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -5238,7 +5239,6 @@ int main() {
 // CHECK9-NEXT:    [[TOBOOL3:%.*]] = trunc i8 [[TMP23]] to i1
 // CHECK9-NEXT:    [[TMP24:%.*]] = select i1 [[TOBOOL3]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP25:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP24]], 0
-// CHECK9-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP26]], align 4
 // CHECK9-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -5775,7 +5775,9 @@ int main() {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP0:%.*]] = load i32, ptr @Arg, align 4
 // CHECK11-NEXT:    store i32 [[TMP0]], ptr [[ARG_CASTED]], align 4
 // CHECK11-NEXT:    [[TMP1:%.*]] = load i64, ptr [[ARG_CASTED]], align 8
@@ -5787,7 +5789,6 @@ int main() {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP4]], align 8
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK11-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -5821,7 +5822,6 @@ int main() {
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l45(i64 [[TMP1]]) #[[ATTR2:[0-9]+]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
-// CHECK11-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP22]], align 4
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -6186,14 +6186,15 @@ int main() {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK11-NEXT:    [[_TMP5:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK11-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -6253,7 +6254,6 @@ int main() {
 // CHECK11-NEXT:    [[TOBOOL4:%.*]] = trunc i8 [[TMP24]] to i1
 // CHECK11-NEXT:    [[TMP25:%.*]] = select i1 [[TOBOOL4]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP26:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP25]], 0
-// CHECK11-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP27]], align 4
 // CHECK11-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
@@ -7017,14 +7017,15 @@ int main() {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[ARG_ADDR:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK11-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 [[ARG]], ptr [[ARG_ADDR]], align 4
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK11-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -7080,7 +7081,6 @@ int main() {
 // CHECK11-NEXT:    [[TOBOOL3:%.*]] = trunc i8 [[TMP23]] to i1
 // CHECK11-NEXT:    [[TMP24:%.*]] = select i1 [[TOBOOL3]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP25:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP24]], 0
-// CHECK11-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP26]], align 4
 // CHECK11-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
index ea51af89e47f5..7272323ba0a1f 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
@@ -771,6 +771,7 @@ int main() {
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
 // CHECK5-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK5-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK5-NEXT:    store ptr [[G]], ptr [[G1]], align 8
@@ -823,7 +824,6 @@ int main() {
 // CHECK5-NEXT:    store ptr null, ptr [[TMP20]], align 8
 // CHECK5-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK5-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1266,6 +1266,7 @@ int main() {
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
 // CHECK5-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK5-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK5-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -1306,7 +1307,6 @@ int main() {
 // CHECK5-NEXT:    store ptr null, ptr [[TMP15]], align 8
 // CHECK5-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK5-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1806,6 +1806,7 @@ int main() {
 // CHECK7-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
 // CHECK7-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK7-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK7-NEXT:    store ptr [[G]], ptr [[G1]], align 4
@@ -1858,7 +1859,6 @@ int main() {
 // CHECK7-NEXT:    store ptr null, ptr [[TMP20]], align 4
 // CHECK7-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK7-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2295,6 +2295,7 @@ int main() {
 // CHECK7-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
 // CHECK7-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK7-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK7-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -2335,7 +2336,6 @@ int main() {
 // CHECK7-NEXT:    store ptr null, ptr [[TMP15]], align 4
 // CHECK7-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK7-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp
index a7d9e847e2f67..5ed05c5d6243a 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp
@@ -298,8 +298,8 @@ int main() {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -574,6 +574,7 @@ int main() {
 // CHECK1-NEXT:    [[VAR:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK1-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -583,7 +584,6 @@ int main() {
 // CHECK1-NEXT:    call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2)
 // CHECK1-NEXT:    store ptr [[TEST]], ptr [[VAR]], align 8
 // CHECK1-NEXT:    store ptr undef, ptr [[_TMP1]], align 8
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1066,8 +1066,8 @@ int main() {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1336,6 +1336,7 @@ int main() {
 // CHECK3-NEXT:    [[VAR:%.*]] = alloca ptr, align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[_TMP1:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK3-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -1345,7 +1346,6 @@ int main() {
 // CHECK3-NEXT:    call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2)
 // CHECK3-NEXT:    store ptr [[TEST]], ptr [[VAR]], align 4
 // CHECK3-NEXT:    store ptr undef, ptr [[_TMP1]], align 4
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp
index a953d41542673..35787a84ce96b 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp
@@ -53,9 +53,10 @@ int main() {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -89,7 +90,6 @@ int main() {
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l36() #[[ATTR2:[0-9]+]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       omp_offload.cont:
-// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp
index 652e0ce08a90b..f6f12b957a8cc 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp
@@ -98,6 +98,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 8
@@ -107,7 +108,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -402,6 +402,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
@@ -412,7 +413,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -712,6 +712,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 4
@@ -721,7 +722,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1012,6 +1012,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
@@ -1022,7 +1023,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp
index c4f504bf066a7..c1ce2cd270f69 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp
@@ -210,22 +210,27 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS12:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP14:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS20:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS21:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP22:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS27:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS28:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS29:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP30:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -237,7 +242,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -280,7 +284,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -323,7 +326,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP42]], align 8
 // CHECK1-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK1-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -366,7 +368,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP62]], align 8
 // CHECK1-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP65]], align 4
 // CHECK1-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1
@@ -409,7 +410,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP82]], align 8
 // CHECK1-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP85]], align 4
 // CHECK1-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1
@@ -1314,22 +1314,27 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS12:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP14:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS20:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS21:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP22:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS27:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS28:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS29:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP30:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -1341,7 +1346,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1384,7 +1388,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -1427,7 +1430,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP42]], align 4
 // CHECK3-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK3-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -1470,7 +1472,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP62]], align 4
 // CHECK3-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP65]], align 4
 // CHECK3-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1
@@ -1513,7 +1514,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP82]], align 4
 // CHECK3-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP85]], align 4
 // CHECK3-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1
@@ -2391,22 +2391,27 @@ int main (int argc, char **argv) {
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS12:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[_TMP14:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS20:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS21:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[_TMP22:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_BASEPTRS27:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS28:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS29:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[_TMP30:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK5-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK5-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -2418,7 +2423,6 @@ int main (int argc, char **argv) {
 // CHECK5-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK5-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK5-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2461,7 +2465,6 @@ int main (int argc, char **argv) {
 // CHECK5-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK5-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK5-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -2504,7 +2507,6 @@ int main (int argc, char **argv) {
 // CHECK5-NEXT:    store ptr null, ptr [[TMP42]], align 8
 // CHECK5-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK5-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -2547,7 +2549,6 @@ int main (int argc, char **argv) {
 // CHECK5-NEXT:    store ptr null, ptr [[TMP62]], align 8
 // CHECK5-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP65]], align 4
 // CHECK5-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1
@@ -2590,7 +2591,6 @@ int main (int argc, char **argv) {
 // CHECK5-NEXT:    store ptr null, ptr [[TMP82]], align 8
 // CHECK5-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP85]], align 4
 // CHECK5-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1
@@ -3495,22 +3495,27 @@ int main (int argc, char **argv) {
 // CHECK7-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK7-NEXT:    [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_PTRS12:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[_TMP14:%.*]] = alloca i32, align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK7-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_PTRS20:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS21:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[_TMP22:%.*]] = alloca i32, align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK7-NEXT:    [[DOTOFFLOAD_BASEPTRS27:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_PTRS28:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS29:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[_TMP30:%.*]] = alloca i32, align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK7-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK7-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CHECK7-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -3522,7 +3527,6 @@ int main (int argc, char **argv) {
 // CHECK7-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK7-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK7-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3565,7 +3569,6 @@ int main (int argc, char **argv) {
 // CHECK7-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK7-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK7-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK7-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK7-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -3608,7 +3611,6 @@ int main (int argc, char **argv) {
 // CHECK7-NEXT:    store ptr null, ptr [[TMP42]], align 4
 // CHECK7-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
-// CHECK7-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK7-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK7-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -3651,7 +3653,6 @@ int main (int argc, char **argv) {
 // CHECK7-NEXT:    store ptr null, ptr [[TMP62]], align 4
 // CHECK7-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK7-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK7-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP65]], align 4
 // CHECK7-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1
@@ -3694,7 +3695,6 @@ int main (int argc, char **argv) {
 // CHECK7-NEXT:    store ptr null, ptr [[TMP82]], align 4
 // CHECK7-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
-// CHECK7-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK7-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP85]], align 4
 // CHECK7-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1
@@ -4959,6 +4959,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK13-NEXT:    [[N_CASTED3:%.*]] = alloca i64, align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [3 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [3 x ptr], align 8
@@ -4967,6 +4968,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_18:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[N_CASTED19:%.*]] = alloca i64, align 8
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
@@ -4977,6 +4979,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[_TMP24:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_26:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[N_CASTED34:%.*]] = alloca i64, align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_BASEPTRS35:%.*]] = alloca [3 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_PTRS36:%.*]] = alloca [3 x ptr], align 8
@@ -4985,6 +4988,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[_TMP39:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_40:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_41:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS46:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_49:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[N_CASTED50:%.*]] = alloca i64, align 8
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR__CASTED51:%.*]] = alloca i64, align 8
@@ -4995,6 +4999,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[_TMP56:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_57:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_58:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS63:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK13-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK13-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
@@ -5044,7 +5049,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK13-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK13-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK13-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK13-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK13-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -5116,7 +5120,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP56:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK13-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP56]], 1
 // CHECK13-NEXT:    [[TMP57:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK13-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP58]], align 4
 // CHECK13-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -5199,7 +5202,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP97:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4
 // CHECK13-NEXT:    [[ADD30:%.*]] = add nsw i32 [[TMP97]], 1
 // CHECK13-NEXT:    [[TMP98:%.*]] = zext i32 [[ADD30]] to i64
-// CHECK13-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP99]], align 4
 // CHECK13-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1
@@ -5271,7 +5273,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP132:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_41]], align 4
 // CHECK13-NEXT:    [[ADD45:%.*]] = add nsw i32 [[TMP132]], 1
 // CHECK13-NEXT:    [[TMP133:%.*]] = zext i32 [[ADD45]] to i64
-// CHECK13-NEXT:    [[KERNEL_ARGS46:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP134]], align 4
 // CHECK13-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 1
@@ -5354,7 +5355,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP173:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_58]], align 4
 // CHECK13-NEXT:    [[ADD62:%.*]] = add nsw i32 [[TMP173]], 1
 // CHECK13-NEXT:    [[TMP174:%.*]] = zext i32 [[ADD62]] to i64
-// CHECK13-NEXT:    [[KERNEL_ARGS63:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP175:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP175]], align 4
 // CHECK13-NEXT:    [[TMP176:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 1
@@ -6658,26 +6658,31 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 8
 // CHECK13-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_BASEPTRS15:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_PTRS16:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_MAPPERS17:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[_TMP18:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR__CASTED23:%.*]] = alloca i64, align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_BASEPTRS24:%.*]] = alloca [2 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_PTRS25:%.*]] = alloca [2 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_MAPPERS26:%.*]] = alloca [2 x ptr], align 8
 // CHECK13-NEXT:    [[_TMP27:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK13-NEXT:    store i32 10, ptr [[M]], align 4
 // CHECK13-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
@@ -6688,7 +6693,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK13-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK13-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK13-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK13-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -6730,7 +6734,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK13-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK13-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK13-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -6783,7 +6786,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    store ptr null, ptr [[TMP48]], align 8
 // CHECK13-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK13-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP51]], align 4
 // CHECK13-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1
@@ -6825,7 +6827,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    store ptr null, ptr [[TMP68]], align 8
 // CHECK13-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0
-// CHECK13-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP71]], align 4
 // CHECK13-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1
@@ -6878,7 +6879,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    store ptr null, ptr [[TMP94]], align 8
 // CHECK13-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS24]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS25]], i32 0, i32 0
-// CHECK13-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP97]], align 4
 // CHECK13-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1
@@ -7807,6 +7807,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK15-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK15-NEXT:    [[N_CASTED3:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [3 x ptr], align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [3 x ptr], align 4
@@ -7815,6 +7816,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK15-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_18:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[N_CASTED19:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
@@ -7825,6 +7827,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[_TMP24:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_26:%.*]] = alloca i32, align 4
+// CHECK15-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[N_CASTED34:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_BASEPTRS35:%.*]] = alloca [3 x ptr], align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_PTRS36:%.*]] = alloca [3 x ptr], align 4
@@ -7833,6 +7836,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[_TMP39:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_40:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_41:%.*]] = alloca i32, align 4
+// CHECK15-NEXT:    [[KERNEL_ARGS46:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_49:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[N_CASTED50:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR__CASTED51:%.*]] = alloca i32, align 4
@@ -7843,6 +7847,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[_TMP56:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_57:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_58:%.*]] = alloca i32, align 4
+// CHECK15-NEXT:    [[KERNEL_ARGS63:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK15-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK15-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 4
@@ -7892,7 +7897,6 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK15-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK15-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK15-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK15-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK15-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK15-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -7965,7 +7969,6 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP57:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK15-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP57]], 1
 // CHECK15-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK15-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK15-NEXT:    store i32 2, ptr [[TMP59]], align 4
 // CHECK15-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -8049,7 +8052,6 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP99:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4
 // CHECK15-NEXT:    [[ADD30:%.*]] = add nsw i32 [[TMP99]], 1
 // CHECK15-NEXT:    [[TMP100:%.*]] = zext i32 [[ADD30]] to i64
-// CHECK15-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0
 // CHECK15-NEXT:    store i32 2, ptr [[TMP101]], align 4
 // CHECK15-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1
@@ -8122,7 +8124,6 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP135:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_41]], align 4
 // CHECK15-NEXT:    [[ADD45:%.*]] = add nsw i32 [[TMP135]], 1
 // CHECK15-NEXT:    [[TMP136:%.*]] = zext i32 [[ADD45]] to i64
-// CHECK15-NEXT:    [[KERNEL_ARGS46:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 0
 // CHECK15-NEXT:    store i32 2, ptr [[TMP137]], align 4
 // CHECK15-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 1
@@ -8206,7 +8207,6 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP177:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_58]], align 4
 // CHECK15-NEXT:    [[ADD62:%.*]] = add nsw i32 [[TMP177]], 1
 // CHECK15-NEXT:    [[TMP178:%.*]] = zext i32 [[ADD62]] to i64
-// CHECK15-NEXT:    [[KERNEL_ARGS63:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[TMP179:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 0
 // CHECK15-NEXT:    store i32 2, ptr [[TMP179]], align 4
 // CHECK15-NEXT:    [[TMP180:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 1
@@ -9485,26 +9485,31 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK15-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK15-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK15-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK15-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK15-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 4
 // CHECK15-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK15-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[DOTOFFLOAD_BASEPTRS15:%.*]] = alloca [1 x ptr], align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_PTRS16:%.*]] = alloca [1 x ptr], align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_MAPPERS17:%.*]] = alloca [1 x ptr], align 4
 // CHECK15-NEXT:    [[_TMP18:%.*]] = alloca i32, align 4
+// CHECK15-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR__CASTED23:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_BASEPTRS24:%.*]] = alloca [2 x ptr], align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_PTRS25:%.*]] = alloca [2 x ptr], align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_MAPPERS26:%.*]] = alloca [2 x ptr], align 4
 // CHECK15-NEXT:    [[_TMP27:%.*]] = alloca i32, align 4
+// CHECK15-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK15-NEXT:    store i32 10, ptr [[M]], align 4
 // CHECK15-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
@@ -9515,7 +9520,6 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK15-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK15-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK15-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK15-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK15-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -9557,7 +9561,6 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK15-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK15-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK15-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK15-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -9610,7 +9613,6 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    store ptr null, ptr [[TMP48]], align 4
 // CHECK15-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK15-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK15-NEXT:    store i32 2, ptr [[TMP51]], align 4
 // CHECK15-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1
@@ -9652,7 +9654,6 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    store ptr null, ptr [[TMP68]], align 4
 // CHECK15-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0
-// CHECK15-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0
 // CHECK15-NEXT:    store i32 2, ptr [[TMP71]], align 4
 // CHECK15-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1
@@ -9705,7 +9706,6 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    store ptr null, ptr [[TMP94]], align 4
 // CHECK15-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS24]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS25]], i32 0, i32 0
-// CHECK15-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0
 // CHECK15-NEXT:    store i32 2, ptr [[TMP97]], align 4
 // CHECK15-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1
@@ -10607,6 +10607,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    [[N_CASTED3:%.*]] = alloca i64, align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [3 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [3 x ptr], align 8
@@ -10615,6 +10616,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_18:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[N_CASTED19:%.*]] = alloca i64, align 8
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
@@ -10625,6 +10627,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[_TMP24:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_26:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[N_CASTED34:%.*]] = alloca i64, align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_BASEPTRS35:%.*]] = alloca [3 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS36:%.*]] = alloca [3 x ptr], align 8
@@ -10633,6 +10636,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[_TMP39:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_40:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_41:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS46:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_49:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[N_CASTED50:%.*]] = alloca i64, align 8
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR__CASTED51:%.*]] = alloca i64, align 8
@@ -10643,6 +10647,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[_TMP56:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_57:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_58:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS63:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK17-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK17-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
@@ -10692,7 +10697,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK17-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK17-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK17-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -10764,7 +10768,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP56:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK17-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP56]], 1
 // CHECK17-NEXT:    [[TMP57:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK17-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP58]], align 4
 // CHECK17-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -10847,7 +10850,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP97:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4
 // CHECK17-NEXT:    [[ADD30:%.*]] = add nsw i32 [[TMP97]], 1
 // CHECK17-NEXT:    [[TMP98:%.*]] = zext i32 [[ADD30]] to i64
-// CHECK17-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP99]], align 4
 // CHECK17-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1
@@ -10919,7 +10921,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP132:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_41]], align 4
 // CHECK17-NEXT:    [[ADD45:%.*]] = add nsw i32 [[TMP132]], 1
 // CHECK17-NEXT:    [[TMP133:%.*]] = zext i32 [[ADD45]] to i64
-// CHECK17-NEXT:    [[KERNEL_ARGS46:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP134]], align 4
 // CHECK17-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 1
@@ -11002,7 +11003,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP173:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_58]], align 4
 // CHECK17-NEXT:    [[ADD62:%.*]] = add nsw i32 [[TMP173]], 1
 // CHECK17-NEXT:    [[TMP174:%.*]] = zext i32 [[ADD62]] to i64
-// CHECK17-NEXT:    [[KERNEL_ARGS63:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP175:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP175]], align 4
 // CHECK17-NEXT:    [[TMP176:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 1
@@ -12306,26 +12306,31 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK17-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK17-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 8
 // CHECK17-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_BASEPTRS15:%.*]] = alloca [1 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS16:%.*]] = alloca [1 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_MAPPERS17:%.*]] = alloca [1 x ptr], align 8
 // CHECK17-NEXT:    [[_TMP18:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR__CASTED23:%.*]] = alloca i64, align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_BASEPTRS24:%.*]] = alloca [2 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS25:%.*]] = alloca [2 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_MAPPERS26:%.*]] = alloca [2 x ptr], align 8
 // CHECK17-NEXT:    [[_TMP27:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK17-NEXT:    store i32 10, ptr [[M]], align 4
 // CHECK17-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
@@ -12336,7 +12341,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK17-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK17-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -12378,7 +12382,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK17-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK17-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK17-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -12431,7 +12434,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    store ptr null, ptr [[TMP48]], align 8
 // CHECK17-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK17-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP51]], align 4
 // CHECK17-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1
@@ -12473,7 +12475,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    store ptr null, ptr [[TMP68]], align 8
 // CHECK17-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0
-// CHECK17-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP71]], align 4
 // CHECK17-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1
@@ -12526,7 +12527,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    store ptr null, ptr [[TMP94]], align 8
 // CHECK17-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS24]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS25]], i32 0, i32 0
-// CHECK17-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP97]], align 4
 // CHECK17-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1
@@ -13455,6 +13455,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    [[N_CASTED3:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [3 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [3 x ptr], align 4
@@ -13463,6 +13464,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_18:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[N_CASTED19:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
@@ -13473,6 +13475,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[_TMP24:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_26:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[N_CASTED34:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_BASEPTRS35:%.*]] = alloca [3 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS36:%.*]] = alloca [3 x ptr], align 4
@@ -13481,6 +13484,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[_TMP39:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_40:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_41:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS46:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_49:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[N_CASTED50:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR__CASTED51:%.*]] = alloca i32, align 4
@@ -13491,6 +13495,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[_TMP56:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_57:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_58:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS63:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK19-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK19-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 4
@@ -13540,7 +13545,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK19-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK19-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK19-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -13613,7 +13617,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP57:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK19-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP57]], 1
 // CHECK19-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK19-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP59]], align 4
 // CHECK19-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -13697,7 +13700,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP99:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4
 // CHECK19-NEXT:    [[ADD30:%.*]] = add nsw i32 [[TMP99]], 1
 // CHECK19-NEXT:    [[TMP100:%.*]] = zext i32 [[ADD30]] to i64
-// CHECK19-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP101]], align 4
 // CHECK19-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1
@@ -13770,7 +13772,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP135:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_41]], align 4
 // CHECK19-NEXT:    [[ADD45:%.*]] = add nsw i32 [[TMP135]], 1
 // CHECK19-NEXT:    [[TMP136:%.*]] = zext i32 [[ADD45]] to i64
-// CHECK19-NEXT:    [[KERNEL_ARGS46:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP137]], align 4
 // CHECK19-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 1
@@ -13854,7 +13855,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP177:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_58]], align 4
 // CHECK19-NEXT:    [[ADD62:%.*]] = add nsw i32 [[TMP177]], 1
 // CHECK19-NEXT:    [[TMP178:%.*]] = zext i32 [[ADD62]] to i64
-// CHECK19-NEXT:    [[KERNEL_ARGS63:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP179:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP179]], align 4
 // CHECK19-NEXT:    [[TMP180:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 1
@@ -15133,26 +15133,31 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 4
 // CHECK19-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[DOTOFFLOAD_BASEPTRS15:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS16:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_MAPPERS17:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[_TMP18:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR__CASTED23:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_BASEPTRS24:%.*]] = alloca [2 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS25:%.*]] = alloca [2 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_MAPPERS26:%.*]] = alloca [2 x ptr], align 4
 // CHECK19-NEXT:    [[_TMP27:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK19-NEXT:    store i32 10, ptr [[M]], align 4
 // CHECK19-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
@@ -15163,7 +15168,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK19-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK19-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -15205,7 +15209,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK19-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK19-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK19-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -15258,7 +15261,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    store ptr null, ptr [[TMP48]], align 4
 // CHECK19-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK19-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP51]], align 4
 // CHECK19-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1
@@ -15300,7 +15302,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    store ptr null, ptr [[TMP68]], align 4
 // CHECK19-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0
-// CHECK19-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP71]], align 4
 // CHECK19-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1
@@ -15353,7 +15354,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    store ptr null, ptr [[TMP94]], align 4
 // CHECK19-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS24]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS25]], i32 0, i32 0
-// CHECK19-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP97]], align 4
 // CHECK19-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_private_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_private_codegen.cpp
index 9127aec58b9f4..5fb689f0938c7 100644
--- a/clang/test/OpenMP/target_teams_distribute_private_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_private_codegen.cpp
@@ -237,8 +237,8 @@ int main() {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -398,6 +398,7 @@ int main() {
 // CHECK1-NEXT:    [[VAR:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK1-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -407,7 +408,6 @@ int main() {
 // CHECK1-NEXT:    call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2)
 // CHECK1-NEXT:    store ptr [[TEST]], ptr [[VAR]], align 8
 // CHECK1-NEXT:    store ptr undef, ptr [[_TMP1]], align 8
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -772,8 +772,8 @@ int main() {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -931,6 +931,7 @@ int main() {
 // CHECK3-NEXT:    [[VAR:%.*]] = alloca ptr, align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[_TMP1:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK3-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -940,7 +941,6 @@ int main() {
 // CHECK3-NEXT:    call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2)
 // CHECK3-NEXT:    store ptr [[TEST]], ptr [[VAR]], align 4
 // CHECK3-NEXT:    store ptr undef, ptr [[_TMP1]], align 4
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp
index ffc4f6f500da5..ec4e0ac3c21cf 100644
--- a/clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp
@@ -276,45 +276,55 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[AND_VAR:%.*]] = alloca i8, align 1
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS15:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS16:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS17:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP18:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[OR_VAR:%.*]] = alloca i8, align 1
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS22:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS23:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS24:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP25:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS26:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[BIT_VAR:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS29:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS30:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS31:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP32:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS33:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS36:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS37:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS38:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP39:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS40:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS43:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS44:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS45:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP46:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS47:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS50:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS51:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS52:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP53:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS54:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS57:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS58:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS59:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP60:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS61:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 8
@@ -324,7 +334,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -366,7 +375,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -409,7 +417,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP42]], align 8
 // CHECK1-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK1-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1
@@ -452,7 +459,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP62]], align 8
 // CHECK1-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP65]], align 4
 // CHECK1-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1
@@ -495,7 +501,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP82]], align 8
 // CHECK1-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS26:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP85]], align 4
 // CHECK1-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 1
@@ -538,7 +543,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP102]], align 8
 // CHECK1-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS33:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS33]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP105]], align 4
 // CHECK1-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS33]], i32 0, i32 1
@@ -580,7 +584,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP122]], align 8
 // CHECK1-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS40:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS40]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP125]], align 4
 // CHECK1-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS40]], i32 0, i32 1
@@ -622,7 +625,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP142]], align 8
 // CHECK1-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS43]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS44]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS47:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS47]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP145]], align 4
 // CHECK1-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS47]], i32 0, i32 1
@@ -665,7 +667,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP162]], align 8
 // CHECK1-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS50]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS51]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS54:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP165]], align 4
 // CHECK1-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 1
@@ -708,7 +709,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP182]], align 8
 // CHECK1-NEXT:    [[TMP183:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS57]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP184:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS58]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS61:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP185:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP185]], align 4
 // CHECK1-NEXT:    [[TMP186:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 1
@@ -2119,45 +2119,55 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[AND_VAR:%.*]] = alloca i8, align 1
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS15:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS16:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS17:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP18:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[OR_VAR:%.*]] = alloca i8, align 1
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS22:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS23:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS24:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP25:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS26:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[BIT_VAR:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS29:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS30:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS31:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP32:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS33:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS36:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS37:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS38:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP39:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS40:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS43:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS44:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS45:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP46:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS47:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS50:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS51:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS52:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP53:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS54:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS57:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS58:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS59:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP60:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS61:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
@@ -2168,7 +2178,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2216,7 +2225,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP25]], align 8
 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP28]], align 4
 // CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -2265,7 +2273,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP48]], align 8
 // CHECK1-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP51]], align 4
 // CHECK1-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1
@@ -2314,7 +2321,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP71]], align 8
 // CHECK1-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP74]], align 4
 // CHECK1-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1
@@ -2363,7 +2369,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP94]], align 8
 // CHECK1-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS26:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP97]], align 4
 // CHECK1-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 1
@@ -2412,7 +2417,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP117]], align 8
 // CHECK1-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS33:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS33]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP120]], align 4
 // CHECK1-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS33]], i32 0, i32 1
@@ -2460,7 +2464,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP140]], align 8
 // CHECK1-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS40:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS40]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP143]], align 4
 // CHECK1-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS40]], i32 0, i32 1
@@ -2508,7 +2511,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP163]], align 8
 // CHECK1-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS43]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS44]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS47:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS47]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP166]], align 4
 // CHECK1-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS47]], i32 0, i32 1
@@ -2557,7 +2559,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP186]], align 8
 // CHECK1-NEXT:    [[TMP187:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS50]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP188:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS51]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS54:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP189:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP189]], align 4
 // CHECK1-NEXT:    [[TMP190:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 1
@@ -2606,7 +2607,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP209]], align 8
 // CHECK1-NEXT:    [[TMP210:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS57]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP211:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS58]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS61:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP212:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP212]], align 4
 // CHECK1-NEXT:    [[TMP213:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 1
@@ -4111,45 +4111,55 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[AND_VAR:%.*]] = alloca i8, align 1
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS15:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS16:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS17:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP18:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[OR_VAR:%.*]] = alloca i8, align 1
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS22:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS23:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS24:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP25:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS26:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[BIT_VAR:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS29:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS30:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS31:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP32:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS33:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS36:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS37:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS38:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP39:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS40:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS43:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS44:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS45:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP46:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS47:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS50:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS51:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS52:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP53:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS54:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS57:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS58:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS59:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP60:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS61:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 4
@@ -4159,7 +4169,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -4201,7 +4210,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -4244,7 +4252,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP42]], align 4
 // CHECK3-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK3-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1
@@ -4287,7 +4294,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP62]], align 4
 // CHECK3-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP65]], align 4
 // CHECK3-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1
@@ -4330,7 +4336,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP82]], align 4
 // CHECK3-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS26:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP85]], align 4
 // CHECK3-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 1
@@ -4373,7 +4378,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP102]], align 4
 // CHECK3-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS33:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS33]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP105]], align 4
 // CHECK3-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS33]], i32 0, i32 1
@@ -4415,7 +4419,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP122]], align 4
 // CHECK3-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS40:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS40]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP125]], align 4
 // CHECK3-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS40]], i32 0, i32 1
@@ -4457,7 +4460,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP142]], align 4
 // CHECK3-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS43]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS44]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS47:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS47]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP145]], align 4
 // CHECK3-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS47]], i32 0, i32 1
@@ -4500,7 +4502,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP162]], align 4
 // CHECK3-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS50]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS51]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS54:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP165]], align 4
 // CHECK3-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 1
@@ -4543,7 +4544,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP182]], align 4
 // CHECK3-NEXT:    [[TMP183:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS57]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP184:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS58]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS61:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP185:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP185]], align 4
 // CHECK3-NEXT:    [[TMP186:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 1
@@ -5954,45 +5954,55 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[AND_VAR:%.*]] = alloca i8, align 1
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS15:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS16:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS17:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP18:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[OR_VAR:%.*]] = alloca i8, align 1
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS22:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS23:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS24:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP25:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS26:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[BIT_VAR:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS29:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS30:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS31:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP32:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS33:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS36:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS37:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS38:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP39:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS40:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS43:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS44:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS45:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP46:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS47:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS50:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS51:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS52:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP53:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS54:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS57:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS58:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS59:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP60:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS61:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
@@ -6003,7 +6013,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -6051,7 +6060,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP25]], align 4
 // CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP28]], align 4
 // CHECK3-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -6100,7 +6108,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP48]], align 4
 // CHECK3-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP51]], align 4
 // CHECK3-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1
@@ -6149,7 +6156,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP71]], align 4
 // CHECK3-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP74]], align 4
 // CHECK3-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1
@@ -6198,7 +6204,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP94]], align 4
 // CHECK3-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS26:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP97]], align 4
 // CHECK3-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 1
@@ -6247,7 +6252,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP117]], align 4
 // CHECK3-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS33:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS33]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP120]], align 4
 // CHECK3-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS33]], i32 0, i32 1
@@ -6295,7 +6299,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP140]], align 4
 // CHECK3-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS40:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS40]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP143]], align 4
 // CHECK3-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS40]], i32 0, i32 1
@@ -6343,7 +6346,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP163]], align 4
 // CHECK3-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS43]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS44]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS47:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS47]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP166]], align 4
 // CHECK3-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS47]], i32 0, i32 1
@@ -6392,7 +6394,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP186]], align 4
 // CHECK3-NEXT:    [[TMP187:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS50]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP188:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS51]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS54:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP189:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP189]], align 4
 // CHECK3-NEXT:    [[TMP190:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 1
@@ -6441,7 +6442,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP209]], align 4
 // CHECK3-NEXT:    [[TMP210:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS57]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP211:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS58]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS61:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP212:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP212]], align 4
 // CHECK3-NEXT:    [[TMP213:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp
index 47fd29ebacd75..7cc064f779c5d 100644
--- a/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp
@@ -320,18 +320,21 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS6:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS7:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[A_CASTED8:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[AA_CASTED9:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS11:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP13:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[A_CASTED17:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS20:%.*]] = alloca [9 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS21:%.*]] = alloca [9 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS22:%.*]] = alloca [9 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 8
 // CHECK1-NEXT:    [[_TMP23:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS24:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[A]], align 4
@@ -420,7 +423,6 @@ int bar(int n){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP49]], align 8
 // CHECK1-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP52]], align 4
 // CHECK1-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -478,7 +480,6 @@ int bar(int n){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP77]], align 8
 // CHECK1-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP80]], align 4
 // CHECK1-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1
@@ -589,7 +590,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS24:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS24]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP133]], align 4
 // CHECK1-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS24]], i32 0, i32 1
@@ -766,10 +766,10 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR3_I:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[AA_CASTED_I:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR__CASTED_I:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR__CASTED4_I:%.*]] = alloca i64, align 8
-// CHECK1-NEXT:    [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    store i32 [[TMP0]], ptr [[DOTADDR]], align 4
@@ -1331,6 +1331,7 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
@@ -1390,7 +1391,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP29]], align 4
 // CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1460,6 +1460,7 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK1-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -1527,7 +1528,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
 // CHECK1-NEXT:    [[ADD5:%.*]] = add i32 [[TMP30]], 1
 // CHECK1-NEXT:    [[TMP31:%.*]] = zext i32 [[ADD5]] to i64
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP32]], align 4
 // CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1583,6 +1583,7 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK1-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -1616,7 +1617,6 @@ int bar(int n){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP13]], align 8
 // CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP16]], align 4
 // CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2091,18 +2091,21 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS6:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS7:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[A_CASTED8:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[AA_CASTED9:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS11:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP13:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[A_CASTED17:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS20:%.*]] = alloca [9 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS21:%.*]] = alloca [9 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS22:%.*]] = alloca [9 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 4
 // CHECK3-NEXT:    [[_TMP23:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS24:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    store i32 0, ptr [[A]], align 4
@@ -2189,7 +2192,6 @@ int bar(int n){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP47]], align 4
 // CHECK3-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP50]], align 4
 // CHECK3-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2247,7 +2249,6 @@ int bar(int n){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP75]], align 4
 // CHECK3-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP78]], align 4
 // CHECK3-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1
@@ -2360,7 +2361,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS24:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS24]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP133]], align 4
 // CHECK3-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS24]], i32 0, i32 1
@@ -2537,10 +2537,10 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca ptr, align 4
 // CHECK3-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca ptr, align 4
 // CHECK3-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR3_I:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[AA_CASTED_I:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR__CASTED_I:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR__CASTED4_I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[DOTADDR:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTADDR1:%.*]] = alloca ptr, align 4
 // CHECK3-NEXT:    store i32 [[TMP0]], ptr [[DOTADDR]], align 4
@@ -3102,6 +3102,7 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
@@ -3161,7 +3162,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP29]], align 4
 // CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3231,6 +3231,7 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK3-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -3298,7 +3299,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
 // CHECK3-NEXT:    [[ADD5:%.*]] = add i32 [[TMP30]], 1
 // CHECK3-NEXT:    [[TMP31:%.*]] = zext i32 [[ADD5]] to i64
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP32]], align 4
 // CHECK3-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3354,6 +3354,7 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK3-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -3387,7 +3388,6 @@ int bar(int n){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP13]], align 4
 // CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP16]], align 4
 // CHECK3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3862,18 +3862,21 @@ int bar(int n){
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS6:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS7:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[A_CASTED8:%.*]] = alloca i64, align 8
 // CHECK5-NEXT:    [[AA_CASTED9:%.*]] = alloca i64, align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [2 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS11:%.*]] = alloca [2 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [2 x ptr], align 8
 // CHECK5-NEXT:    [[_TMP13:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[A_CASTED17:%.*]] = alloca i64, align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_BASEPTRS20:%.*]] = alloca [9 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS21:%.*]] = alloca [9 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS22:%.*]] = alloca [9 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 8
 // CHECK5-NEXT:    [[_TMP23:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS24:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
 // CHECK5-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK5-NEXT:    store i32 0, ptr [[A]], align 4
@@ -3962,7 +3965,6 @@ int bar(int n){
 // CHECK5-NEXT:    store ptr null, ptr [[TMP49]], align 8
 // CHECK5-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP52]], align 4
 // CHECK5-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -4020,7 +4022,6 @@ int bar(int n){
 // CHECK5-NEXT:    store ptr null, ptr [[TMP77]], align 8
 // CHECK5-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP80]], align 4
 // CHECK5-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1
@@ -4131,7 +4132,6 @@ int bar(int n){
 // CHECK5-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS24:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS24]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP133]], align 4
 // CHECK5-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS24]], i32 0, i32 1
@@ -4308,10 +4308,10 @@ int bar(int n){
 // CHECK5-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca ptr, align 8
 // CHECK5-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca ptr, align 8
 // CHECK5-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR3_I:%.*]] = alloca ptr, align 8
+// CHECK5-NEXT:    [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[AA_CASTED_I:%.*]] = alloca i64, align 8
 // CHECK5-NEXT:    [[DOTCAPTURE_EXPR__CASTED_I:%.*]] = alloca i64, align 8
 // CHECK5-NEXT:    [[DOTCAPTURE_EXPR__CASTED4_I:%.*]] = alloca i64, align 8
-// CHECK5-NEXT:    [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[DOTADDR:%.*]] = alloca i32, align 4
 // CHECK5-NEXT:    [[DOTADDR1:%.*]] = alloca ptr, align 8
 // CHECK5-NEXT:    store i32 [[TMP0]], ptr [[DOTADDR]], align 4
@@ -4875,6 +4875,7 @@ int bar(int n){
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [6 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [6 x i64], align 8
 // CHECK5-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK5-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK5-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
@@ -4949,7 +4950,6 @@ int bar(int n){
 // CHECK5-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [6 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP35]], align 4
 // CHECK5-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -5019,6 +5019,7 @@ int bar(int n){
 // CHECK5-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK5-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
 // CHECK5-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK5-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK5-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -5086,7 +5087,6 @@ int bar(int n){
 // CHECK5-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
 // CHECK5-NEXT:    [[ADD5:%.*]] = add i32 [[TMP30]], 1
 // CHECK5-NEXT:    [[TMP31:%.*]] = zext i32 [[ADD5]] to i64
-// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP32]], align 4
 // CHECK5-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -5142,6 +5142,7 @@ int bar(int n){
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
 // CHECK5-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK5-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK5-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -5175,7 +5176,6 @@ int bar(int n){
 // CHECK5-NEXT:    store ptr null, ptr [[TMP13]], align 8
 // CHECK5-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP16]], align 4
 // CHECK5-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -5702,18 +5702,21 @@ int bar(int n){
 // CHECK7-NEXT:    [[DOTOFFLOAD_PTRS6:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS7:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    [[A_CASTED8:%.*]] = alloca i32, align 4
 // CHECK7-NEXT:    [[AA_CASTED9:%.*]] = alloca i32, align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [2 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_PTRS11:%.*]] = alloca [2 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [2 x ptr], align 4
 // CHECK7-NEXT:    [[_TMP13:%.*]] = alloca i32, align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK7-NEXT:    [[A_CASTED17:%.*]] = alloca i32, align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_BASEPTRS20:%.*]] = alloca [9 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_PTRS21:%.*]] = alloca [9 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS22:%.*]] = alloca [9 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 4
 // CHECK7-NEXT:    [[_TMP23:%.*]] = alloca i32, align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS24:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK7-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
 // CHECK7-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK7-NEXT:    store i32 0, ptr [[A]], align 4
@@ -5800,7 +5803,6 @@ int bar(int n){
 // CHECK7-NEXT:    store ptr null, ptr [[TMP47]], align 4
 // CHECK7-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP50]], align 4
 // CHECK7-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -5858,7 +5860,6 @@ int bar(int n){
 // CHECK7-NEXT:    store ptr null, ptr [[TMP75]], align 4
 // CHECK7-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
-// CHECK7-NEXT:    [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK7-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP78]], align 4
 // CHECK7-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1
@@ -5971,7 +5972,6 @@ int bar(int n){
 // CHECK7-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK7-NEXT:    [[KERNEL_ARGS24:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK7-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS24]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP133]], align 4
 // CHECK7-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS24]], i32 0, i32 1
@@ -6148,10 +6148,10 @@ int bar(int n){
 // CHECK7-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca ptr, align 4
 // CHECK7-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca ptr, align 4
 // CHECK7-NEXT:    [[DOTFIRSTPRIV_PTR_ADDR3_I:%.*]] = alloca ptr, align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    [[AA_CASTED_I:%.*]] = alloca i32, align 4
 // CHECK7-NEXT:    [[DOTCAPTURE_EXPR__CASTED_I:%.*]] = alloca i32, align 4
 // CHECK7-NEXT:    [[DOTCAPTURE_EXPR__CASTED4_I:%.*]] = alloca i32, align 4
-// CHECK7-NEXT:    [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    [[DOTADDR:%.*]] = alloca i32, align 4
 // CHECK7-NEXT:    [[DOTADDR1:%.*]] = alloca ptr, align 4
 // CHECK7-NEXT:    store i32 [[TMP0]], ptr [[DOTADDR]], align 4
@@ -6715,6 +6715,7 @@ int bar(int n){
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [6 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [6 x i64], align 4
 // CHECK7-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK7-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK7-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
@@ -6789,7 +6790,6 @@ int bar(int n){
 // CHECK7-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [6 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP35]], align 4
 // CHECK7-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -6859,6 +6859,7 @@ int bar(int n){
 // CHECK7-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK7-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
 // CHECK7-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK7-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK7-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -6926,7 +6927,6 @@ int bar(int n){
 // CHECK7-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
 // CHECK7-NEXT:    [[ADD5:%.*]] = add i32 [[TMP30]], 1
 // CHECK7-NEXT:    [[TMP31:%.*]] = zext i32 [[ADD5]] to i64
-// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP32]], align 4
 // CHECK7-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -6982,6 +6982,7 @@ int bar(int n){
 // CHECK7-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
 // CHECK7-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK7-NEXT:    store i32 0, ptr [[A]], align 4
 // CHECK7-NEXT:    store i16 0, ptr [[AA]], align 2
@@ -7015,7 +7016,6 @@ int bar(int n){
 // CHECK7-NEXT:    store ptr null, ptr [[TMP13]], align 4
 // CHECK7-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP16]], align 4
 // CHECK7-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp
index 4aa8008841c04..073204e9fbbf0 100644
--- a/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp
@@ -111,6 +111,7 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[_TMP2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -122,7 +123,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -287,6 +287,7 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[_TMP2:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -298,7 +299,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -606,6 +606,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
@@ -682,7 +683,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
 // CHECK9-NEXT:    [[TMP35:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
 // CHECK9-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP35]], 1
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP36]], align 4
 // CHECK9-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -927,6 +927,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    store ptr [[A]], ptr [[TMP0]], align 8
@@ -936,7 +937,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK9-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1101,6 +1101,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK11-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 4
@@ -1176,7 +1177,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
 // CHECK11-NEXT:    [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
 // CHECK11-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP34]], 1
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP35]], align 4
 // CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1419,6 +1419,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    store ptr [[A]], ptr [[TMP0]], align 4
@@ -1428,7 +1429,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK11-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp
index 5a14f0ab1939e..0d43e6f5b5dfe 100644
--- a/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp
@@ -138,14 +138,17 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS12:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP14:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -157,7 +160,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -200,7 +202,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -243,7 +244,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP42]], align 8
 // CHECK1-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK1-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -578,14 +578,17 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS12:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP14:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -597,7 +600,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -640,7 +642,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -683,7 +684,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP42]], align 4
 // CHECK3-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK3-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -1263,6 +1263,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[N_CASTED3:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [3 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [3 x ptr], align 8
@@ -1271,6 +1272,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_18:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[N_CASTED19:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
@@ -1281,6 +1283,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[_TMP24:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_26:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
@@ -1329,7 +1332,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK9-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1401,7 +1403,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP56:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK9-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP56]], 1
 // CHECK9-NEXT:    [[TMP57:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP58]], align 4
 // CHECK9-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -1484,7 +1485,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP97:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4
 // CHECK9-NEXT:    [[ADD30:%.*]] = add nsw i32 [[TMP97]], 1
 // CHECK9-NEXT:    [[TMP98:%.*]] = zext i32 [[ADD30]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP99]], align 4
 // CHECK9-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1
@@ -1937,14 +1937,17 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    store ptr [[A]], ptr [[TMP0]], align 8
@@ -1954,7 +1957,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK9-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1996,7 +1998,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -2038,7 +2039,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP42]], align 8
 // CHECK9-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK9-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1
@@ -2368,6 +2368,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[N_CASTED3:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [3 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [3 x ptr], align 4
@@ -2376,6 +2377,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_18:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[N_CASTED19:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
@@ -2386,6 +2388,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[_TMP24:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_26:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK11-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 4
@@ -2434,7 +2437,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK11-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2507,7 +2509,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP57:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK11-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP57]], 1
 // CHECK11-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP59]], align 4
 // CHECK11-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -2591,7 +2592,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP99:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4
 // CHECK11-NEXT:    [[ADD30:%.*]] = add nsw i32 [[TMP99]], 1
 // CHECK11-NEXT:    [[TMP100:%.*]] = zext i32 [[ADD30]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP101]], align 4
 // CHECK11-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1
@@ -3041,14 +3041,17 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    store ptr [[A]], ptr [[TMP0]], align 4
@@ -3058,7 +3061,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK11-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3100,7 +3102,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -3142,7 +3143,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP42]], align 4
 // CHECK11-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK11-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp
index 20e018504e5c6..fb0824a86d926 100644
--- a/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp
@@ -262,6 +262,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr @t_var, align 4
 // CHECK1-NEXT:    store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4
@@ -301,7 +302,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP18]], align 8
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP21]], align 4
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -549,6 +549,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK1-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -589,7 +590,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP15]], align 8
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1085,6 +1085,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr @t_var, align 4
 // CHECK3-NEXT:    store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4
@@ -1124,7 +1125,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP18]], align 4
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP21]], align 4
 // CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1370,6 +1370,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK3-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -1410,7 +1411,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP15]], align 4
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp
index f0786333ad125..060b5ba4e6123 100644
--- a/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp
@@ -532,6 +532,7 @@ int main() {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store ptr [[G]], ptr [[G1]], align 8
@@ -584,7 +585,6 @@ int main() {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP20]], align 8
 // CHECK9-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -866,6 +866,7 @@ int main() {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK9-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK9-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -906,7 +907,6 @@ int main() {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP15]], align 8
 // CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK9-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1254,6 +1254,7 @@ int main() {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store ptr [[G]], ptr [[G1]], align 4
@@ -1306,7 +1307,6 @@ int main() {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP20]], align 4
 // CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1586,6 +1586,7 @@ int main() {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK11-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK11-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -1626,7 +1627,6 @@ int main() {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP15]], align 4
 // CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp
index e4f62469a8b26..2e7a6d5f92273 100644
--- a/clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp
@@ -237,8 +237,8 @@ int main() {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -405,6 +405,7 @@ int main() {
 // CHECK1-NEXT:    [[VAR:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK1-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -414,7 +415,6 @@ int main() {
 // CHECK1-NEXT:    call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2)
 // CHECK1-NEXT:    store ptr [[TEST]], ptr [[VAR]], align 8
 // CHECK1-NEXT:    store ptr undef, ptr [[_TMP1]], align 8
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -786,8 +786,8 @@ int main() {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -952,6 +952,7 @@ int main() {
 // CHECK3-NEXT:    [[VAR:%.*]] = alloca ptr, align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[_TMP1:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK3-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -961,7 +962,6 @@ int main() {
 // CHECK3-NEXT:    call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2)
 // CHECK3-NEXT:    store ptr [[TEST]], ptr [[VAR]], align 4
 // CHECK3-NEXT:    store ptr undef, ptr [[_TMP1]], align 4
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp
index 38adc315e8e82..394a44458f587 100644
--- a/clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp
@@ -86,6 +86,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 8
@@ -95,7 +96,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -269,6 +269,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
@@ -279,7 +280,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -458,6 +458,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 4
@@ -467,7 +468,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -641,6 +641,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
@@ -651,7 +652,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_map_codegen.cpp b/clang/test/OpenMP/target_teams_map_codegen.cpp
index 69aa5002ca591..a2b30400287ce 100644
--- a/clang/test/OpenMP/target_teams_map_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_map_codegen.cpp
@@ -84,6 +84,7 @@ void mapInt128() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[X]], ptr [[TMP0]], align 8
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
@@ -98,7 +99,6 @@ void mapInt128() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP5]], align 8
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP8]], align 4
 // CHECK1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -162,6 +162,7 @@ void mapInt128() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[X]], ptr [[TMP0]], align 8
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
@@ -176,7 +177,6 @@ void mapInt128() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP5]], align 8
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP8]], align 4
 // CHECK1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -256,6 +256,7 @@ void mapInt128() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[X]], ptr [[TMP0]], align 8
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
@@ -270,7 +271,6 @@ void mapInt128() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP5]], align 8
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP8]], align 4
 // CHECK1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -406,6 +406,7 @@ void mapInt128() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[X]], ptr [[TMP0]], align 8
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
@@ -414,7 +415,6 @@ void mapInt128() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -484,6 +484,7 @@ void mapInt128() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[X]], ptr [[TMP0]], align 8
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
@@ -492,7 +493,6 @@ void mapInt128() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -562,6 +562,7 @@ void mapInt128() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[X]], ptr [[TMP0]], align 8
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
@@ -570,7 +571,6 @@ void mapInt128() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -642,9 +642,11 @@ void mapInt128() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [3 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[Y]], ptr [[TMP0]], align 8
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
@@ -665,7 +667,6 @@ void mapInt128() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP8]], align 8
 // CHECK1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP11]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -719,7 +720,6 @@ void mapInt128() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP34]], align 8
 // CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP37]], align 4
 // CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1
@@ -1005,9 +1005,11 @@ void mapInt128() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [3 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    store ptr [[Y]], ptr [[TMP0]], align 8
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
@@ -1028,7 +1030,6 @@ void mapInt128() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP8]], align 8
 // CHECK1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP11]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1082,7 +1083,6 @@ void mapInt128() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP34]], align 8
 // CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP37]], align 4
 // CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1
@@ -1318,6 +1318,7 @@ void mapInt128() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    store ptr [[X]], ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
@@ -1332,7 +1333,6 @@ void mapInt128() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP8]], align 4
 // CHECK3-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1396,6 +1396,7 @@ void mapInt128() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    store ptr [[X]], ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
@@ -1410,7 +1411,6 @@ void mapInt128() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP8]], align 4
 // CHECK3-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1490,6 +1490,7 @@ void mapInt128() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    store ptr [[X]], ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
@@ -1504,7 +1505,6 @@ void mapInt128() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP8]], align 4
 // CHECK3-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1640,6 +1640,7 @@ void mapInt128() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    store ptr [[X]], ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
@@ -1648,7 +1649,6 @@ void mapInt128() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1718,6 +1718,7 @@ void mapInt128() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    store ptr [[X]], ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
@@ -1726,7 +1727,6 @@ void mapInt128() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1796,6 +1796,7 @@ void mapInt128() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    store ptr [[X]], ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
@@ -1804,7 +1805,6 @@ void mapInt128() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1876,9 +1876,11 @@ void mapInt128() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [3 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    store ptr [[Y]], ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
@@ -1899,7 +1901,6 @@ void mapInt128() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP8]], align 4
 // CHECK3-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP11]], align 4
 // CHECK3-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1953,7 +1954,6 @@ void mapInt128() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP34]], align 4
 // CHECK3-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP37]], align 4
 // CHECK3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_num_teams_codegen.cpp b/clang/test/OpenMP/target_teams_num_teams_codegen.cpp
index b25d10dac42c2..4aea3fbd932b2 100644
--- a/clang/test/OpenMP/target_teams_num_teams_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_num_teams_codegen.cpp
@@ -213,9 +213,11 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
@@ -253,7 +255,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
 // CHECK1-NEXT:    [[TMP18:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP17]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP19]], align 4
 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -296,7 +297,6 @@ int bar(int n){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP36]], align 8
 // CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP39]], align 4
 // CHECK1-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
@@ -345,11 +345,13 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR__CASTED2:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
@@ -366,7 +368,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
 // CHECK1-NEXT:    [[TMP9:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP8]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP10]], align 4
 // CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -416,7 +417,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK1-NEXT:    [[TMP34:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP33]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP35]], align 4
 // CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
@@ -460,6 +460,7 @@ int bar(int n){
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[A:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[B:%.*]] = alloca i16, align 2
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i16, align 2
 // CHECK1-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
@@ -468,9 +469,9 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS1:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[A]], align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -539,7 +540,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP33:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2
 // CHECK1-NEXT:    [[TMP34:%.*]] = sext i16 [[TMP33]] to i32
 // CHECK1-NEXT:    [[TMP35:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP34]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS1:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP36]], align 4
 // CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 1
@@ -798,9 +798,11 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
@@ -838,7 +840,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
 // CHECK3-NEXT:    [[TMP18:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP17]], 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP19]], align 4
 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -881,7 +882,6 @@ int bar(int n){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP36]], align 4
 // CHECK3-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP39]], align 4
 // CHECK3-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
@@ -930,11 +930,13 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR__CASTED2:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
@@ -951,7 +953,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
 // CHECK3-NEXT:    [[TMP9:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP8]], 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP10]], align 4
 // CHECK3-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1001,7 +1002,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK3-NEXT:    [[TMP34:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP33]], 0
-// CHECK3-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP35]], align 4
 // CHECK3-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
@@ -1045,6 +1045,7 @@ int bar(int n){
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[A:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[B:%.*]] = alloca i16, align 2
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i16, align 2
 // CHECK3-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
@@ -1053,9 +1054,9 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS1:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    store i32 0, ptr [[A]], align 4
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1124,7 +1125,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP33:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2
 // CHECK3-NEXT:    [[TMP34:%.*]] = sext i16 [[TMP33]] to i32
 // CHECK3-NEXT:    [[TMP35:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP34]], 0
-// CHECK3-NEXT:    [[KERNEL_ARGS1:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP36]], align 4
 // CHECK3-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp b/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp
index 6a1dfdfa5fca0..377d983b72786 100644
--- a/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp
@@ -213,9 +213,11 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
@@ -253,7 +255,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
 // CHECK1-NEXT:    [[TMP18:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP17]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP19]], align 4
 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -296,7 +297,6 @@ int bar(int n){
 // CHECK1-NEXT:    store ptr null, ptr [[TMP36]], align 8
 // CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP39]], align 4
 // CHECK1-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
@@ -347,11 +347,13 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR__CASTED4:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS6:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS7:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS8:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
@@ -382,7 +384,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK1-NEXT:    [[TMP16:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP14]], 0
 // CHECK1-NEXT:    [[TMP17:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP15]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -432,7 +433,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
 // CHECK1-NEXT:    [[TMP42:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP41]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS8:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP43]], align 4
 // CHECK1-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 1
@@ -476,6 +476,7 @@ int bar(int n){
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[A:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[B:%.*]] = alloca i16, align 2
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i16, align 2
 // CHECK1-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
@@ -484,9 +485,9 @@ int bar(int n){
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS1:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[A]], align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -555,7 +556,6 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP33:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2
 // CHECK1-NEXT:    [[TMP34:%.*]] = sext i16 [[TMP33]] to i32
 // CHECK1-NEXT:    [[TMP35:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP34]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS1:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP36]], align 4
 // CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 1
@@ -817,9 +817,11 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
@@ -857,7 +859,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
 // CHECK3-NEXT:    [[TMP18:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP17]], 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP19]], align 4
 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -900,7 +901,6 @@ int bar(int n){
 // CHECK3-NEXT:    store ptr null, ptr [[TMP36]], align 4
 // CHECK3-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP39]], align 4
 // CHECK3-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
@@ -951,11 +951,13 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR__CASTED4:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS6:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS7:%.*]] = alloca [1 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS8:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
@@ -986,7 +988,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK3-NEXT:    [[TMP16:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP14]], 0
 // CHECK3-NEXT:    [[TMP17:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP15]], 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1036,7 +1037,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
 // CHECK3-NEXT:    [[TMP42:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP41]], 0
-// CHECK3-NEXT:    [[KERNEL_ARGS8:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP43]], align 4
 // CHECK3-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 1
@@ -1080,6 +1080,7 @@ int bar(int n){
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[A:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[B:%.*]] = alloca i16, align 2
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i16, align 2
 // CHECK3-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
@@ -1088,9 +1089,9 @@ int bar(int n){
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS1:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    store i32 0, ptr [[A]], align 4
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1159,7 +1160,6 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP33:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2
 // CHECK3-NEXT:    [[TMP34:%.*]] = sext i16 [[TMP33]] to i32
 // CHECK3-NEXT:    [[TMP35:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP34]], 0
-// CHECK3-NEXT:    [[KERNEL_ARGS1:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP36]], align 4
 // CHECK3-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_codegen.cpp b/clang/test/OpenMP/teams_codegen.cpp
index 9d6da1ec7fd4e..3c6bd6dbcf883 100644
--- a/clang/test/OpenMP/teams_codegen.cpp
+++ b/clang/test/OpenMP/teams_codegen.cpp
@@ -309,20 +309,24 @@ void foo() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[COMP_CASTED1:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[LA_CASTED:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[COMP_CASTED8:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS9:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS10:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS11:%.*]] = alloca [2 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[LA_CASTED15:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[COMP_CASTED16:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS17:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS18:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS19:%.*]] = alloca [2 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS20:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[GBLA_CASTED:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[GBLB_CASTED:%.*]] = alloca i64, align 8
@@ -331,6 +335,7 @@ void foo() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS24:%.*]] = alloca [5 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS25:%.*]] = alloca [5 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS26:%.*]] = alloca [5 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[GBLC_CASTED:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[COMP_CASTED31:%.*]] = alloca i64, align 8
@@ -339,6 +344,7 @@ void foo() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS34:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP35:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[_TMP37:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS39:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
 // CHECK1-NEXT:    store i32 1, ptr [[COMP]], align 4
 // CHECK1-NEXT:    store i32 23, ptr [[LA]], align 4
@@ -354,7 +360,6 @@ void foo() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP4]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -399,7 +404,6 @@ void foo() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP26]], align 8
 // CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP29]], align 4
 // CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -455,7 +459,6 @@ void foo() {
 // CHECK1-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS10]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP56:%.*]] = load i32, ptr [[LA]], align 4
 // CHECK1-NEXT:    [[TMP57:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP56]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP58]], align 4
 // CHECK1-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1
@@ -511,7 +514,6 @@ void foo() {
 // CHECK1-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS18]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP85:%.*]] = load i32, ptr [[LA]], align 4
 // CHECK1-NEXT:    [[TMP86:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP85]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS20:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS20]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP87]], align 4
 // CHECK1-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS20]], i32 0, i32 1
@@ -602,7 +604,6 @@ void foo() {
 // CHECK1-NEXT:    [[TMP133:%.*]] = trunc i64 [[ADD27]] to i32
 // CHECK1-NEXT:    [[TMP134:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[ADD]], 0
 // CHECK1-NEXT:    [[TMP135:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP133]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP136]], align 4
 // CHECK1-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1
@@ -668,7 +669,6 @@ void foo() {
 // CHECK1-NEXT:    [[ADD38:%.*]] = add nsw i32 [[TMP167]], 2
 // CHECK1-NEXT:    [[TMP168:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[ADD36]], 0
 // CHECK1-NEXT:    [[TMP169:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[ADD38]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS39:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP170:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS39]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP170]], align 4
 // CHECK1-NEXT:    [[TMP171:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS39]], i32 0, i32 1
@@ -919,20 +919,24 @@ void foo() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[COMP_CASTED1:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [1 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[LA_CASTED:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[COMP_CASTED8:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS9:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS10:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS11:%.*]] = alloca [2 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[LA_CASTED15:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[COMP_CASTED16:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS17:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS18:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS19:%.*]] = alloca [2 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS20:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[GBLA_CASTED:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[A_CASTED:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[LC_CASTED:%.*]] = alloca i32, align 4
@@ -940,6 +944,7 @@ void foo() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS24:%.*]] = alloca [5 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS25:%.*]] = alloca [5 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS26:%.*]] = alloca [5 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca ptr, align 4
 // CHECK3-NEXT:    [[GBLC_CASTED:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[COMP_CASTED31:%.*]] = alloca i32, align 4
@@ -948,6 +953,7 @@ void foo() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS34:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP35:%.*]] = alloca ptr, align 4
 // CHECK3-NEXT:    [[_TMP37:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS39:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
 // CHECK3-NEXT:    store i32 1, ptr [[COMP]], align 4
 // CHECK3-NEXT:    store i32 23, ptr [[LA]], align 4
@@ -963,7 +969,6 @@ void foo() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP4]], align 4
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK3-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1008,7 +1013,6 @@ void foo() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP26]], align 4
 // CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP29]], align 4
 // CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -1064,7 +1068,6 @@ void foo() {
 // CHECK3-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS10]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP56:%.*]] = load i32, ptr [[LA]], align 4
 // CHECK3-NEXT:    [[TMP57:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP56]], 0
-// CHECK3-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP58]], align 4
 // CHECK3-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1
@@ -1120,7 +1123,6 @@ void foo() {
 // CHECK3-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS18]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP85:%.*]] = load i32, ptr [[LA]], align 4
 // CHECK3-NEXT:    [[TMP86:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP85]], 0
-// CHECK3-NEXT:    [[KERNEL_ARGS20:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS20]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP87]], align 4
 // CHECK3-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS20]], i32 0, i32 1
@@ -1208,7 +1210,6 @@ void foo() {
 // CHECK3-NEXT:    [[TMP131:%.*]] = trunc i64 [[ADD27]] to i32
 // CHECK3-NEXT:    [[TMP132:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[ADD]], 0
 // CHECK3-NEXT:    [[TMP133:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP131]], 0
-// CHECK3-NEXT:    [[KERNEL_ARGS28:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP134]], align 4
 // CHECK3-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1
@@ -1274,7 +1275,6 @@ void foo() {
 // CHECK3-NEXT:    [[ADD38:%.*]] = add nsw i32 [[TMP165]], 2
 // CHECK3-NEXT:    [[TMP166:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[ADD36]], 0
 // CHECK3-NEXT:    [[TMP167:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[ADD38]], 0
-// CHECK3-NEXT:    [[KERNEL_ARGS39:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP168:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS39]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP168]], align 4
 // CHECK3-NEXT:    [[TMP169:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS39]], i32 0, i32 1
@@ -1528,10 +1528,12 @@ void foo() {
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[COMP_CASTED1:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [3 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS3:%.*]] = alloca [3 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [3 x ptr], align 8
+// CHECK9-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 1, ptr [[COMP]], align 4
 // CHECK9-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COMP]], align 4
 // CHECK9-NEXT:    store i32 [[TMP0]], ptr [[COMP_CASTED]], align 4
@@ -1563,7 +1565,6 @@ void foo() {
 // CHECK9-NEXT:    [[TMP15:%.*]] = trunc i64 [[CONV]] to i32
 // CHECK9-NEXT:    [[TMP16:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP13]], 0
 // CHECK9-NEXT:    [[TMP17:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP15]], 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK9-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1628,7 +1629,6 @@ void foo() {
 // CHECK9-NEXT:    [[TMP49:%.*]] = trunc i64 [[TMP48]] to i32
 // CHECK9-NEXT:    [[TMP50:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP47]], 0
 // CHECK9-NEXT:    [[TMP51:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP49]], 0
-// CHECK9-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP52]], align 4
 // CHECK9-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -1762,10 +1762,12 @@ void foo() {
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[COMP_CASTED1:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [3 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS3:%.*]] = alloca [3 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [3 x ptr], align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 1, ptr [[COMP]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COMP]], align 4
 // CHECK11-NEXT:    store i32 [[TMP0]], ptr [[COMP_CASTED]], align 4
@@ -1797,7 +1799,6 @@ void foo() {
 // CHECK11-NEXT:    [[TMP15:%.*]] = trunc i64 [[CONV]] to i32
 // CHECK11-NEXT:    [[TMP16:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP13]], 0
 // CHECK11-NEXT:    [[TMP17:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP15]], 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1862,7 +1863,6 @@ void foo() {
 // CHECK11-NEXT:    [[TMP49:%.*]] = trunc i64 [[TMP48]] to i32
 // CHECK11-NEXT:    [[TMP50:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP47]], 0
 // CHECK11-NEXT:    [[TMP51:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP49]], 0
-// CHECK11-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP52]], align 4
 // CHECK11-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -2003,10 +2003,12 @@ void foo() {
 // CHECK17-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8
+// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    [[COMP_CASTED3:%.*]] = alloca i64, align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [2 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [2 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_MAPPERS6:%.*]] = alloca [2 x ptr], align 8
+// CHECK17-NEXT:    [[KERNEL_ARGS8:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK17-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK17-NEXT:    store i32 1, ptr [[COMP]], align 4
@@ -2031,7 +2033,6 @@ void foo() {
 // CHECK17-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP10:%.*]] = load i32, ptr [[A2]], align 4
 // CHECK17-NEXT:    [[TMP11:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP10]], 0
-// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP12]], align 4
 // CHECK17-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2088,7 +2089,6 @@ void foo() {
 // CHECK17-NEXT:    [[CONV:%.*]] = fptosi float [[TMP37]] to i32
 // CHECK17-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 123
 // CHECK17-NEXT:    [[TMP38:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[ADD]], 0
-// CHECK17-NEXT:    [[KERNEL_ARGS8:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP39]], align 4
 // CHECK17-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 1
@@ -2216,10 +2216,12 @@ void foo() {
 // CHECK19-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    [[COMP_CASTED3:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [2 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [2 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_MAPPERS6:%.*]] = alloca [2 x ptr], align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS8:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK19-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CHECK19-NEXT:    store i32 1, ptr [[COMP]], align 4
@@ -2244,7 +2246,6 @@ void foo() {
 // CHECK19-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP10:%.*]] = load i32, ptr [[A2]], align 4
 // CHECK19-NEXT:    [[TMP11:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP10]], 0
-// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP12]], align 4
 // CHECK19-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2301,7 +2302,6 @@ void foo() {
 // CHECK19-NEXT:    [[CONV:%.*]] = fptosi float [[TMP37]] to i32
 // CHECK19-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 123
 // CHECK19-NEXT:    [[TMP38:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[ADD]], 0
-// CHECK19-NEXT:    [[KERNEL_ARGS8:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP39]], align 4
 // CHECK19-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_codegen.cpp b/clang/test/OpenMP/teams_distribute_codegen.cpp
index a11bae68a7e5a..87d6ca297233c 100644
--- a/clang/test/OpenMP/teams_distribute_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_codegen.cpp
@@ -190,6 +190,7 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[N_CASTED4:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS6:%.*]] = alloca [2 x ptr], align 8
@@ -197,6 +198,7 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    [[DIV:%.*]] = sdiv i32 [[TMP0]], 128
@@ -251,7 +253,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP26:%.*]] = zext i32 [[ADD]] to i64
 // CHECK1-NEXT:    [[TMP27:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP21]], 0
 // CHECK1-NEXT:    [[TMP28:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP22]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP29]], align 4
 // CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -312,7 +313,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP56:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK1-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP56]], 1
 // CHECK1-NEXT:    [[TMP57:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK1-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP58]], align 4
 // CHECK1-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -588,6 +588,7 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[N_CASTED4:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS6:%.*]] = alloca [2 x ptr], align 4
@@ -595,6 +596,7 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    [[DIV:%.*]] = sdiv i32 [[TMP0]], 128
@@ -649,7 +651,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[TMP26:%.*]] = zext i32 [[ADD]] to i64
 // CHECK3-NEXT:    [[TMP27:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP21]], 0
 // CHECK3-NEXT:    [[TMP28:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP22]], 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP29]], align 4
 // CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -710,7 +711,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[TMP56:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK3-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP56]], 1
 // CHECK3-NEXT:    [[TMP57:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK3-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP58]], align 4
 // CHECK3-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -983,6 +983,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    store i32 100, ptr [[N]], align 4
 // CHECK9-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N]], align 4
 // CHECK9-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
@@ -1028,7 +1029,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK9-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1200,6 +1200,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    store i32 100, ptr [[N]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N]], align 4
 // CHECK11-NEXT:    [[TMP1:%.*]] = call ptr @llvm.stacksave()
@@ -1245,7 +1246,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK11-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1418,6 +1418,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK17-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK17-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK17-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -1429,7 +1430,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK17-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK17-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1570,6 +1570,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK19-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CHECK19-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -1581,7 +1582,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK19-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK19-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1722,6 +1722,7 @@ int main (int argc, char **argv) {
 // CHECK25-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK25-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK25-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK25-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK25-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK25-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK25-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
@@ -1770,7 +1771,6 @@ int main (int argc, char **argv) {
 // CHECK25-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK25-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK25-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK25-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK25-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK25-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK25-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1936,6 +1936,7 @@ int main (int argc, char **argv) {
 // CHECK25-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK25-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
 // CHECK25-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK25-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK25-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK25-NEXT:    store i32 0, ptr [[TE]], align 4
 // CHECK25-NEXT:    store i32 128, ptr [[TH]], align 4
@@ -1969,7 +1970,6 @@ int main (int argc, char **argv) {
 // CHECK25-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TH]], align 4
 // CHECK25-NEXT:    [[TMP17:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP15]], 0
 // CHECK25-NEXT:    [[TMP18:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP16]], 0
-// CHECK25-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK25-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK25-NEXT:    store i32 2, ptr [[TMP19]], align 4
 // CHECK25-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2115,6 +2115,7 @@ int main (int argc, char **argv) {
 // CHECK27-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK27-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK27-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK27-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK27-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK27-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK27-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 4
@@ -2163,7 +2164,6 @@ int main (int argc, char **argv) {
 // CHECK27-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK27-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK27-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK27-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK27-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK27-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK27-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2328,6 +2328,7 @@ int main (int argc, char **argv) {
 // CHECK27-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK27-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
 // CHECK27-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK27-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK27-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK27-NEXT:    store i32 0, ptr [[TE]], align 4
 // CHECK27-NEXT:    store i32 128, ptr [[TH]], align 4
@@ -2361,7 +2362,6 @@ int main (int argc, char **argv) {
 // CHECK27-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TH]], align 4
 // CHECK27-NEXT:    [[TMP17:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP15]], 0
 // CHECK27-NEXT:    [[TMP18:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP16]], 0
-// CHECK27-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK27-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK27-NEXT:    store i32 2, ptr [[TMP19]], align 4
 // CHECK27-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_collapse_codegen.cpp b/clang/test/OpenMP/teams_distribute_collapse_codegen.cpp
index 256c823223aa3..98156e8a99f8b 100644
--- a/clang/test/OpenMP/teams_distribute_collapse_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_collapse_codegen.cpp
@@ -114,6 +114,7 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[_TMP2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -125,7 +126,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -282,6 +282,7 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[_TMP2:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -293,7 +294,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -453,6 +453,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
@@ -529,7 +530,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
 // CHECK9-NEXT:    [[TMP35:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
 // CHECK9-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP35]], 1
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP36]], align 4
 // CHECK9-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -750,6 +750,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    store ptr [[A]], ptr [[TMP0]], align 8
@@ -759,7 +760,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK9-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -916,6 +916,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK11-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 4
@@ -991,7 +992,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
 // CHECK11-NEXT:    [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
 // CHECK11-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP34]], 1
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP35]], align 4
 // CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1210,6 +1210,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    store ptr [[A]], ptr [[TMP0]], align 4
@@ -1219,7 +1220,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK11-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp
index 94e47786fd9f7..d0781b8ba88c6 100644
--- a/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp
@@ -147,14 +147,17 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS12:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP14:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -166,7 +169,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -209,7 +211,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -252,7 +253,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP42]], align 8
 // CHECK1-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK1-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -566,14 +566,17 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS12:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP14:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -585,7 +588,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -628,7 +630,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -671,7 +672,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP42]], align 4
 // CHECK3-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK3-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -983,6 +983,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[N_CASTED3:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [3 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [3 x ptr], align 8
@@ -991,6 +992,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[N_CASTED18:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [3 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS20:%.*]] = alloca [3 x ptr], align 8
@@ -999,6 +1001,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[_TMP23:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_24:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
@@ -1047,7 +1050,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK9-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1119,7 +1121,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP56:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK9-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP56]], 1
 // CHECK9-NEXT:    [[TMP57:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP58]], align 4
 // CHECK9-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -1191,7 +1192,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP91:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_25]], align 4
 // CHECK9-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP91]], 1
 // CHECK9-NEXT:    [[TMP92:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP93]], align 4
 // CHECK9-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1
@@ -1600,14 +1600,17 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    store ptr [[A]], ptr [[TMP0]], align 8
@@ -1617,7 +1620,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK9-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1659,7 +1661,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -1701,7 +1702,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP42]], align 8
 // CHECK9-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK9-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1
@@ -2010,6 +2010,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[N_CASTED3:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [3 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [3 x ptr], align 4
@@ -2018,6 +2019,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[N_CASTED18:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [3 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS20:%.*]] = alloca [3 x ptr], align 4
@@ -2026,6 +2028,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[_TMP23:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_24:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK11-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 4
@@ -2074,7 +2077,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK11-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2147,7 +2149,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP57:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK11-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP57]], 1
 // CHECK11-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP59]], align 4
 // CHECK11-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -2220,7 +2221,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP93:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_25]], align 4
 // CHECK11-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP93]], 1
 // CHECK11-NEXT:    [[TMP94:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP95]], align 4
 // CHECK11-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1
@@ -2626,14 +2626,17 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    store ptr [[A]], ptr [[TMP0]], align 4
@@ -2643,7 +2646,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK11-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2685,7 +2687,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -2727,7 +2728,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP42]], align 4
 // CHECK11-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK11-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp
index 3f07941cc1b57..571206b301715 100644
--- a/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp
@@ -265,6 +265,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr @t_var, align 4
 // CHECK1-NEXT:    store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4
@@ -304,7 +305,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP18]], align 8
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP21]], align 4
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -545,6 +545,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK1-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -587,7 +588,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP17]], align 8
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP20]], align 4
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1076,6 +1076,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr @t_var, align 4
 // CHECK3-NEXT:    store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4
@@ -1115,7 +1116,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP18]], align 4
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP21]], align 4
 // CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1354,6 +1354,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK3-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -1396,7 +1397,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP17]], align 4
 // CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP20]], align 4
 // CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_lastprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_lastprivate_codegen.cpp
index a3c4723bad37b..ff70b5fab6114 100644
--- a/clang/test/OpenMP/teams_distribute_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_lastprivate_codegen.cpp
@@ -476,6 +476,7 @@ int main() {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store ptr [[G]], ptr [[G1]], align 8
@@ -530,7 +531,6 @@ int main() {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -799,6 +799,7 @@ int main() {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK9-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK9-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -841,7 +842,6 @@ int main() {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP17]], align 8
 // CHECK9-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP20]], align 4
 // CHECK9-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1179,6 +1179,7 @@ int main() {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store ptr [[G]], ptr [[G1]], align 4
@@ -1233,7 +1234,6 @@ int main() {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1500,6 +1500,7 @@ int main() {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK11-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK11-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -1542,7 +1543,6 @@ int main() {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP17]], align 4
 // CHECK11-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP20]], align 4
 // CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp
index f7046f352bf6c..00d54c6f8ae2a 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp
@@ -190,6 +190,7 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[N_CASTED4:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS6:%.*]] = alloca [2 x ptr], align 8
@@ -197,6 +198,7 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    [[DIV:%.*]] = sdiv i32 [[TMP0]], 128
@@ -249,7 +251,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP24]], 1
 // CHECK1-NEXT:    [[TMP25:%.*]] = zext i32 [[ADD]] to i64
 // CHECK1-NEXT:    [[TMP26:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP21]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP27]], align 4
 // CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -310,7 +311,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP54:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK1-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP54]], 1
 // CHECK1-NEXT:    [[TMP55:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK1-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP56]], align 4
 // CHECK1-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -797,6 +797,7 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[N_CASTED4:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS6:%.*]] = alloca [2 x ptr], align 4
@@ -804,6 +805,7 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    [[DIV:%.*]] = sdiv i32 [[TMP0]], 128
@@ -856,7 +858,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP24]], 1
 // CHECK3-NEXT:    [[TMP25:%.*]] = zext i32 [[ADD]] to i64
 // CHECK3-NEXT:    [[TMP26:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP21]], 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP27]], align 4
 // CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -917,7 +918,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[TMP54:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK3-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP54]], 1
 // CHECK3-NEXT:    [[TMP55:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK3-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP56]], align 4
 // CHECK3-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -1393,6 +1393,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    store i32 100, ptr [[N]], align 4
 // CHECK9-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N]], align 4
 // CHECK9-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
@@ -1438,7 +1439,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK9-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1711,6 +1711,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    store i32 100, ptr [[N]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N]], align 4
 // CHECK11-NEXT:    [[TMP1:%.*]] = call ptr @llvm.stacksave()
@@ -1756,7 +1757,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK11-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2026,6 +2026,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK17-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK17-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK17-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -2037,7 +2038,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK17-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK17-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2251,6 +2251,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK19-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CHECK19-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -2262,7 +2263,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK19-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK19-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2472,6 +2472,7 @@ int main (int argc, char **argv) {
 // CHECK25-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK25-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK25-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK25-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK25-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK25-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK25-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
@@ -2520,7 +2521,6 @@ int main (int argc, char **argv) {
 // CHECK25-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK25-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK25-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK25-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK25-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK25-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK25-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2787,6 +2787,7 @@ int main (int argc, char **argv) {
 // CHECK25-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK25-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
 // CHECK25-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK25-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK25-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK25-NEXT:    store i32 0, ptr [[TE]], align 4
 // CHECK25-NEXT:    store i32 128, ptr [[TH]], align 4
@@ -2818,7 +2819,6 @@ int main (int argc, char **argv) {
 // CHECK25-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK25-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TE]], align 4
 // CHECK25-NEXT:    [[TMP16:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP15]], 0
-// CHECK25-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK25-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK25-NEXT:    store i32 2, ptr [[TMP17]], align 4
 // CHECK25-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3037,6 +3037,7 @@ int main (int argc, char **argv) {
 // CHECK27-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK27-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK27-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK27-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK27-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK27-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK27-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 4
@@ -3085,7 +3086,6 @@ int main (int argc, char **argv) {
 // CHECK27-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK27-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK27-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK27-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK27-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK27-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK27-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3347,6 +3347,7 @@ int main (int argc, char **argv) {
 // CHECK27-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK27-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
 // CHECK27-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK27-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK27-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK27-NEXT:    store i32 0, ptr [[TE]], align 4
 // CHECK27-NEXT:    store i32 128, ptr [[TH]], align 4
@@ -3378,7 +3379,6 @@ int main (int argc, char **argv) {
 // CHECK27-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK27-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TE]], align 4
 // CHECK27-NEXT:    [[TMP16:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP15]], 0
-// CHECK27-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK27-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK27-NEXT:    store i32 2, ptr [[TMP17]], align 4
 // CHECK27-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp
index e4501d975a531..7841cfd908ba4 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp
@@ -119,6 +119,7 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[_TMP2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -130,7 +131,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -362,6 +362,7 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[_TMP2:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -373,7 +374,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -604,6 +604,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
@@ -680,7 +681,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
 // CHECK9-NEXT:    [[TMP35:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
 // CHECK9-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP35]], 1
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP36]], align 4
 // CHECK9-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1021,6 +1021,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    store ptr [[A]], ptr [[TMP0]], align 8
@@ -1030,7 +1031,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK9-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1262,6 +1262,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK11-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 4
@@ -1337,7 +1338,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
 // CHECK11-NEXT:    [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
 // CHECK11-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP34]], 1
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP35]], align 4
 // CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1680,6 +1680,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    store ptr [[A]], ptr [[TMP0]], align 4
@@ -1689,7 +1690,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK11-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp
index 6df07faa00ee0..229914d64bee8 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp
@@ -102,6 +102,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x)
 // CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
@@ -121,7 +122,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP8]], align 8
 // CHECK1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP11]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -327,6 +327,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x)
 // CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    store i32 [[TMP1]], ptr [[X_CASTED]], align 4
@@ -345,7 +346,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP8]], align 8
 // CHECK1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP11]], align 4
 // CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -564,6 +564,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x)
 // CHECK3-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
@@ -583,7 +584,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP8]], align 4
 // CHECK3-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP11]], align 4
 // CHECK3-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -784,6 +784,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x)
 // CHECK3-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
 // CHECK3-NEXT:    store i32 [[TMP1]], ptr [[X_CASTED]], align 4
@@ -802,7 +803,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP8]], align 4
 // CHECK3-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP11]], align 4
 // CHECK3-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp
index 49667087c8673..30beb98b7b7f3 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp
@@ -159,14 +159,17 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS12:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP14:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -178,7 +181,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -221,7 +223,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -264,7 +265,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP42]], align 8
 // CHECK1-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK1-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -800,14 +800,17 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS12:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP14:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -819,7 +822,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -862,7 +864,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -905,7 +906,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP42]], align 4
 // CHECK3-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK3-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -1428,6 +1428,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[N_CASTED3:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [3 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [3 x ptr], align 8
@@ -1436,6 +1437,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[M_CASTED:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[N_CASTED18:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [4 x ptr], align 8
@@ -1445,6 +1447,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[_TMP23:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_24:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
@@ -1494,7 +1497,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK9-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1566,7 +1568,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP56:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK9-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP56]], 1
 // CHECK9-NEXT:    [[TMP57:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP58]], align 4
 // CHECK9-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -1647,7 +1648,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP96:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_25]], align 4
 // CHECK9-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP96]], 1
 // CHECK9-NEXT:    [[TMP97:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP98]], align 4
 // CHECK9-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1
@@ -2375,15 +2375,18 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[M_CASTED:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    store i32 10, ptr [[M]], align 4
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
@@ -2394,7 +2397,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK9-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2436,7 +2438,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -2487,7 +2488,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP47]], align 8
 // CHECK9-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP50]], align 4
 // CHECK9-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1
@@ -3037,6 +3037,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[N_CASTED3:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [3 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [3 x ptr], align 4
@@ -3045,6 +3046,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[M_CASTED:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[N_CASTED18:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [4 x ptr], align 4
@@ -3054,6 +3056,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[_TMP23:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_24:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK11-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 4
@@ -3103,7 +3106,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK11-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3176,7 +3178,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP57:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK11-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP57]], 1
 // CHECK11-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP59]], align 4
 // CHECK11-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -3258,7 +3259,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP98:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_25]], align 4
 // CHECK11-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP98]], 1
 // CHECK11-NEXT:    [[TMP99:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP100]], align 4
 // CHECK11-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1
@@ -3971,15 +3971,18 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[M_CASTED:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK11-NEXT:    store i32 10, ptr [[M]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
@@ -3990,7 +3993,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK11-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -4032,7 +4034,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -4083,7 +4084,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP47]], align 4
 // CHECK11-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP50]], align 4
 // CHECK11-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp
index 1884ea8f6df8f..f97a598783d4e 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp
@@ -299,6 +299,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr @t_var, align 4
 // CHECK1-NEXT:    store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4
@@ -338,7 +339,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP18]], align 8
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP21]], align 4
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -709,6 +709,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK1-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -751,7 +752,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP17]], align 8
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP20]], align 4
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1370,6 +1370,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr @t_var, align 4
 // CHECK3-NEXT:    store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4
@@ -1409,7 +1410,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP18]], align 4
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP21]], align 4
 // CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1774,6 +1774,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK3-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -1816,7 +1817,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP17]], align 4
 // CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP20]], align 4
 // CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp
index 718d4c5812622..22b580988dccc 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp
@@ -117,8 +117,9 @@ int main() {
 // CHECK1-SAME: () #[[ATTR0:[0-9]+]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -152,7 +153,6 @@ int main() {
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l48() #[[ATTR2:[0-9]+]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       omp_offload.cont:
-// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -474,15 +474,17 @@ int main() {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[ARG_CASTED:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK1-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -516,7 +518,6 @@ int main() {
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81() #[[ATTR2]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       omp_offload.cont:
-// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -569,7 +570,6 @@ int main() {
 // CHECK1-NEXT:    [[TOBOOL5:%.*]] = trunc i8 [[TMP38]] to i1
 // CHECK1-NEXT:    [[TMP39:%.*]] = select i1 [[TOBOOL5]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP40:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP39]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP41]], align 4
 // CHECK1-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -1061,15 +1061,17 @@ int main() {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[ARG_ADDR:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[ARG_CASTED:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK1-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 [[ARG]], ptr [[ARG_ADDR]], align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1103,7 +1105,6 @@ int main() {
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l62() #[[ATTR2]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       omp_offload.cont:
-// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -1156,7 +1157,6 @@ int main() {
 // CHECK1-NEXT:    [[TOBOOL5:%.*]] = trunc i8 [[TMP38]] to i1
 // CHECK1-NEXT:    [[TMP39:%.*]] = select i1 [[TOBOOL5]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP40:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP39]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP41]], align 4
 // CHECK1-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp
index 25d1bca100ed0..97060d6d62498 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp
@@ -724,6 +724,7 @@ int main() {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store ptr [[G]], ptr [[G1]], align 8
@@ -778,7 +779,6 @@ int main() {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1195,6 +1195,7 @@ int main() {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK9-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK9-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -1237,7 +1238,6 @@ int main() {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP17]], align 8
 // CHECK9-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP20]], align 4
 // CHECK9-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1717,6 +1717,7 @@ int main() {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store ptr [[G]], ptr [[G1]], align 4
@@ -1771,7 +1772,6 @@ int main() {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2182,6 +2182,7 @@ int main() {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK11-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK11-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -2224,7 +2225,6 @@ int main() {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP17]], align 4
 // CHECK11-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP20]], align 4
 // CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp
index a36a096858c56..ca6f26a731e6c 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp
@@ -82,19 +82,20 @@ int main() {
 // CHECK1-NEXT:    [[EXN_SLOT:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    call void @_ZN1SC1El(ptr noundef nonnull align 8 dereferenceable(24) [[S]], i64 noundef 0)
 // CHECK1-NEXT:    [[CALL:%.*]] = invoke noundef signext i8 @_ZN1ScvcEv(ptr noundef nonnull align 8 dereferenceable(24) [[S]])
 // CHECK1-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
 // CHECK1:       invoke.cont:
 // CHECK1-NEXT:    store i8 [[CALL]], ptr [[A]], align 1
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -153,7 +154,6 @@ int main() {
 // CHECK1-NEXT:    [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK1-NEXT:    [[TMP27:%.*]] = zext i8 [[TMP26]] to i32
 // CHECK1-NEXT:    [[TMP28:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP27]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP29]], align 4
 // CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -552,8 +552,9 @@ int main() {
 // CHECK1-SAME: () #[[ATTR2]] comdat {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -587,7 +588,6 @@ int main() {
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l36() #[[ATTR6]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       omp_offload.cont:
-// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -628,10 +628,11 @@ int main() {
 // CHECK1-SAME: () #[[ATTR2]] comdat personality ptr @__gxx_personality_v0 {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK1-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -674,7 +675,6 @@ int main() {
 // CHECK1-NEXT:    [[TMP15:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK1-NEXT:    [[TMP16:%.*]] = zext i8 [[TMP15]] to i32
 // CHECK1-NEXT:    [[TMP17:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP16]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -1376,19 +1376,20 @@ int main() {
 // CHECK5-NEXT:    [[EXN_SLOT:%.*]] = alloca ptr, align 8
 // CHECK5-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
 // CHECK5-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK5-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK5-NEXT:    call void @_ZN1SC1El(ptr noundef nonnull align 8 dereferenceable(24) [[S]], i64 noundef 0)
 // CHECK5-NEXT:    [[CALL:%.*]] = invoke noundef signext i8 @_ZN1ScvcEv(ptr noundef nonnull align 8 dereferenceable(24) [[S]])
 // CHECK5-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
 // CHECK5:       invoke.cont:
 // CHECK5-NEXT:    store i8 [[CALL]], ptr [[A]], align 1
-// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK5-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1447,7 +1448,6 @@ int main() {
 // CHECK5-NEXT:    [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK5-NEXT:    [[TMP27:%.*]] = zext i8 [[TMP26]] to i32
 // CHECK5-NEXT:    [[TMP28:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP27]], 0
-// CHECK5-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP29]], align 4
 // CHECK5-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -1846,8 +1846,9 @@ int main() {
 // CHECK5-SAME: () #[[ATTR2]] comdat {
 // CHECK5-NEXT:  entry:
 // CHECK5-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK5-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK5-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK5-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1881,7 +1882,6 @@ int main() {
 // CHECK5-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l36() #[[ATTR6]]
 // CHECK5-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK5:       omp_offload.cont:
-// CHECK5-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK5-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -1922,10 +1922,11 @@ int main() {
 // CHECK5-SAME: () #[[ATTR2]] comdat personality ptr @__gxx_personality_v0 {
 // CHECK5-NEXT:  entry:
 // CHECK5-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK5-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8
 // CHECK5-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK5-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK5-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1968,7 +1969,6 @@ int main() {
 // CHECK5-NEXT:    [[TMP15:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK5-NEXT:    [[TMP16:%.*]] = zext i8 [[TMP15]] to i32
 // CHECK5-NEXT:    [[TMP17:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP16]], 0
-// CHECK5-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK5-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp
index c3ba6bc777d8f..0b62934b5b5fa 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp
@@ -260,8 +260,8 @@ int main() {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -522,6 +522,7 @@ int main() {
 // CHECK1-NEXT:    [[VAR:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK1-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -531,7 +532,6 @@ int main() {
 // CHECK1-NEXT:    call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2)
 // CHECK1-NEXT:    store ptr [[TEST]], ptr [[VAR]], align 8
 // CHECK1-NEXT:    store ptr undef, ptr [[_TMP1]], align 8
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1000,8 +1000,8 @@ int main() {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1256,6 +1256,7 @@ int main() {
 // CHECK3-NEXT:    [[VAR:%.*]] = alloca ptr, align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[_TMP1:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK3-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -1265,7 +1266,6 @@ int main() {
 // CHECK3-NEXT:    call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2)
 // CHECK3-NEXT:    store ptr [[TEST]], ptr [[VAR]], align 4
 // CHECK3-NEXT:    store ptr undef, ptr [[_TMP1]], align 4
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp
index 141adc5c8c9de..85dbe73382c7d 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp
@@ -55,9 +55,10 @@ int main() {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -91,7 +92,6 @@ int main() {
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l36() #[[ATTR2:[0-9]+]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       omp_offload.cont:
-// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp
index de787727f3fa9..96fb0fef623a3 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp
@@ -102,6 +102,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4
 // CHECK1-NEXT:    store i32 [[TMP0]], ptr [[SIVAR_CASTED]], align 4
@@ -114,7 +115,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP4]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -395,6 +395,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 4
@@ -408,7 +409,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP4]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -694,6 +694,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4
 // CHECK3-NEXT:    store i32 [[TMP0]], ptr [[SIVAR_CASTED]], align 4
@@ -706,7 +707,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP4]], align 4
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK3-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -983,6 +983,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 4
@@ -996,7 +997,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP4]], align 4
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK3-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp
index 0b8915954cff8..a5304673f42e2 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp
@@ -224,22 +224,27 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS12:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP14:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS20:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS21:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP22:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS27:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS28:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS29:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP30:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -251,7 +256,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -294,7 +298,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -337,7 +340,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP42]], align 8
 // CHECK1-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK1-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -380,7 +382,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP62]], align 8
 // CHECK1-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP65]], align 4
 // CHECK1-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1
@@ -423,7 +424,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP82]], align 8
 // CHECK1-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP85]], align 4
 // CHECK1-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1
@@ -1258,22 +1258,27 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS12:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP14:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS20:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS21:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP22:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS27:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS28:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS29:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP30:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -1285,7 +1290,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1328,7 +1332,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -1371,7 +1374,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP42]], align 4
 // CHECK3-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK3-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -1414,7 +1416,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP62]], align 4
 // CHECK3-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP65]], align 4
 // CHECK3-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1
@@ -1457,7 +1458,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP82]], align 4
 // CHECK3-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP85]], align 4
 // CHECK3-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1
@@ -2265,22 +2265,27 @@ int main (int argc, char **argv) {
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS12:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[_TMP14:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS20:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS21:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[_TMP22:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_BASEPTRS27:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS28:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS29:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[_TMP30:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK5-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK5-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -2292,7 +2297,6 @@ int main (int argc, char **argv) {
 // CHECK5-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK5-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK5-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2335,7 +2339,6 @@ int main (int argc, char **argv) {
 // CHECK5-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK5-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK5-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -2378,7 +2381,6 @@ int main (int argc, char **argv) {
 // CHECK5-NEXT:    store ptr null, ptr [[TMP42]], align 8
 // CHECK5-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK5-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -2421,7 +2423,6 @@ int main (int argc, char **argv) {
 // CHECK5-NEXT:    store ptr null, ptr [[TMP62]], align 8
 // CHECK5-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP65]], align 4
 // CHECK5-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1
@@ -2464,7 +2465,6 @@ int main (int argc, char **argv) {
 // CHECK5-NEXT:    store ptr null, ptr [[TMP82]], align 8
 // CHECK5-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP85]], align 4
 // CHECK5-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1
@@ -3299,22 +3299,27 @@ int main (int argc, char **argv) {
 // CHECK7-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK7-NEXT:    [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_PTRS12:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[_TMP14:%.*]] = alloca i32, align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK7-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_PTRS20:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS21:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[_TMP22:%.*]] = alloca i32, align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK7-NEXT:    [[DOTOFFLOAD_BASEPTRS27:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_PTRS28:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[DOTOFFLOAD_MAPPERS29:%.*]] = alloca [1 x ptr], align 4
 // CHECK7-NEXT:    [[_TMP30:%.*]] = alloca i32, align 4
+// CHECK7-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK7-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK7-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CHECK7-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -3326,7 +3331,6 @@ int main (int argc, char **argv) {
 // CHECK7-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK7-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK7-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK7-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK7-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3369,7 +3373,6 @@ int main (int argc, char **argv) {
 // CHECK7-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK7-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK7-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK7-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK7-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -3412,7 +3415,6 @@ int main (int argc, char **argv) {
 // CHECK7-NEXT:    store ptr null, ptr [[TMP42]], align 4
 // CHECK7-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
-// CHECK7-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK7-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK7-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -3455,7 +3457,6 @@ int main (int argc, char **argv) {
 // CHECK7-NEXT:    store ptr null, ptr [[TMP62]], align 4
 // CHECK7-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK7-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK7-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP65]], align 4
 // CHECK7-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1
@@ -3498,7 +3499,6 @@ int main (int argc, char **argv) {
 // CHECK7-NEXT:    store ptr null, ptr [[TMP82]], align 4
 // CHECK7-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
-// CHECK7-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK7-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0
 // CHECK7-NEXT:    store i32 2, ptr [[TMP85]], align 4
 // CHECK7-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1
@@ -4308,6 +4308,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK13-NEXT:    [[N_CASTED3:%.*]] = alloca i64, align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [3 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [3 x ptr], align 8
@@ -4316,6 +4317,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[M_CASTED:%.*]] = alloca i64, align 8
 // CHECK13-NEXT:    [[N_CASTED18:%.*]] = alloca i64, align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [4 x ptr], align 8
@@ -4325,6 +4327,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[_TMP23:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_24:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[N_CASTED33:%.*]] = alloca i64, align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_BASEPTRS34:%.*]] = alloca [3 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_PTRS35:%.*]] = alloca [3 x ptr], align 8
@@ -4333,6 +4336,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[_TMP38:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_39:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_40:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS45:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[M_CASTED48:%.*]] = alloca i64, align 8
 // CHECK13-NEXT:    [[N_CASTED49:%.*]] = alloca i64, align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_BASEPTRS50:%.*]] = alloca [4 x ptr], align 8
@@ -4342,6 +4346,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[_TMP54:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_55:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_56:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS61:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK13-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK13-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
@@ -4391,7 +4396,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK13-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK13-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK13-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK13-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK13-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -4463,7 +4467,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP56:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK13-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP56]], 1
 // CHECK13-NEXT:    [[TMP57:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK13-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP58]], align 4
 // CHECK13-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -4544,7 +4547,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP96:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_25]], align 4
 // CHECK13-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP96]], 1
 // CHECK13-NEXT:    [[TMP97:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK13-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP98]], align 4
 // CHECK13-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1
@@ -4616,7 +4618,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP131:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_40]], align 4
 // CHECK13-NEXT:    [[ADD44:%.*]] = add nsw i32 [[TMP131]], 1
 // CHECK13-NEXT:    [[TMP132:%.*]] = zext i32 [[ADD44]] to i64
-// CHECK13-NEXT:    [[KERNEL_ARGS45:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP133]], align 4
 // CHECK13-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 1
@@ -4697,7 +4698,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP171:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_56]], align 4
 // CHECK13-NEXT:    [[ADD60:%.*]] = add nsw i32 [[TMP171]], 1
 // CHECK13-NEXT:    [[TMP172:%.*]] = zext i32 [[ADD60]] to i64
-// CHECK13-NEXT:    [[KERNEL_ARGS61:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP173:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP173]], align 4
 // CHECK13-NEXT:    [[TMP174:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 1
@@ -5857,24 +5857,29 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[M_CASTED:%.*]] = alloca i64, align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 8
 // CHECK13-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_BASEPTRS15:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_PTRS16:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_MAPPERS17:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[_TMP18:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[M_CASTED22:%.*]] = alloca i64, align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_BASEPTRS23:%.*]] = alloca [2 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_PTRS24:%.*]] = alloca [2 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_MAPPERS25:%.*]] = alloca [2 x ptr], align 8
 // CHECK13-NEXT:    [[_TMP26:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS27:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK13-NEXT:    store i32 10, ptr [[M]], align 4
 // CHECK13-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
@@ -5885,7 +5890,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK13-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK13-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK13-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK13-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -5927,7 +5931,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK13-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK13-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK13-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -5978,7 +5981,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    store ptr null, ptr [[TMP47]], align 8
 // CHECK13-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK13-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP50]], align 4
 // CHECK13-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1
@@ -6020,7 +6022,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    store ptr null, ptr [[TMP67]], align 8
 // CHECK13-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0
-// CHECK13-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP70]], align 4
 // CHECK13-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1
@@ -6071,7 +6072,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    store ptr null, ptr [[TMP92]], align 8
 // CHECK13-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS24]], i32 0, i32 0
-// CHECK13-NEXT:    [[KERNEL_ARGS27:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP95]], align 4
 // CHECK13-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 1
@@ -6936,6 +6936,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK15-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK15-NEXT:    [[N_CASTED3:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [3 x ptr], align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [3 x ptr], align 4
@@ -6944,6 +6945,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK15-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[M_CASTED:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[N_CASTED18:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [4 x ptr], align 4
@@ -6953,6 +6955,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[_TMP23:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_24:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
+// CHECK15-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[N_CASTED33:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_BASEPTRS34:%.*]] = alloca [3 x ptr], align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_PTRS35:%.*]] = alloca [3 x ptr], align 4
@@ -6961,6 +6964,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[_TMP38:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_39:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_40:%.*]] = alloca i32, align 4
+// CHECK15-NEXT:    [[KERNEL_ARGS45:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[M_CASTED48:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[N_CASTED49:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_BASEPTRS50:%.*]] = alloca [4 x ptr], align 4
@@ -6970,6 +6974,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[_TMP54:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_55:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTCAPTURE_EXPR_56:%.*]] = alloca i32, align 4
+// CHECK15-NEXT:    [[KERNEL_ARGS61:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK15-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK15-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 4
@@ -7019,7 +7024,6 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK15-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK15-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK15-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK15-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK15-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK15-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -7092,7 +7096,6 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP57:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK15-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP57]], 1
 // CHECK15-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK15-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK15-NEXT:    store i32 2, ptr [[TMP59]], align 4
 // CHECK15-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -7174,7 +7177,6 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP98:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_25]], align 4
 // CHECK15-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP98]], 1
 // CHECK15-NEXT:    [[TMP99:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK15-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0
 // CHECK15-NEXT:    store i32 2, ptr [[TMP100]], align 4
 // CHECK15-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1
@@ -7247,7 +7249,6 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP134:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_40]], align 4
 // CHECK15-NEXT:    [[ADD44:%.*]] = add nsw i32 [[TMP134]], 1
 // CHECK15-NEXT:    [[TMP135:%.*]] = zext i32 [[ADD44]] to i64
-// CHECK15-NEXT:    [[KERNEL_ARGS45:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 0
 // CHECK15-NEXT:    store i32 2, ptr [[TMP136]], align 4
 // CHECK15-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 1
@@ -7329,7 +7330,6 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP175:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_56]], align 4
 // CHECK15-NEXT:    [[ADD60:%.*]] = add nsw i32 [[TMP175]], 1
 // CHECK15-NEXT:    [[TMP176:%.*]] = zext i32 [[ADD60]] to i64
-// CHECK15-NEXT:    [[KERNEL_ARGS61:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[TMP177:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 0
 // CHECK15-NEXT:    store i32 2, ptr [[TMP177]], align 4
 // CHECK15-NEXT:    [[TMP178:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 1
@@ -8464,24 +8464,29 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK15-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK15-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK15-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK15-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK15-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[M_CASTED:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 4
 // CHECK15-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK15-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[DOTOFFLOAD_BASEPTRS15:%.*]] = alloca [1 x ptr], align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_PTRS16:%.*]] = alloca [1 x ptr], align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_MAPPERS17:%.*]] = alloca [1 x ptr], align 4
 // CHECK15-NEXT:    [[_TMP18:%.*]] = alloca i32, align 4
+// CHECK15-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[M_CASTED22:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_BASEPTRS23:%.*]] = alloca [2 x ptr], align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_PTRS24:%.*]] = alloca [2 x ptr], align 4
 // CHECK15-NEXT:    [[DOTOFFLOAD_MAPPERS25:%.*]] = alloca [2 x ptr], align 4
 // CHECK15-NEXT:    [[_TMP26:%.*]] = alloca i32, align 4
+// CHECK15-NEXT:    [[KERNEL_ARGS27:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK15-NEXT:    store i32 10, ptr [[M]], align 4
 // CHECK15-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
@@ -8492,7 +8497,6 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK15-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK15-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK15-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK15-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK15-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -8534,7 +8538,6 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK15-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK15-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK15-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK15-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -8585,7 +8588,6 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    store ptr null, ptr [[TMP47]], align 4
 // CHECK15-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK15-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK15-NEXT:    store i32 2, ptr [[TMP50]], align 4
 // CHECK15-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1
@@ -8627,7 +8629,6 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    store ptr null, ptr [[TMP67]], align 4
 // CHECK15-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0
-// CHECK15-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0
 // CHECK15-NEXT:    store i32 2, ptr [[TMP70]], align 4
 // CHECK15-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1
@@ -8678,7 +8679,6 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    store ptr null, ptr [[TMP92]], align 4
 // CHECK15-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS24]], i32 0, i32 0
-// CHECK15-NEXT:    [[KERNEL_ARGS27:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK15-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 0
 // CHECK15-NEXT:    store i32 2, ptr [[TMP95]], align 4
 // CHECK15-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 1
@@ -9516,6 +9516,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    [[N_CASTED3:%.*]] = alloca i64, align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [3 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [3 x ptr], align 8
@@ -9524,6 +9525,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[M_CASTED:%.*]] = alloca i64, align 8
 // CHECK17-NEXT:    [[N_CASTED18:%.*]] = alloca i64, align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [4 x ptr], align 8
@@ -9533,6 +9535,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[_TMP23:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_24:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[N_CASTED33:%.*]] = alloca i64, align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_BASEPTRS34:%.*]] = alloca [3 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS35:%.*]] = alloca [3 x ptr], align 8
@@ -9541,6 +9544,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[_TMP38:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_39:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_40:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS45:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[M_CASTED48:%.*]] = alloca i64, align 8
 // CHECK17-NEXT:    [[N_CASTED49:%.*]] = alloca i64, align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_BASEPTRS50:%.*]] = alloca [4 x ptr], align 8
@@ -9550,6 +9554,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[_TMP54:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_55:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_56:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS61:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK17-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK17-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
@@ -9599,7 +9604,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK17-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK17-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK17-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -9671,7 +9675,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP56:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK17-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP56]], 1
 // CHECK17-NEXT:    [[TMP57:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK17-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP58]], align 4
 // CHECK17-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -9752,7 +9755,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP96:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_25]], align 4
 // CHECK17-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP96]], 1
 // CHECK17-NEXT:    [[TMP97:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK17-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP98]], align 4
 // CHECK17-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1
@@ -9824,7 +9826,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP131:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_40]], align 4
 // CHECK17-NEXT:    [[ADD44:%.*]] = add nsw i32 [[TMP131]], 1
 // CHECK17-NEXT:    [[TMP132:%.*]] = zext i32 [[ADD44]] to i64
-// CHECK17-NEXT:    [[KERNEL_ARGS45:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP133]], align 4
 // CHECK17-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 1
@@ -9905,7 +9906,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP171:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_56]], align 4
 // CHECK17-NEXT:    [[ADD60:%.*]] = add nsw i32 [[TMP171]], 1
 // CHECK17-NEXT:    [[TMP172:%.*]] = zext i32 [[ADD60]] to i64
-// CHECK17-NEXT:    [[KERNEL_ARGS61:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP173:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP173]], align 4
 // CHECK17-NEXT:    [[TMP174:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 1
@@ -11065,24 +11065,29 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK17-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK17-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[M_CASTED:%.*]] = alloca i64, align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 8
 // CHECK17-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_BASEPTRS15:%.*]] = alloca [1 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS16:%.*]] = alloca [1 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_MAPPERS17:%.*]] = alloca [1 x ptr], align 8
 // CHECK17-NEXT:    [[_TMP18:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[M_CASTED22:%.*]] = alloca i64, align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_BASEPTRS23:%.*]] = alloca [2 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS24:%.*]] = alloca [2 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_MAPPERS25:%.*]] = alloca [2 x ptr], align 8
 // CHECK17-NEXT:    [[_TMP26:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS27:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK17-NEXT:    store i32 10, ptr [[M]], align 4
 // CHECK17-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
@@ -11093,7 +11098,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK17-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK17-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -11135,7 +11139,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK17-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK17-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK17-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -11186,7 +11189,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    store ptr null, ptr [[TMP47]], align 8
 // CHECK17-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK17-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP50]], align 4
 // CHECK17-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1
@@ -11228,7 +11230,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    store ptr null, ptr [[TMP67]], align 8
 // CHECK17-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0
-// CHECK17-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP70]], align 4
 // CHECK17-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1
@@ -11279,7 +11280,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    store ptr null, ptr [[TMP92]], align 8
 // CHECK17-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS24]], i32 0, i32 0
-// CHECK17-NEXT:    [[KERNEL_ARGS27:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP95]], align 4
 // CHECK17-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 1
@@ -12144,6 +12144,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    [[N_CASTED3:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [3 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [3 x ptr], align 4
@@ -12152,6 +12153,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[M_CASTED:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[N_CASTED18:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [4 x ptr], align 4
@@ -12161,6 +12163,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[_TMP23:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_24:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[N_CASTED33:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_BASEPTRS34:%.*]] = alloca [3 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS35:%.*]] = alloca [3 x ptr], align 4
@@ -12169,6 +12172,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[_TMP38:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_39:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_40:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS45:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[M_CASTED48:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[N_CASTED49:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_BASEPTRS50:%.*]] = alloca [4 x ptr], align 4
@@ -12178,6 +12182,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[_TMP54:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_55:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_56:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS61:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK19-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK19-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 4
@@ -12227,7 +12232,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK19-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK19-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK19-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -12300,7 +12304,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP57:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK19-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP57]], 1
 // CHECK19-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK19-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP59]], align 4
 // CHECK19-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -12382,7 +12385,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP98:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_25]], align 4
 // CHECK19-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP98]], 1
 // CHECK19-NEXT:    [[TMP99:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK19-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP100]], align 4
 // CHECK19-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1
@@ -12455,7 +12457,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP134:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_40]], align 4
 // CHECK19-NEXT:    [[ADD44:%.*]] = add nsw i32 [[TMP134]], 1
 // CHECK19-NEXT:    [[TMP135:%.*]] = zext i32 [[ADD44]] to i64
-// CHECK19-NEXT:    [[KERNEL_ARGS45:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP136]], align 4
 // CHECK19-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 1
@@ -12537,7 +12538,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP175:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_56]], align 4
 // CHECK19-NEXT:    [[ADD60:%.*]] = add nsw i32 [[TMP175]], 1
 // CHECK19-NEXT:    [[TMP176:%.*]] = zext i32 [[ADD60]] to i64
-// CHECK19-NEXT:    [[KERNEL_ARGS61:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP177:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP177]], align 4
 // CHECK19-NEXT:    [[TMP178:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 1
@@ -13672,24 +13672,29 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[M_CASTED:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 4
 // CHECK19-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[DOTOFFLOAD_BASEPTRS15:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS16:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_MAPPERS17:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[_TMP18:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[M_CASTED22:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_BASEPTRS23:%.*]] = alloca [2 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS24:%.*]] = alloca [2 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_MAPPERS25:%.*]] = alloca [2 x ptr], align 4
 // CHECK19-NEXT:    [[_TMP26:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS27:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK19-NEXT:    store i32 10, ptr [[M]], align 4
 // CHECK19-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
@@ -13700,7 +13705,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK19-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK19-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -13742,7 +13746,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK19-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK19-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK19-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -13793,7 +13796,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    store ptr null, ptr [[TMP47]], align 4
 // CHECK19-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK19-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP50]], align 4
 // CHECK19-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1
@@ -13835,7 +13837,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    store ptr null, ptr [[TMP67]], align 4
 // CHECK19-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0
-// CHECK19-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP70]], align 4
 // CHECK19-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1
@@ -13886,7 +13887,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    store ptr null, ptr [[TMP92]], align 4
 // CHECK19-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS24]], i32 0, i32 0
-// CHECK19-NEXT:    [[KERNEL_ARGS27:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP95]], align 4
 // CHECK19-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp
index c9a831352c50a..c511f9f02aa05 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp
@@ -196,6 +196,7 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[I_CASTED:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[N_CASTED4:%.*]] = alloca i64, align 8
@@ -205,6 +206,7 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    [[DIV:%.*]] = sdiv i32 [[TMP0]], 128
@@ -257,7 +259,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP24]], 1
 // CHECK1-NEXT:    [[TMP25:%.*]] = zext i32 [[ADD]] to i64
 // CHECK1-NEXT:    [[TMP26:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP21]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP27]], align 4
 // CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -327,7 +328,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP59:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK1-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP59]], 1
 // CHECK1-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK1-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP61]], align 4
 // CHECK1-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -872,6 +872,7 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[I_CASTED:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[N_CASTED4:%.*]] = alloca i32, align 4
@@ -881,6 +882,7 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    [[DIV:%.*]] = sdiv i32 [[TMP0]], 128
@@ -933,7 +935,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP24]], 1
 // CHECK3-NEXT:    [[TMP25:%.*]] = zext i32 [[ADD]] to i64
 // CHECK3-NEXT:    [[TMP26:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP21]], 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP27]], align 4
 // CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1003,7 +1004,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[TMP59:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK3-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP59]], 1
 // CHECK3-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK3-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP61]], align 4
 // CHECK3-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -1807,6 +1807,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    store i32 100, ptr [[N]], align 4
 // CHECK9-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N]], align 4
 // CHECK9-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
@@ -1861,7 +1862,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP26]], 1
 // CHECK9-NEXT:    [[TMP27:%.*]] = zext i32 [[ADD]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP28]], align 4
 // CHECK9-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2183,6 +2183,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    store i32 100, ptr [[N]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N]], align 4
 // CHECK11-NEXT:    [[TMP1:%.*]] = call ptr @llvm.stacksave()
@@ -2237,7 +2238,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP26]], 1
 // CHECK11-NEXT:    [[TMP27:%.*]] = zext i32 [[ADD]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP28]], align 4
 // CHECK11-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2720,6 +2720,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8
 // CHECK17-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK17-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK17-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I]], align 4
@@ -2740,7 +2741,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    store ptr null, ptr [[TMP7]], align 8
 // CHECK17-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP10]], align 4
 // CHECK17-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2995,6 +2995,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 4
 // CHECK19-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK19-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CHECK19-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I]], align 4
@@ -3015,7 +3016,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    store ptr null, ptr [[TMP7]], align 4
 // CHECK19-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP10]], align 4
 // CHECK19-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3391,6 +3391,7 @@ int main (int argc, char **argv) {
 // CHECK25-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK25-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK25-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK25-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK25-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK25-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK25-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
@@ -3448,7 +3449,6 @@ int main (int argc, char **argv) {
 // CHECK25-NEXT:    [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK25-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP26]], 1
 // CHECK25-NEXT:    [[TMP27:%.*]] = zext i32 [[ADD]] to i64
-// CHECK25-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK25-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK25-NEXT:    store i32 2, ptr [[TMP28]], align 4
 // CHECK25-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3762,6 +3762,7 @@ int main (int argc, char **argv) {
 // CHECK25-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK25-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
 // CHECK25-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK25-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK25-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK25-NEXT:    store i32 0, ptr [[TE]], align 4
 // CHECK25-NEXT:    store i32 128, ptr [[TH]], align 4
@@ -3793,7 +3794,6 @@ int main (int argc, char **argv) {
 // CHECK25-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK25-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TE]], align 4
 // CHECK25-NEXT:    [[TMP16:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP15]], 0
-// CHECK25-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK25-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK25-NEXT:    store i32 2, ptr [[TMP17]], align 4
 // CHECK25-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -4028,6 +4028,7 @@ int main (int argc, char **argv) {
 // CHECK27-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK27-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK27-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK27-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK27-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK27-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK27-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 4
@@ -4085,7 +4086,6 @@ int main (int argc, char **argv) {
 // CHECK27-NEXT:    [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK27-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP26]], 1
 // CHECK27-NEXT:    [[TMP27:%.*]] = zext i32 [[ADD]] to i64
-// CHECK27-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK27-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK27-NEXT:    store i32 2, ptr [[TMP28]], align 4
 // CHECK27-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -4394,6 +4394,7 @@ int main (int argc, char **argv) {
 // CHECK27-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK27-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
 // CHECK27-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK27-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK27-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK27-NEXT:    store i32 0, ptr [[TE]], align 4
 // CHECK27-NEXT:    store i32 128, ptr [[TH]], align 4
@@ -4425,7 +4426,6 @@ int main (int argc, char **argv) {
 // CHECK27-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK27-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TE]], align 4
 // CHECK27-NEXT:    [[TMP16:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP15]], 0
-// CHECK27-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK27-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK27-NEXT:    store i32 2, ptr [[TMP17]], align 4
 // CHECK27-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp
index 3d971c5e92491..ac6bb3a9d835b 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp
@@ -124,6 +124,7 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[_TMP2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -135,7 +136,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -383,6 +383,7 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[_TMP2:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -394,7 +395,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -781,6 +781,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
@@ -857,7 +858,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
 // CHECK9-NEXT:    [[TMP35:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
 // CHECK9-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP35]], 1
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP36]], align 4
 // CHECK9-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1234,6 +1234,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    store ptr [[A]], ptr [[TMP0]], align 8
@@ -1243,7 +1244,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK9-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1491,6 +1491,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK11-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 4
@@ -1566,7 +1567,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
 // CHECK11-NEXT:    [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
 // CHECK11-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP34]], 1
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP35]], align 4
 // CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1945,6 +1945,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    store ptr [[A]], ptr [[TMP0]], align 4
@@ -1954,7 +1955,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK11-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
index c2579ccf475b2..260b050c69d61 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
@@ -162,14 +162,17 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS12:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP14:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -181,7 +184,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -224,7 +226,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -267,7 +268,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP42]], align 8
 // CHECK1-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK1-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -845,14 +845,17 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS12:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP14:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -864,7 +867,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -907,7 +909,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -950,7 +951,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP42]], align 4
 // CHECK3-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK3-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -1762,6 +1762,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[N_CASTED3:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [3 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [3 x ptr], align 8
@@ -1770,6 +1771,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[M_CASTED:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[N_CASTED18:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [4 x ptr], align 8
@@ -1779,6 +1781,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[_TMP23:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_24:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
@@ -1828,7 +1831,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK9-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1900,7 +1902,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP56:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK9-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP56]], 1
 // CHECK9-NEXT:    [[TMP57:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP58]], align 4
 // CHECK9-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -1981,7 +1982,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP96:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_25]], align 4
 // CHECK9-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP96]], 1
 // CHECK9-NEXT:    [[TMP97:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP98]], align 4
 // CHECK9-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1
@@ -2781,15 +2781,18 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[M_CASTED:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    store i32 10, ptr [[M]], align 4
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
@@ -2800,7 +2803,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK9-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2842,7 +2844,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -2893,7 +2894,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP47]], align 8
 // CHECK9-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP50]], align 4
 // CHECK9-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1
@@ -3485,6 +3485,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[N_CASTED3:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [3 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [3 x ptr], align 4
@@ -3493,6 +3494,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[M_CASTED:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[N_CASTED18:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [4 x ptr], align 4
@@ -3502,6 +3504,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[_TMP23:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_24:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK11-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 4
@@ -3551,7 +3554,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK11-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3624,7 +3626,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP57:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK11-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP57]], 1
 // CHECK11-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP59]], align 4
 // CHECK11-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -3706,7 +3707,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP98:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_25]], align 4
 // CHECK11-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP98]], 1
 // CHECK11-NEXT:    [[TMP99:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP100]], align 4
 // CHECK11-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1
@@ -4491,15 +4491,18 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[M_CASTED:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK11-NEXT:    store i32 10, ptr [[M]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
@@ -4510,7 +4513,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK11-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -4552,7 +4554,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -4603,7 +4604,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP47]], align 4
 // CHECK11-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP50]], align 4
 // CHECK11-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
index 5dc93d7473d99..c17d3675fd669 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
@@ -302,6 +302,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr @t_var, align 4
 // CHECK1-NEXT:    store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4
@@ -341,7 +342,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP18]], align 8
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP21]], align 4
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -726,6 +726,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK1-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -768,7 +769,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP17]], align 8
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP20]], align 4
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1401,6 +1401,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr @t_var, align 4
 // CHECK3-NEXT:    store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4
@@ -1440,7 +1441,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP18]], align 4
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP21]], align 4
 // CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1819,6 +1819,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK3-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -1861,7 +1862,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP17]], align 4
 // CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP20]], align 4
 // CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp
index 7d8692db7efb7..e939511a9374e 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp
@@ -114,8 +114,9 @@ int main() {
 // CHECK1-SAME: () #[[ATTR0:[0-9]+]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -149,7 +150,6 @@ int main() {
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l43() #[[ATTR2:[0-9]+]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       omp_offload.cont:
-// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -499,15 +499,17 @@ int main() {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[ARG_CASTED:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK1-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -541,7 +543,6 @@ int main() {
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l76() #[[ATTR2]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       omp_offload.cont:
-// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -594,7 +595,6 @@ int main() {
 // CHECK1-NEXT:    [[TOBOOL5:%.*]] = trunc i8 [[TMP38]] to i1
 // CHECK1-NEXT:    [[TMP39:%.*]] = select i1 [[TOBOOL5]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP40:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP39]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP41]], align 4
 // CHECK1-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -1128,15 +1128,17 @@ int main() {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[ARG_ADDR:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[ARG_CASTED:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK1-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 [[ARG]], ptr [[ARG_ADDR]], align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1170,7 +1172,6 @@ int main() {
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l57() #[[ATTR2]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       omp_offload.cont:
-// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -1223,7 +1224,6 @@ int main() {
 // CHECK1-NEXT:    [[TOBOOL5:%.*]] = trunc i8 [[TMP38]] to i1
 // CHECK1-NEXT:    [[TMP39:%.*]] = select i1 [[TOBOOL5]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP40:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP39]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP41]], align 4
 // CHECK1-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -1761,8 +1761,9 @@ int main() {
 // CHECK3-SAME: () #[[ATTR0:[0-9]+]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK3-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1796,7 +1797,6 @@ int main() {
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l43() #[[ATTR2:[0-9]+]]
 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK3:       omp_offload.cont:
-// CHECK3-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -2146,15 +2146,17 @@ int main() {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[ARG_CASTED:%.*]] = alloca i64, align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK3-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2188,7 +2190,6 @@ int main() {
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l76() #[[ATTR2]]
 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK3:       omp_offload.cont:
-// CHECK3-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -2241,7 +2242,6 @@ int main() {
 // CHECK3-NEXT:    [[TOBOOL5:%.*]] = trunc i8 [[TMP38]] to i1
 // CHECK3-NEXT:    [[TMP39:%.*]] = select i1 [[TOBOOL5]], i32 0, i32 1
 // CHECK3-NEXT:    [[TMP40:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP39]], 0
-// CHECK3-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP41]], align 4
 // CHECK3-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -3005,15 +3005,17 @@ int main() {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[ARG_ADDR:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[ARG_CASTED:%.*]] = alloca i64, align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK3-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store i32 [[ARG]], ptr [[ARG_ADDR]], align 4
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3047,7 +3049,6 @@ int main() {
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l57() #[[ATTR2]]
 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK3:       omp_offload.cont:
-// CHECK3-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -3100,7 +3101,6 @@ int main() {
 // CHECK3-NEXT:    [[TOBOOL5:%.*]] = trunc i8 [[TMP38]] to i1
 // CHECK3-NEXT:    [[TMP39:%.*]] = select i1 [[TOBOOL5]], i32 0, i32 1
 // CHECK3-NEXT:    [[TMP40:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP39]], 0
-// CHECK3-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP41]], align 4
 // CHECK3-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -4229,8 +4229,9 @@ int main() {
 // CHECK9-SAME: () #[[ATTR0:[0-9]+]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK9-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -4264,7 +4265,6 @@ int main() {
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l43() #[[ATTR2:[0-9]+]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
-// CHECK9-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -4614,15 +4614,17 @@ int main() {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[ARG_CASTED:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK9-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK9-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -4656,7 +4658,6 @@ int main() {
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l76() #[[ATTR2]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
-// CHECK9-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -4709,7 +4710,6 @@ int main() {
 // CHECK9-NEXT:    [[TOBOOL5:%.*]] = trunc i8 [[TMP38]] to i1
 // CHECK9-NEXT:    [[TMP39:%.*]] = select i1 [[TOBOOL5]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP40:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP39]], 0
-// CHECK9-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP41]], align 4
 // CHECK9-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -5243,15 +5243,17 @@ int main() {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[ARG_ADDR:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[ARG_CASTED:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK9-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 [[ARG]], ptr [[ARG_ADDR]], align 4
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK9-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -5285,7 +5287,6 @@ int main() {
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l57() #[[ATTR2]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
-// CHECK9-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -5338,7 +5339,6 @@ int main() {
 // CHECK9-NEXT:    [[TOBOOL5:%.*]] = trunc i8 [[TMP38]] to i1
 // CHECK9-NEXT:    [[TMP39:%.*]] = select i1 [[TOBOOL5]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP40:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP39]], 0
-// CHECK9-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP41]], align 4
 // CHECK9-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -5876,8 +5876,9 @@ int main() {
 // CHECK11-SAME: () #[[ATTR0:[0-9]+]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK11-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -5911,7 +5912,6 @@ int main() {
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l43() #[[ATTR2:[0-9]+]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
-// CHECK11-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -6261,15 +6261,17 @@ int main() {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[ARG_CASTED:%.*]] = alloca i64, align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK11-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK11-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -6303,7 +6305,6 @@ int main() {
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l76() #[[ATTR2]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
-// CHECK11-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -6356,7 +6357,6 @@ int main() {
 // CHECK11-NEXT:    [[TOBOOL5:%.*]] = trunc i8 [[TMP38]] to i1
 // CHECK11-NEXT:    [[TMP39:%.*]] = select i1 [[TOBOOL5]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP40:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP39]], 0
-// CHECK11-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP41]], align 4
 // CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -7120,15 +7120,17 @@ int main() {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[ARG_ADDR:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[ARG_CASTED:%.*]] = alloca i64, align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK11-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 [[ARG]], ptr [[ARG_ADDR]], align 4
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK11-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -7162,7 +7164,6 @@ int main() {
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l57() #[[ATTR2]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
-// CHECK11-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -7215,7 +7216,6 @@ int main() {
 // CHECK11-NEXT:    [[TOBOOL5:%.*]] = trunc i8 [[TMP38]] to i1
 // CHECK11-NEXT:    [[TMP39:%.*]] = select i1 [[TOBOOL5]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP40:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP39]], 0
-// CHECK11-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP41]], align 4
 // CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
index 2b5a3d788f2c0..b4362a372d6e9 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
@@ -797,6 +797,7 @@ int main() {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store ptr [[G]], ptr [[G1]], align 8
@@ -851,7 +852,6 @@ int main() {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1282,6 +1282,7 @@ int main() {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK9-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK9-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -1324,7 +1325,6 @@ int main() {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP17]], align 8
 // CHECK9-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP20]], align 4
 // CHECK9-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1818,6 +1818,7 @@ int main() {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store ptr [[G]], ptr [[G1]], align 4
@@ -1872,7 +1873,6 @@ int main() {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2297,6 +2297,7 @@ int main() {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK11-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK11-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -2339,7 +2340,6 @@ int main() {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP17]], align 4
 // CHECK11-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP20]], align 4
 // CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp
index 4792f1279200f..1a1b5d5e9acc6 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp
@@ -84,19 +84,20 @@ int main() {
 // CHECK1-NEXT:    [[EXN_SLOT:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    call void @_ZN1SC1El(ptr noundef nonnull align 8 dereferenceable(24) [[S]], i64 noundef 0)
 // CHECK1-NEXT:    [[CALL:%.*]] = invoke noundef signext i8 @_ZN1ScvcEv(ptr noundef nonnull align 8 dereferenceable(24) [[S]])
 // CHECK1-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
 // CHECK1:       invoke.cont:
 // CHECK1-NEXT:    store i8 [[CALL]], ptr [[A]], align 1
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -155,7 +156,6 @@ int main() {
 // CHECK1-NEXT:    [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK1-NEXT:    [[TMP27:%.*]] = zext i8 [[TMP26]] to i32
 // CHECK1-NEXT:    [[TMP28:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP27]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP29]], align 4
 // CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -582,8 +582,9 @@ int main() {
 // CHECK1-SAME: () #[[ATTR2]] comdat {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -617,7 +618,6 @@ int main() {
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l36() #[[ATTR6]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       omp_offload.cont:
-// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -658,10 +658,11 @@ int main() {
 // CHECK1-SAME: () #[[ATTR2]] comdat personality ptr @__gxx_personality_v0 {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK1-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -704,7 +705,6 @@ int main() {
 // CHECK1-NEXT:    [[TMP15:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK1-NEXT:    [[TMP16:%.*]] = zext i8 [[TMP15]] to i32
 // CHECK1-NEXT:    [[TMP17:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP16]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -1811,19 +1811,20 @@ int main() {
 // CHECK5-NEXT:    [[EXN_SLOT:%.*]] = alloca ptr, align 8
 // CHECK5-NEXT:    [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4
 // CHECK5-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK5-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK5-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK5-NEXT:    call void @_ZN1SC1El(ptr noundef nonnull align 8 dereferenceable(24) [[S]], i64 noundef 0)
 // CHECK5-NEXT:    [[CALL:%.*]] = invoke noundef signext i8 @_ZN1ScvcEv(ptr noundef nonnull align 8 dereferenceable(24) [[S]])
 // CHECK5-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
 // CHECK5:       invoke.cont:
 // CHECK5-NEXT:    store i8 [[CALL]], ptr [[A]], align 1
-// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK5-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1882,7 +1883,6 @@ int main() {
 // CHECK5-NEXT:    [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK5-NEXT:    [[TMP27:%.*]] = zext i8 [[TMP26]] to i32
 // CHECK5-NEXT:    [[TMP28:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP27]], 0
-// CHECK5-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP29]], align 4
 // CHECK5-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -2309,8 +2309,9 @@ int main() {
 // CHECK5-SAME: () #[[ATTR2]] comdat {
 // CHECK5-NEXT:  entry:
 // CHECK5-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK5-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
 // CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK5-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK5-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2344,7 +2345,6 @@ int main() {
 // CHECK5-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l36() #[[ATTR6]]
 // CHECK5-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK5:       omp_offload.cont:
-// CHECK5-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK5-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1
@@ -2385,10 +2385,11 @@ int main() {
 // CHECK5-SAME: () #[[ATTR2]] comdat personality ptr @__gxx_personality_v0 {
 // CHECK5-NEXT:  entry:
 // CHECK5-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
 // CHECK5-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8
 // CHECK5-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK5-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK5-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2431,7 +2432,6 @@ int main() {
 // CHECK5-NEXT:    [[TMP15:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK5-NEXT:    [[TMP16:%.*]] = zext i8 [[TMP15]] to i32
 // CHECK5-NEXT:    [[TMP17:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP16]], 0
-// CHECK5-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK5-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp
index 35698f812ed88..06cb3a6af1c93 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp
@@ -262,8 +262,8 @@ int main() {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -538,6 +538,7 @@ int main() {
 // CHECK1-NEXT:    [[VAR:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK1-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -547,7 +548,6 @@ int main() {
 // CHECK1-NEXT:    call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2)
 // CHECK1-NEXT:    store ptr [[TEST]], ptr [[VAR]], align 8
 // CHECK1-NEXT:    store ptr undef, ptr [[_TMP1]], align 8
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1030,8 +1030,8 @@ int main() {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1300,6 +1300,7 @@ int main() {
 // CHECK3-NEXT:    [[VAR:%.*]] = alloca ptr, align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[_TMP1:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK3-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -1309,7 +1310,6 @@ int main() {
 // CHECK3-NEXT:    call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2)
 // CHECK3-NEXT:    store ptr [[TEST]], ptr [[VAR]], align 4
 // CHECK3-NEXT:    store ptr undef, ptr [[_TMP1]], align 4
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp
index 590e65c5df49e..ed582616f4ce7 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp
@@ -57,9 +57,10 @@ int main() {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -93,7 +94,6 @@ int main() {
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l36() #[[ATTR2:[0-9]+]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       omp_offload.cont:
-// CHECK1-NEXT:    [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP15]], align 4
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp
index 1df8bce9953a5..9635e16438832 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp
@@ -106,6 +106,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4
 // CHECK1-NEXT:    store i32 [[TMP0]], ptr [[SIVAR_CASTED]], align 4
@@ -118,7 +119,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP4]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -413,6 +413,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 4
@@ -426,7 +427,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP4]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -726,6 +726,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4
 // CHECK3-NEXT:    store i32 [[TMP0]], ptr [[SIVAR_CASTED]], align 4
@@ -738,7 +739,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP4]], align 4
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK3-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1029,6 +1029,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 4
@@ -1042,7 +1043,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP4]], align 4
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK3-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp
index a1254b10fa10c..b11de268e262b 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp
@@ -234,22 +234,27 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS12:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP14:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS20:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS21:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP22:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS27:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS28:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS29:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP30:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -261,7 +266,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -304,7 +308,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -347,7 +350,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP42]], align 8
 // CHECK1-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK1-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -390,7 +392,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP62]], align 8
 // CHECK1-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP65]], align 4
 // CHECK1-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1
@@ -433,7 +434,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP82]], align 8
 // CHECK1-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP85]], align 4
 // CHECK1-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1
@@ -1338,22 +1338,27 @@ int main (int argc, char **argv) {
 // CHECK2-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK2-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK2-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK2-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK2-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 8
 // CHECK2-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 8
 // CHECK2-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK2-NEXT:    [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [1 x ptr], align 8
 // CHECK2-NEXT:    [[DOTOFFLOAD_PTRS12:%.*]] = alloca [1 x ptr], align 8
 // CHECK2-NEXT:    [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [1 x ptr], align 8
 // CHECK2-NEXT:    [[_TMP14:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK2-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [1 x ptr], align 8
 // CHECK2-NEXT:    [[DOTOFFLOAD_PTRS20:%.*]] = alloca [1 x ptr], align 8
 // CHECK2-NEXT:    [[DOTOFFLOAD_MAPPERS21:%.*]] = alloca [1 x ptr], align 8
 // CHECK2-NEXT:    [[_TMP22:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK2-NEXT:    [[DOTOFFLOAD_BASEPTRS27:%.*]] = alloca [1 x ptr], align 8
 // CHECK2-NEXT:    [[DOTOFFLOAD_PTRS28:%.*]] = alloca [1 x ptr], align 8
 // CHECK2-NEXT:    [[DOTOFFLOAD_MAPPERS29:%.*]] = alloca [1 x ptr], align 8
 // CHECK2-NEXT:    [[_TMP30:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK2-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK2-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -1365,7 +1370,6 @@ int main (int argc, char **argv) {
 // CHECK2-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK2-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1408,7 +1412,6 @@ int main (int argc, char **argv) {
 // CHECK2-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK2-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK2-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK2-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK2-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -1451,7 +1454,6 @@ int main (int argc, char **argv) {
 // CHECK2-NEXT:    store ptr null, ptr [[TMP42]], align 8
 // CHECK2-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
 // CHECK2-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
-// CHECK2-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK2-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK2-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK2-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -1494,7 +1496,6 @@ int main (int argc, char **argv) {
 // CHECK2-NEXT:    store ptr null, ptr [[TMP62]], align 8
 // CHECK2-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
 // CHECK2-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK2-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK2-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0
 // CHECK2-NEXT:    store i32 2, ptr [[TMP65]], align 4
 // CHECK2-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1
@@ -1537,7 +1538,6 @@ int main (int argc, char **argv) {
 // CHECK2-NEXT:    store ptr null, ptr [[TMP82]], align 8
 // CHECK2-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
 // CHECK2-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
-// CHECK2-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK2-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0
 // CHECK2-NEXT:    store i32 2, ptr [[TMP85]], align 4
 // CHECK2-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1
@@ -2442,22 +2442,27 @@ int main (int argc, char **argv) {
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK5-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 4
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 4
 // CHECK5-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [1 x ptr], align 4
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS12:%.*]] = alloca [1 x ptr], align 4
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [1 x ptr], align 4
 // CHECK5-NEXT:    [[_TMP14:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [1 x ptr], align 4
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS20:%.*]] = alloca [1 x ptr], align 4
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS21:%.*]] = alloca [1 x ptr], align 4
 // CHECK5-NEXT:    [[_TMP22:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[DOTOFFLOAD_BASEPTRS27:%.*]] = alloca [1 x ptr], align 4
 // CHECK5-NEXT:    [[DOTOFFLOAD_PTRS28:%.*]] = alloca [1 x ptr], align 4
 // CHECK5-NEXT:    [[DOTOFFLOAD_MAPPERS29:%.*]] = alloca [1 x ptr], align 4
 // CHECK5-NEXT:    [[_TMP30:%.*]] = alloca i32, align 4
+// CHECK5-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK5-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CHECK5-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -2469,7 +2474,6 @@ int main (int argc, char **argv) {
 // CHECK5-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK5-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK5-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK5-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2512,7 +2516,6 @@ int main (int argc, char **argv) {
 // CHECK5-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK5-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK5-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -2555,7 +2558,6 @@ int main (int argc, char **argv) {
 // CHECK5-NEXT:    store ptr null, ptr [[TMP42]], align 4
 // CHECK5-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK5-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -2598,7 +2600,6 @@ int main (int argc, char **argv) {
 // CHECK5-NEXT:    store ptr null, ptr [[TMP62]], align 4
 // CHECK5-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP65]], align 4
 // CHECK5-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1
@@ -2641,7 +2642,6 @@ int main (int argc, char **argv) {
 // CHECK5-NEXT:    store ptr null, ptr [[TMP82]], align 4
 // CHECK5-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
-// CHECK5-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK5-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0
 // CHECK5-NEXT:    store i32 2, ptr [[TMP85]], align 4
 // CHECK5-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1
@@ -3519,22 +3519,27 @@ int main (int argc, char **argv) {
 // CHECK6-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK6-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK6-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK6-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK6-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK6-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 4
 // CHECK6-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 4
 // CHECK6-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK6-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK6-NEXT:    [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [1 x ptr], align 4
 // CHECK6-NEXT:    [[DOTOFFLOAD_PTRS12:%.*]] = alloca [1 x ptr], align 4
 // CHECK6-NEXT:    [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [1 x ptr], align 4
 // CHECK6-NEXT:    [[_TMP14:%.*]] = alloca i32, align 4
+// CHECK6-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK6-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [1 x ptr], align 4
 // CHECK6-NEXT:    [[DOTOFFLOAD_PTRS20:%.*]] = alloca [1 x ptr], align 4
 // CHECK6-NEXT:    [[DOTOFFLOAD_MAPPERS21:%.*]] = alloca [1 x ptr], align 4
 // CHECK6-NEXT:    [[_TMP22:%.*]] = alloca i32, align 4
+// CHECK6-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK6-NEXT:    [[DOTOFFLOAD_BASEPTRS27:%.*]] = alloca [1 x ptr], align 4
 // CHECK6-NEXT:    [[DOTOFFLOAD_PTRS28:%.*]] = alloca [1 x ptr], align 4
 // CHECK6-NEXT:    [[DOTOFFLOAD_MAPPERS29:%.*]] = alloca [1 x ptr], align 4
 // CHECK6-NEXT:    [[_TMP30:%.*]] = alloca i32, align 4
+// CHECK6-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK6-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK6-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CHECK6-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -3546,7 +3551,6 @@ int main (int argc, char **argv) {
 // CHECK6-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK6-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK6-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK6-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK6-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK6-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK6-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3589,7 +3593,6 @@ int main (int argc, char **argv) {
 // CHECK6-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK6-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK6-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK6-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK6-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK6-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK6-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -3632,7 +3635,6 @@ int main (int argc, char **argv) {
 // CHECK6-NEXT:    store ptr null, ptr [[TMP42]], align 4
 // CHECK6-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
 // CHECK6-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
-// CHECK6-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK6-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK6-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK6-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -3675,7 +3677,6 @@ int main (int argc, char **argv) {
 // CHECK6-NEXT:    store ptr null, ptr [[TMP62]], align 4
 // CHECK6-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
 // CHECK6-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK6-NEXT:    [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK6-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0
 // CHECK6-NEXT:    store i32 2, ptr [[TMP65]], align 4
 // CHECK6-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1
@@ -3718,7 +3719,6 @@ int main (int argc, char **argv) {
 // CHECK6-NEXT:    store ptr null, ptr [[TMP82]], align 4
 // CHECK6-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
 // CHECK6-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
-// CHECK6-NEXT:    [[KERNEL_ARGS31:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK6-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0
 // CHECK6-NEXT:    store i32 2, ptr [[TMP85]], align 4
 // CHECK6-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1
@@ -4983,6 +4983,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK13-NEXT:    [[N_CASTED3:%.*]] = alloca i64, align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [3 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [3 x ptr], align 8
@@ -4991,6 +4992,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[M_CASTED:%.*]] = alloca i64, align 8
 // CHECK13-NEXT:    [[N_CASTED18:%.*]] = alloca i64, align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [4 x ptr], align 8
@@ -5000,6 +5002,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[_TMP23:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_24:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[N_CASTED33:%.*]] = alloca i64, align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_BASEPTRS34:%.*]] = alloca [3 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_PTRS35:%.*]] = alloca [3 x ptr], align 8
@@ -5008,6 +5011,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[_TMP38:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_39:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_40:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS45:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[M_CASTED48:%.*]] = alloca i64, align 8
 // CHECK13-NEXT:    [[N_CASTED49:%.*]] = alloca i64, align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_BASEPTRS50:%.*]] = alloca [4 x ptr], align 8
@@ -5017,6 +5021,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[_TMP54:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_55:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[DOTCAPTURE_EXPR_56:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS61:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK13-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK13-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
@@ -5066,7 +5071,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK13-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK13-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK13-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK13-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK13-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -5138,7 +5142,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP56:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK13-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP56]], 1
 // CHECK13-NEXT:    [[TMP57:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK13-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP58]], align 4
 // CHECK13-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -5219,7 +5222,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP96:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_25]], align 4
 // CHECK13-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP96]], 1
 // CHECK13-NEXT:    [[TMP97:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK13-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP98]], align 4
 // CHECK13-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1
@@ -5291,7 +5293,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP131:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_40]], align 4
 // CHECK13-NEXT:    [[ADD44:%.*]] = add nsw i32 [[TMP131]], 1
 // CHECK13-NEXT:    [[TMP132:%.*]] = zext i32 [[ADD44]] to i64
-// CHECK13-NEXT:    [[KERNEL_ARGS45:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP133]], align 4
 // CHECK13-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 1
@@ -5372,7 +5373,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP171:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_56]], align 4
 // CHECK13-NEXT:    [[ADD60:%.*]] = add nsw i32 [[TMP171]], 1
 // CHECK13-NEXT:    [[TMP172:%.*]] = zext i32 [[ADD60]] to i64
-// CHECK13-NEXT:    [[KERNEL_ARGS61:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP173:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP173]], align 4
 // CHECK13-NEXT:    [[TMP174:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 1
@@ -6652,24 +6652,29 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[M_CASTED:%.*]] = alloca i64, align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 8
 // CHECK13-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_BASEPTRS15:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_PTRS16:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_MAPPERS17:%.*]] = alloca [1 x ptr], align 8
 // CHECK13-NEXT:    [[_TMP18:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[M_CASTED22:%.*]] = alloca i64, align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_BASEPTRS23:%.*]] = alloca [2 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_PTRS24:%.*]] = alloca [2 x ptr], align 8
 // CHECK13-NEXT:    [[DOTOFFLOAD_MAPPERS25:%.*]] = alloca [2 x ptr], align 8
 // CHECK13-NEXT:    [[_TMP26:%.*]] = alloca i32, align 4
+// CHECK13-NEXT:    [[KERNEL_ARGS27:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK13-NEXT:    store i32 10, ptr [[M]], align 4
 // CHECK13-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
@@ -6680,7 +6685,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK13-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK13-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK13-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK13-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -6722,7 +6726,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK13-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK13-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK13-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -6773,7 +6776,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    store ptr null, ptr [[TMP47]], align 8
 // CHECK13-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK13-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP50]], align 4
 // CHECK13-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1
@@ -6815,7 +6817,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    store ptr null, ptr [[TMP67]], align 8
 // CHECK13-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0
-// CHECK13-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP70]], align 4
 // CHECK13-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1
@@ -6866,7 +6867,6 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    store ptr null, ptr [[TMP92]], align 8
 // CHECK13-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS24]], i32 0, i32 0
-// CHECK13-NEXT:    [[KERNEL_ARGS27:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK13-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 0
 // CHECK13-NEXT:    store i32 2, ptr [[TMP95]], align 4
 // CHECK13-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 1
@@ -7801,6 +7801,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK14-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK14-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK14-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK14-NEXT:    [[N_CASTED3:%.*]] = alloca i64, align 8
 // CHECK14-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [3 x ptr], align 8
 // CHECK14-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [3 x ptr], align 8
@@ -7809,6 +7810,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK14-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK14-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK14-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK14-NEXT:    [[M_CASTED:%.*]] = alloca i64, align 8
 // CHECK14-NEXT:    [[N_CASTED18:%.*]] = alloca i64, align 8
 // CHECK14-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [4 x ptr], align 8
@@ -7818,6 +7820,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[_TMP23:%.*]] = alloca i32, align 4
 // CHECK14-NEXT:    [[DOTCAPTURE_EXPR_24:%.*]] = alloca i32, align 4
 // CHECK14-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
+// CHECK14-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK14-NEXT:    [[N_CASTED33:%.*]] = alloca i64, align 8
 // CHECK14-NEXT:    [[DOTOFFLOAD_BASEPTRS34:%.*]] = alloca [3 x ptr], align 8
 // CHECK14-NEXT:    [[DOTOFFLOAD_PTRS35:%.*]] = alloca [3 x ptr], align 8
@@ -7826,6 +7829,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[_TMP38:%.*]] = alloca i32, align 4
 // CHECK14-NEXT:    [[DOTCAPTURE_EXPR_39:%.*]] = alloca i32, align 4
 // CHECK14-NEXT:    [[DOTCAPTURE_EXPR_40:%.*]] = alloca i32, align 4
+// CHECK14-NEXT:    [[KERNEL_ARGS45:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK14-NEXT:    [[M_CASTED48:%.*]] = alloca i64, align 8
 // CHECK14-NEXT:    [[N_CASTED49:%.*]] = alloca i64, align 8
 // CHECK14-NEXT:    [[DOTOFFLOAD_BASEPTRS50:%.*]] = alloca [4 x ptr], align 8
@@ -7835,6 +7839,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[_TMP54:%.*]] = alloca i32, align 4
 // CHECK14-NEXT:    [[DOTCAPTURE_EXPR_55:%.*]] = alloca i32, align 4
 // CHECK14-NEXT:    [[DOTCAPTURE_EXPR_56:%.*]] = alloca i32, align 4
+// CHECK14-NEXT:    [[KERNEL_ARGS61:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK14-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK14-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK14-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
@@ -7884,7 +7889,6 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK14-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK14-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK14-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK14-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK14-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK14-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -7956,7 +7960,6 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP56:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK14-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP56]], 1
 // CHECK14-NEXT:    [[TMP57:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK14-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK14-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK14-NEXT:    store i32 2, ptr [[TMP58]], align 4
 // CHECK14-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -8037,7 +8040,6 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP96:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_25]], align 4
 // CHECK14-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP96]], 1
 // CHECK14-NEXT:    [[TMP97:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK14-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK14-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0
 // CHECK14-NEXT:    store i32 2, ptr [[TMP98]], align 4
 // CHECK14-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1
@@ -8109,7 +8111,6 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP131:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_40]], align 4
 // CHECK14-NEXT:    [[ADD44:%.*]] = add nsw i32 [[TMP131]], 1
 // CHECK14-NEXT:    [[TMP132:%.*]] = zext i32 [[ADD44]] to i64
-// CHECK14-NEXT:    [[KERNEL_ARGS45:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK14-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 0
 // CHECK14-NEXT:    store i32 2, ptr [[TMP133]], align 4
 // CHECK14-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 1
@@ -8190,7 +8191,6 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP171:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_56]], align 4
 // CHECK14-NEXT:    [[ADD60:%.*]] = add nsw i32 [[TMP171]], 1
 // CHECK14-NEXT:    [[TMP172:%.*]] = zext i32 [[ADD60]] to i64
-// CHECK14-NEXT:    [[KERNEL_ARGS61:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK14-NEXT:    [[TMP173:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 0
 // CHECK14-NEXT:    store i32 2, ptr [[TMP173]], align 4
 // CHECK14-NEXT:    [[TMP174:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 1
@@ -9470,24 +9470,29 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK14-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK14-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK14-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK14-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 8
 // CHECK14-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 8
 // CHECK14-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK14-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK14-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK14-NEXT:    [[M_CASTED:%.*]] = alloca i64, align 8
 // CHECK14-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 8
 // CHECK14-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 8
 // CHECK14-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 8
 // CHECK14-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK14-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK14-NEXT:    [[DOTOFFLOAD_BASEPTRS15:%.*]] = alloca [1 x ptr], align 8
 // CHECK14-NEXT:    [[DOTOFFLOAD_PTRS16:%.*]] = alloca [1 x ptr], align 8
 // CHECK14-NEXT:    [[DOTOFFLOAD_MAPPERS17:%.*]] = alloca [1 x ptr], align 8
 // CHECK14-NEXT:    [[_TMP18:%.*]] = alloca i32, align 4
+// CHECK14-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK14-NEXT:    [[M_CASTED22:%.*]] = alloca i64, align 8
 // CHECK14-NEXT:    [[DOTOFFLOAD_BASEPTRS23:%.*]] = alloca [2 x ptr], align 8
 // CHECK14-NEXT:    [[DOTOFFLOAD_PTRS24:%.*]] = alloca [2 x ptr], align 8
 // CHECK14-NEXT:    [[DOTOFFLOAD_MAPPERS25:%.*]] = alloca [2 x ptr], align 8
 // CHECK14-NEXT:    [[_TMP26:%.*]] = alloca i32, align 4
+// CHECK14-NEXT:    [[KERNEL_ARGS27:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK14-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK14-NEXT:    store i32 10, ptr [[M]], align 4
 // CHECK14-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
@@ -9498,7 +9503,6 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK14-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK14-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK14-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK14-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK14-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK14-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -9540,7 +9544,6 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK14-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK14-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK14-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK14-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK14-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK14-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -9591,7 +9594,6 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    store ptr null, ptr [[TMP47]], align 8
 // CHECK14-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK14-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK14-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK14-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK14-NEXT:    store i32 2, ptr [[TMP50]], align 4
 // CHECK14-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1
@@ -9633,7 +9635,6 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    store ptr null, ptr [[TMP67]], align 8
 // CHECK14-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0
 // CHECK14-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0
-// CHECK14-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK14-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0
 // CHECK14-NEXT:    store i32 2, ptr [[TMP70]], align 4
 // CHECK14-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1
@@ -9684,7 +9685,6 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    store ptr null, ptr [[TMP92]], align 8
 // CHECK14-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0
 // CHECK14-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS24]], i32 0, i32 0
-// CHECK14-NEXT:    [[KERNEL_ARGS27:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK14-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 0
 // CHECK14-NEXT:    store i32 2, ptr [[TMP95]], align 4
 // CHECK14-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 1
@@ -10619,6 +10619,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    [[N_CASTED3:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [3 x ptr], align 4
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [3 x ptr], align 4
@@ -10627,6 +10628,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[M_CASTED:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[N_CASTED18:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [4 x ptr], align 4
@@ -10636,6 +10638,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[_TMP23:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_24:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[N_CASTED33:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTOFFLOAD_BASEPTRS34:%.*]] = alloca [3 x ptr], align 4
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS35:%.*]] = alloca [3 x ptr], align 4
@@ -10644,6 +10647,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[_TMP38:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_39:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_40:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS45:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[M_CASTED48:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[N_CASTED49:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTOFFLOAD_BASEPTRS50:%.*]] = alloca [4 x ptr], align 4
@@ -10653,6 +10657,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[_TMP54:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_55:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTCAPTURE_EXPR_56:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS61:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK17-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK17-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 4
@@ -10702,7 +10707,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK17-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK17-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK17-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -10775,7 +10779,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP57:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK17-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP57]], 1
 // CHECK17-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK17-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP59]], align 4
 // CHECK17-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -10857,7 +10860,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP98:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_25]], align 4
 // CHECK17-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP98]], 1
 // CHECK17-NEXT:    [[TMP99:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK17-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP100]], align 4
 // CHECK17-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1
@@ -10930,7 +10932,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP134:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_40]], align 4
 // CHECK17-NEXT:    [[ADD44:%.*]] = add nsw i32 [[TMP134]], 1
 // CHECK17-NEXT:    [[TMP135:%.*]] = zext i32 [[ADD44]] to i64
-// CHECK17-NEXT:    [[KERNEL_ARGS45:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP136]], align 4
 // CHECK17-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 1
@@ -11012,7 +11013,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP175:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_56]], align 4
 // CHECK17-NEXT:    [[ADD60:%.*]] = add nsw i32 [[TMP175]], 1
 // CHECK17-NEXT:    [[TMP176:%.*]] = zext i32 [[ADD60]] to i64
-// CHECK17-NEXT:    [[KERNEL_ARGS61:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP177:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP177]], align 4
 // CHECK17-NEXT:    [[TMP178:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 1
@@ -12267,24 +12267,29 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK17-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK17-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 4
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 4
 // CHECK17-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK17-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[M_CASTED:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 4
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 4
 // CHECK17-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 4
 // CHECK17-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_BASEPTRS15:%.*]] = alloca [1 x ptr], align 4
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS16:%.*]] = alloca [1 x ptr], align 4
 // CHECK17-NEXT:    [[DOTOFFLOAD_MAPPERS17:%.*]] = alloca [1 x ptr], align 4
 // CHECK17-NEXT:    [[_TMP18:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[M_CASTED22:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[DOTOFFLOAD_BASEPTRS23:%.*]] = alloca [2 x ptr], align 4
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS24:%.*]] = alloca [2 x ptr], align 4
 // CHECK17-NEXT:    [[DOTOFFLOAD_MAPPERS25:%.*]] = alloca [2 x ptr], align 4
 // CHECK17-NEXT:    [[_TMP26:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS27:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK17-NEXT:    store i32 10, ptr [[M]], align 4
 // CHECK17-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
@@ -12295,7 +12300,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK17-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK17-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -12337,7 +12341,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK17-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK17-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK17-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -12388,7 +12391,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    store ptr null, ptr [[TMP47]], align 4
 // CHECK17-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK17-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP50]], align 4
 // CHECK17-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1
@@ -12430,7 +12432,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    store ptr null, ptr [[TMP67]], align 4
 // CHECK17-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0
-// CHECK17-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP70]], align 4
 // CHECK17-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1
@@ -12481,7 +12482,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    store ptr null, ptr [[TMP92]], align 4
 // CHECK17-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS24]], i32 0, i32 0
-// CHECK17-NEXT:    [[KERNEL_ARGS27:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK17-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP95]], align 4
 // CHECK17-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 1
@@ -13389,6 +13389,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    [[N_CASTED3:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [3 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [3 x ptr], align 4
@@ -13397,6 +13398,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[M_CASTED:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[N_CASTED18:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [4 x ptr], align 4
@@ -13406,6 +13408,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[_TMP23:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_24:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[N_CASTED33:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_BASEPTRS34:%.*]] = alloca [3 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS35:%.*]] = alloca [3 x ptr], align 4
@@ -13414,6 +13417,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[_TMP38:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_39:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_40:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS45:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[M_CASTED48:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[N_CASTED49:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_BASEPTRS50:%.*]] = alloca [4 x ptr], align 4
@@ -13423,6 +13427,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[_TMP54:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_55:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTCAPTURE_EXPR_56:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS61:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK19-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK19-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 4
@@ -13472,7 +13477,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK19-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK19-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK19-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -13545,7 +13549,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP57:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK19-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP57]], 1
 // CHECK19-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK19-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP59]], align 4
 // CHECK19-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -13627,7 +13630,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP98:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_25]], align 4
 // CHECK19-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP98]], 1
 // CHECK19-NEXT:    [[TMP99:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK19-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP100]], align 4
 // CHECK19-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1
@@ -13700,7 +13702,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP134:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_40]], align 4
 // CHECK19-NEXT:    [[ADD44:%.*]] = add nsw i32 [[TMP134]], 1
 // CHECK19-NEXT:    [[TMP135:%.*]] = zext i32 [[ADD44]] to i64
-// CHECK19-NEXT:    [[KERNEL_ARGS45:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP136]], align 4
 // CHECK19-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 1
@@ -13782,7 +13783,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP175:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_56]], align 4
 // CHECK19-NEXT:    [[ADD60:%.*]] = add nsw i32 [[TMP175]], 1
 // CHECK19-NEXT:    [[TMP176:%.*]] = zext i32 [[ADD60]] to i64
-// CHECK19-NEXT:    [[KERNEL_ARGS61:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP177:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP177]], align 4
 // CHECK19-NEXT:    [[TMP178:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 1
@@ -15037,24 +15037,29 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[M_CASTED:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x ptr], align 4
 // CHECK19-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[DOTOFFLOAD_BASEPTRS15:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS16:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_MAPPERS17:%.*]] = alloca [1 x ptr], align 4
 // CHECK19-NEXT:    [[_TMP18:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[M_CASTED22:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_BASEPTRS23:%.*]] = alloca [2 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS24:%.*]] = alloca [2 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_MAPPERS25:%.*]] = alloca [2 x ptr], align 4
 // CHECK19-NEXT:    [[_TMP26:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS27:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK19-NEXT:    store i32 10, ptr [[M]], align 4
 // CHECK19-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
@@ -15065,7 +15070,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK19-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK19-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -15107,7 +15111,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK19-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK19-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK19-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -15158,7 +15161,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    store ptr null, ptr [[TMP47]], align 4
 // CHECK19-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK19-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP50]], align 4
 // CHECK19-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1
@@ -15200,7 +15202,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    store ptr null, ptr [[TMP67]], align 4
 // CHECK19-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0
-// CHECK19-NEXT:    [[KERNEL_ARGS19:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP70]], align 4
 // CHECK19-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1
@@ -15251,7 +15252,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    store ptr null, ptr [[TMP92]], align 4
 // CHECK19-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS24]], i32 0, i32 0
-// CHECK19-NEXT:    [[KERNEL_ARGS27:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK19-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP95]], align 4
 // CHECK19-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_private_codegen.cpp b/clang/test/OpenMP/teams_distribute_private_codegen.cpp
index e01cf84087b9f..58c2cab50319d 100644
--- a/clang/test/OpenMP/teams_distribute_private_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_private_codegen.cpp
@@ -240,8 +240,8 @@ int main() {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -401,6 +401,7 @@ int main() {
 // CHECK1-NEXT:    [[VAR:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK1-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -410,7 +411,6 @@ int main() {
 // CHECK1-NEXT:    call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2)
 // CHECK1-NEXT:    store ptr [[TEST]], ptr [[VAR]], align 8
 // CHECK1-NEXT:    store ptr undef, ptr [[_TMP1]], align 8
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -775,8 +775,8 @@ int main() {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -934,6 +934,7 @@ int main() {
 // CHECK3-NEXT:    [[VAR:%.*]] = alloca ptr, align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[_TMP1:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK3-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -943,7 +944,6 @@ int main() {
 // CHECK3-NEXT:    call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2)
 // CHECK3-NEXT:    store ptr [[TEST]], ptr [[VAR]], align 4
 // CHECK3-NEXT:    store ptr undef, ptr [[_TMP1]], align 4
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_reduction_codegen.cpp b/clang/test/OpenMP/teams_distribute_reduction_codegen.cpp
index 9659ee44f480b..a0c18f8d837d1 100644
--- a/clang/test/OpenMP/teams_distribute_reduction_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_reduction_codegen.cpp
@@ -91,6 +91,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4
 // CHECK1-NEXT:    store i32 [[TMP0]], ptr [[SIVAR_CASTED]], align 4
@@ -103,7 +104,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP4]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -270,6 +270,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 4
@@ -283,7 +284,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP4]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -455,6 +455,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4
 // CHECK3-NEXT:    store i32 [[TMP0]], ptr [[SIVAR_CASTED]], align 4
@@ -467,7 +468,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP4]], align 4
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK3-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -634,6 +634,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 4
@@ -647,7 +648,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP4]], align 4
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK3-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_codegen.cpp
index 249b8922165e3..c7060469881a0 100644
--- a/clang/test/OpenMP/teams_distribute_simd_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_simd_codegen.cpp
@@ -228,6 +228,7 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[N_CASTED4:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS6:%.*]] = alloca [2 x ptr], align 8
@@ -235,6 +236,7 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
 // CHECK1-NEXT:    [[DIV:%.*]] = sdiv i32 [[TMP0]], 128
@@ -296,7 +298,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
 // CHECK1-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
 // CHECK1-NEXT:    [[TMP31:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP26]], 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP32]], align 4
 // CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -357,7 +358,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP59:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK1-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP59]], 1
 // CHECK1-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK1-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP61]], align 4
 // CHECK1-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -666,6 +666,7 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[N_CASTED4:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS6:%.*]] = alloca [2 x ptr], align 4
@@ -673,6 +674,7 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
 // CHECK3-NEXT:    [[DIV:%.*]] = sdiv i32 [[TMP0]], 128
@@ -734,7 +736,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
 // CHECK3-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
 // CHECK3-NEXT:    [[TMP31:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP26]], 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP32]], align 4
 // CHECK3-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -795,7 +796,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[TMP59:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK3-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP59]], 1
 // CHECK3-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK3-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP61]], align 4
 // CHECK3-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -1367,6 +1367,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    store i32 100, ptr [[N]], align 4
 // CHECK9-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N]], align 4
 // CHECK9-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
@@ -1412,7 +1413,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK9-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1596,6 +1596,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    store i32 100, ptr [[N]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = load i32, ptr [[N]], align 4
 // CHECK11-NEXT:    [[TMP1:%.*]] = call ptr @llvm.stacksave()
@@ -1641,7 +1642,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK11-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1979,6 +1979,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [3 x i64], align 8
 // CHECK17-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK17-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK17-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 1
@@ -2012,7 +2013,6 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK17-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2164,6 +2164,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [3 x i64], align 4
 // CHECK19-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK19-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CHECK19-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 1
@@ -2197,7 +2198,6 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK19-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2348,6 +2348,7 @@ int main (int argc, char **argv) {
 // CHECK21-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
 // CHECK21-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [3 x i64], align 8
 // CHECK21-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK21-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK21-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK21-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK21-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 1
@@ -2381,7 +2382,6 @@ int main (int argc, char **argv) {
 // CHECK21-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK21-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK21-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK21-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK21-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK21-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK21-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2583,6 +2583,7 @@ int main (int argc, char **argv) {
 // CHECK23-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
 // CHECK23-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [3 x i64], align 4
 // CHECK23-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK23-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK23-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK23-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CHECK23-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 1
@@ -2616,7 +2617,6 @@ int main (int argc, char **argv) {
 // CHECK23-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK23-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK23-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK23-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK23-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK23-NEXT:    store i32 2, ptr [[TMP18]], align 4
 // CHECK23-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3129,6 +3129,7 @@ int main (int argc, char **argv) {
 // CHECK33-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK33-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK33-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK33-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK33-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK33-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK33-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
@@ -3177,7 +3178,6 @@ int main (int argc, char **argv) {
 // CHECK33-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK33-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK33-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK33-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK33-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK33-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK33-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3355,6 +3355,7 @@ int main (int argc, char **argv) {
 // CHECK33-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK33-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
 // CHECK33-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK33-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK33-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK33-NEXT:    store i32 0, ptr [[TE]], align 4
 // CHECK33-NEXT:    store i32 128, ptr [[TH]], align 4
@@ -3386,7 +3387,6 @@ int main (int argc, char **argv) {
 // CHECK33-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK33-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TE]], align 4
 // CHECK33-NEXT:    [[TMP16:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP15]], 0
-// CHECK33-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK33-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK33-NEXT:    store i32 2, ptr [[TMP17]], align 4
 // CHECK33-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3539,6 +3539,7 @@ int main (int argc, char **argv) {
 // CHECK35-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK35-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK35-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK35-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK35-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK35-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK35-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 4
@@ -3587,7 +3588,6 @@ int main (int argc, char **argv) {
 // CHECK35-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK35-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK35-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK35-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK35-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK35-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK35-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3764,6 +3764,7 @@ int main (int argc, char **argv) {
 // CHECK35-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK35-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
 // CHECK35-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK35-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK35-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK35-NEXT:    store i32 0, ptr [[TE]], align 4
 // CHECK35-NEXT:    store i32 128, ptr [[TH]], align 4
@@ -3795,7 +3796,6 @@ int main (int argc, char **argv) {
 // CHECK35-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK35-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TE]], align 4
 // CHECK35-NEXT:    [[TMP16:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP15]], 0
-// CHECK35-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK35-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK35-NEXT:    store i32 2, ptr [[TMP17]], align 4
 // CHECK35-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3948,6 +3948,7 @@ int main (int argc, char **argv) {
 // CHECK37-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK37-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK37-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK37-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK37-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK37-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK37-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
@@ -4005,7 +4006,6 @@ int main (int argc, char **argv) {
 // CHECK37-NEXT:    [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK37-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP26]], 1
 // CHECK37-NEXT:    [[TMP27:%.*]] = zext i32 [[ADD]] to i64
-// CHECK37-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK37-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK37-NEXT:    store i32 2, ptr [[TMP28]], align 4
 // CHECK37-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -4230,6 +4230,7 @@ int main (int argc, char **argv) {
 // CHECK37-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
 // CHECK37-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
 // CHECK37-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK37-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK37-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK37-NEXT:    store i32 0, ptr [[TE]], align 4
 // CHECK37-NEXT:    store i32 128, ptr [[TH]], align 4
@@ -4261,7 +4262,6 @@ int main (int argc, char **argv) {
 // CHECK37-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK37-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TE]], align 4
 // CHECK37-NEXT:    [[TMP16:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP15]], 0
-// CHECK37-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK37-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK37-NEXT:    store i32 2, ptr [[TMP17]], align 4
 // CHECK37-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -4415,6 +4415,7 @@ int main (int argc, char **argv) {
 // CHECK39-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK39-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK39-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK39-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK39-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK39-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK39-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 4
@@ -4472,7 +4473,6 @@ int main (int argc, char **argv) {
 // CHECK39-NEXT:    [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK39-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP26]], 1
 // CHECK39-NEXT:    [[TMP27:%.*]] = zext i32 [[ADD]] to i64
-// CHECK39-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK39-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK39-NEXT:    store i32 2, ptr [[TMP28]], align 4
 // CHECK39-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -4695,6 +4695,7 @@ int main (int argc, char **argv) {
 // CHECK39-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
 // CHECK39-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
 // CHECK39-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK39-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK39-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK39-NEXT:    store i32 0, ptr [[TE]], align 4
 // CHECK39-NEXT:    store i32 128, ptr [[TH]], align 4
@@ -4726,7 +4727,6 @@ int main (int argc, char **argv) {
 // CHECK39-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK39-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TE]], align 4
 // CHECK39-NEXT:    [[TMP16:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP15]], 0
-// CHECK39-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK39-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK39-NEXT:    store i32 2, ptr [[TMP17]], align 4
 // CHECK39-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp
index efde7b7425f65..20414448bc831 100644
--- a/clang/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp
@@ -115,6 +115,7 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[_TMP2:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -126,7 +127,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -291,6 +291,7 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[_TMP2:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -302,7 +303,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -610,6 +610,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
@@ -686,7 +687,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
 // CHECK9-NEXT:    [[TMP35:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
 // CHECK9-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP35]], 1
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP36]], align 4
 // CHECK9-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -925,6 +925,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    store ptr [[A]], ptr [[TMP0]], align 8
@@ -934,7 +935,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK9-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1099,6 +1099,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK11-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 4
@@ -1174,7 +1175,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
 // CHECK11-NEXT:    [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
 // CHECK11-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP34]], 1
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP35]], align 4
 // CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1411,6 +1411,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    store ptr [[A]], ptr [[TMP0]], align 4
@@ -1420,7 +1421,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK11-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp
index a90b9ebf04ffb..96910943e34d3 100644
--- a/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp
@@ -147,14 +147,17 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS12:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP14:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -166,7 +169,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -209,7 +211,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -252,7 +253,6 @@ int main (int argc, char **argv) {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP42]], align 8
 // CHECK1-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK1-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -587,14 +587,17 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS12:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP14:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -606,7 +609,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -649,7 +651,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
@@ -692,7 +693,6 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP42]], align 4
 // CHECK3-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK3-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -1272,6 +1272,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[N_CASTED3:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [3 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [3 x ptr], align 8
@@ -1280,6 +1281,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[N_CASTED18:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [3 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS20:%.*]] = alloca [3 x ptr], align 8
@@ -1288,6 +1290,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[_TMP23:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_24:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
@@ -1336,7 +1339,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK9-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1408,7 +1410,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP56:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK9-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP56]], 1
 // CHECK9-NEXT:    [[TMP57:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP58]], align 4
 // CHECK9-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -1480,7 +1481,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP91:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_25]], align 4
 // CHECK9-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP91]], 1
 // CHECK9-NEXT:    [[TMP92:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK9-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP93]], align 4
 // CHECK9-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1
@@ -1925,14 +1925,17 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    store ptr [[A]], ptr [[TMP0]], align 8
@@ -1942,7 +1945,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK9-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1984,7 +1986,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -2026,7 +2027,6 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP42]], align 8
 // CHECK9-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK9-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1
@@ -2356,6 +2356,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[N_CASTED3:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [3 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [3 x ptr], align 4
@@ -2364,6 +2365,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[_TMP8:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[N_CASTED18:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS19:%.*]] = alloca [3 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS20:%.*]] = alloca [3 x ptr], align 4
@@ -2372,6 +2374,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[_TMP23:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_24:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_25:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK11-NEXT:    store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 4
@@ -2420,7 +2423,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP21]], 1
 // CHECK11-NEXT:    [[TMP22:%.*]] = zext i32 [[ADD]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP23]], align 4
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2493,7 +2495,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP57:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4
 // CHECK11-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP57]], 1
 // CHECK11-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP59]], align 4
 // CHECK11-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1
@@ -2566,7 +2567,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP93:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_25]], align 4
 // CHECK11-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP93]], 1
 // CHECK11-NEXT:    [[TMP94:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK11-NEXT:    [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP95]], align 4
 // CHECK11-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1
@@ -3008,14 +3008,17 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS9:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    store ptr [[A]], ptr [[TMP0]], align 4
@@ -3025,7 +3028,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK11-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -3067,7 +3069,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -3109,7 +3110,6 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP42]], align 4
 // CHECK11-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS12:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP45]], align 4
 // CHECK11-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp
index 164a8d91650b8..217c8b1dc9a8f 100644
--- a/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp
@@ -265,6 +265,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr @t_var, align 4
 // CHECK1-NEXT:    store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4
@@ -304,7 +305,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP18]], align 8
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP21]], align 4
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -552,6 +552,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK1-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -594,7 +595,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP17]], align 8
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP20]], align 4
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1090,6 +1090,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr @t_var, align 4
 // CHECK3-NEXT:    store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4
@@ -1129,7 +1130,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP18]], align 4
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP21]], align 4
 // CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1375,6 +1375,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
 // CHECK3-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK3-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -1417,7 +1418,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP17]], align 4
 // CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP20]], align 4
 // CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp
index 80a7ccad9bba1..8b2bda6303e72 100644
--- a/clang/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp
@@ -526,6 +526,7 @@ int main() {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    store ptr [[G]], ptr [[G1]], align 8
@@ -580,7 +581,6 @@ int main() {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP22]], align 8
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -856,6 +856,7 @@ int main() {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK9-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK9-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -898,7 +899,6 @@ int main() {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP17]], align 8
 // CHECK9-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP20]], align 4
 // CHECK9-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1243,6 +1243,7 @@ int main() {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    store ptr [[G]], ptr [[G1]], align 4
@@ -1297,7 +1298,6 @@ int main() {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP22]], align 4
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP25]], align 4
 // CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1571,6 +1571,7 @@ int main() {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK11-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK11-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -1613,7 +1614,6 @@ int main() {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP17]], align 4
 // CHECK11-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP20]], align 4
 // CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_simd_private_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_private_codegen.cpp
index e4fb4beba475d..f33edf0e50b0e 100644
--- a/clang/test/OpenMP/teams_distribute_simd_private_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_simd_private_codegen.cpp
@@ -241,8 +241,8 @@ int main() {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -409,6 +409,7 @@ int main() {
 // CHECK1-NEXT:    [[VAR:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK1-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -418,7 +419,6 @@ int main() {
 // CHECK1-NEXT:    call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2)
 // CHECK1-NEXT:    store ptr [[TEST]], ptr [[VAR]], align 8
 // CHECK1-NEXT:    store ptr undef, ptr [[_TMP1]], align 8
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -790,8 +790,8 @@ int main() {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -956,6 +956,7 @@ int main() {
 // CHECK3-NEXT:    [[VAR:%.*]] = alloca ptr, align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[_TMP1:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK3-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -965,7 +966,6 @@ int main() {
 // CHECK3-NEXT:    call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2)
 // CHECK3-NEXT:    store ptr [[TEST]], ptr [[VAR]], align 4
 // CHECK3-NEXT:    store ptr undef, ptr [[_TMP1]], align 4
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp
index 22912243f2015..3d53139a19d0d 100644
--- a/clang/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp
@@ -91,6 +91,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4
 // CHECK1-NEXT:    store i32 [[TMP0]], ptr [[SIVAR_CASTED]], align 4
@@ -103,7 +104,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP4]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -277,6 +277,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 4
@@ -290,7 +291,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP4]], align 8
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -469,6 +469,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4
 // CHECK3-NEXT:    store i32 [[TMP0]], ptr [[SIVAR_CASTED]], align 4
@@ -481,7 +482,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP4]], align 4
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK3-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -655,6 +655,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[T_VAR]], align 4
 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 4
@@ -668,7 +669,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP4]], align 4
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP7]], align 4
 // CHECK3-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_firstprivate_codegen.cpp
index e08d498d874bf..c1617e16b402c 100644
--- a/clang/test/OpenMP/teams_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/teams_firstprivate_codegen.cpp
@@ -295,10 +295,12 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[T_VAR_CASTED1:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS3:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [1 x ptr], align 8
+// CHECK9-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    call void @_ZN1SIfEC1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK9-NEXT:    store i32 0, ptr [[T_VAR]], align 4
@@ -346,7 +348,6 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP18]], align 8
 // CHECK9-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP21]], align 4
 // CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -391,7 +392,6 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP40]], align 8
 // CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP43]], align 4
 // CHECK9-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -640,9 +640,11 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 8
+// CHECK9-NEXT:    [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    call void @_ZN1SIiEC1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK9-NEXT:    store i32 0, ptr [[T_VAR]], align 128
 // CHECK9-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VEC]], ptr align 128 @__const._Z5tmainIiET_v.vec, i64 8, i1 false)
@@ -677,7 +679,6 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP11]], align 8
 // CHECK9-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP14]], align 4
 // CHECK9-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -719,7 +720,6 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP31]], align 8
 // CHECK9-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP34]], align 4
 // CHECK9-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1
@@ -1096,10 +1096,12 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[T_VAR_CASTED1:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS3:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [1 x ptr], align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    call void @_ZN1SIfEC1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK11-NEXT:    store i32 0, ptr [[T_VAR]], align 4
@@ -1147,7 +1149,6 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP18]], align 4
 // CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP21]], align 4
 // CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1192,7 +1193,6 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP40]], align 4
 // CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP43]], align 4
 // CHECK11-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
@@ -1441,9 +1441,11 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    call void @_ZN1SIiEC1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK11-NEXT:    store i32 0, ptr [[T_VAR]], align 128
 // CHECK11-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[VEC]], ptr align 128 @__const._Z5tmainIiET_v.vec, i32 8, i1 false)
@@ -1478,7 +1480,6 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP11]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP14]], align 4
 // CHECK11-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1520,7 +1521,6 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP31]], align 4
 // CHECK11-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP34]], align 4
 // CHECK11-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1
@@ -1898,6 +1898,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [8 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [8 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [8 x i64], align 8
+// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 8
 // CHECK17-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
 // CHECK17-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
@@ -1976,7 +1977,6 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK17-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [8 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [8 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [8 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP43]], align 4
 // CHECK17-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2111,6 +2111,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK17-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [10 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [10 x ptr], align 8
 // CHECK17-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [10 x i64], align 8
+// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK17-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 8
 // CHECK17-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
@@ -2214,7 +2215,6 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK17-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [10 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK17-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK17-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK17-NEXT:    store i32 2, ptr [[TMP55]], align 4
 // CHECK17-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2370,6 +2370,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [8 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [8 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [8 x i64], align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // CHECK19-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 4
 // CHECK19-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
@@ -2446,7 +2447,6 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK19-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [8 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [8 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [8 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP41]], align 4
 // CHECK19-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -2581,6 +2581,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK19-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [10 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [10 x ptr], align 4
 // CHECK19-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [10 x i64], align 4
+// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK19-NEXT:    store ptr [[S]], ptr [[S_ADDR]], align 4
 // CHECK19-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
@@ -2682,7 +2683,6 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK19-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [10 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK19-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK19-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK19-NEXT:    store i32 2, ptr [[TMP53]], align 4
 // CHECK19-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/clang/test/OpenMP/teams_private_codegen.cpp b/clang/test/OpenMP/teams_private_codegen.cpp
index b8bb9ed3e7484..f6101e4cbf6ef 100644
--- a/clang/test/OpenMP/teams_private_codegen.cpp
+++ b/clang/test/OpenMP/teams_private_codegen.cpp
@@ -196,6 +196,7 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    store ptr [[D]], ptr [[D_ADDR]], align 8
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
@@ -217,7 +218,6 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP3]], align 8
 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 2, ptr [[TMP6]], align 4
 // CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -389,6 +389,7 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK3-NEXT:    store ptr [[D]], ptr [[D_ADDR]], align 4
 // CHECK3-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
@@ -410,7 +411,6 @@ int main() {
 // CHECK3-NEXT:    store ptr null, ptr [[TMP3]], align 4
 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 2, ptr [[TMP6]], align 4
 // CHECK3-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -559,6 +559,7 @@ int main() {
 // CHECK9-NEXT:    [[VEC:%.*]] = alloca [2 x i32], align 4
 // CHECK9-NEXT:    [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4
 // CHECK9-NEXT:    [[VAR:%.*]] = alloca [[STRUCT_S]], align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    call void @_ZN2SSC1ERi(ptr noundef nonnull align 8 dereferenceable(16) [[SS]], ptr noundef nonnull align 4 dereferenceable(4) @_ZZ4mainE5sivar)
 // CHECK9-NEXT:    call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
@@ -569,7 +570,6 @@ int main() {
 // CHECK9-NEXT:    [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYINIT_BEGIN]], i64 1
 // CHECK9-NEXT:    call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00)
 // CHECK9-NEXT:    call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], float noundef 3.000000e+00)
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK9-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -727,6 +727,7 @@ int main() {
 // CHECK9-NEXT:    [[VEC:%.*]] = alloca [2 x i32], align 128
 // CHECK9-NEXT:    [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128
 // CHECK9-NEXT:    [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK9-NEXT:    call void @_ZN3SSTIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[SST]])
 // CHECK9-NEXT:    store i32 0, ptr [[T_VAR]], align 128
@@ -736,7 +737,6 @@ int main() {
 // CHECK9-NEXT:    [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1
 // CHECK9-NEXT:    call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2)
 // CHECK9-NEXT:    call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef signext 3)
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK9-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -795,6 +795,7 @@ int main() {
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK9-NEXT:    store ptr [[D]], ptr [[D_ADDR]], align 8
 // CHECK9-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
@@ -816,7 +817,6 @@ int main() {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP3]], align 8
 // CHECK9-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP6]], align 4
 // CHECK9-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1037,6 +1037,7 @@ int main() {
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
+// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK9-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK9-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -1049,7 +1050,6 @@ int main() {
 // CHECK9-NEXT:    store ptr null, ptr [[TMP2]], align 8
 // CHECK9-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1156,6 +1156,7 @@ int main() {
 // CHECK11-NEXT:    [[VEC:%.*]] = alloca [2 x i32], align 4
 // CHECK11-NEXT:    [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4
 // CHECK11-NEXT:    [[VAR:%.*]] = alloca [[STRUCT_S]], align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    call void @_ZN2SSC1ERi(ptr noundef nonnull align 4 dereferenceable(12) [[SS]], ptr noundef nonnull align 4 dereferenceable(4) @_ZZ4mainE5sivar)
 // CHECK11-NEXT:    call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
@@ -1166,7 +1167,6 @@ int main() {
 // CHECK11-NEXT:    [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYINIT_BEGIN]], i32 1
 // CHECK11-NEXT:    call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00)
 // CHECK11-NEXT:    call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], float noundef 3.000000e+00)
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK11-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1324,6 +1324,7 @@ int main() {
 // CHECK11-NEXT:    [[VEC:%.*]] = alloca [2 x i32], align 128
 // CHECK11-NEXT:    [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128
 // CHECK11-NEXT:    [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK11-NEXT:    call void @_ZN3SSTIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[SST]])
 // CHECK11-NEXT:    store i32 0, ptr [[T_VAR]], align 128
@@ -1333,7 +1334,6 @@ int main() {
 // CHECK11-NEXT:    [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1
 // CHECK11-NEXT:    call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2)
 // CHECK11-NEXT:    call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3)
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP0]], align 4
 // CHECK11-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1392,6 +1392,7 @@ int main() {
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK11-NEXT:    store ptr [[D]], ptr [[D_ADDR]], align 4
 // CHECK11-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
@@ -1413,7 +1414,6 @@ int main() {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP3]], align 4
 // CHECK11-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP6]], align 4
 // CHECK11-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
@@ -1634,6 +1634,7 @@ int main() {
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
 // CHECK11-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
 // CHECK11-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[THIS1]], i32 0, i32 0
@@ -1646,7 +1647,6 @@ int main() {
 // CHECK11-NEXT:    store ptr null, ptr [[TMP2]], align 4
 // CHECK11-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 2, ptr [[TMP5]], align 4
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1

diff  --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 1dfa858421d93..b3c4f21274cb2 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -988,6 +988,7 @@ class OpenMPIRBuilder {
   /// Generate a target region entry call.
   ///
   /// \param Loc The location at which the request originated and is fulfilled.
+  /// \param AllocaIP The insertion point to be used for alloca instructions.
   /// \param Return Return value of the created function returned by reference.
   /// \param DeviceID Identifier for the device via the 'device' clause.
   /// \param NumTeams Number of teams for the region via the 'num_teams' clause
@@ -995,7 +996,8 @@ class OpenMPIRBuilder {
   /// \param NumThreads Number of threads via the 'thread_limit' clause.
   /// \param HostPtr Pointer to the host-side pointer of the target kernel.
   /// \param KernelArgs Array of arguments to the kernel.
-  InsertPointTy emitTargetKernel(const LocationDescription &Loc, Value *&Return,
+  InsertPointTy emitTargetKernel(const LocationDescription &Loc,
+                                 InsertPointTy AllocaIP, Value *&Return,
                                  Value *Ident, Value *DeviceID, Value *NumTeams,
                                  Value *NumThreads, Value *HostPtr,
                                  ArrayRef<Value *> KernelArgs);

diff  --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 9381f42454f8b..fd4d2b5d51c86 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -813,14 +813,17 @@ void OpenMPIRBuilder::emitOffloadingEntry(Constant *Addr, StringRef Name,
 }
 
 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetKernel(
-    const LocationDescription &Loc, Value *&Return, Value *Ident,
-    Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr,
-    ArrayRef<Value *> KernelArgs) {
+    const LocationDescription &Loc, InsertPointTy AllocaIP, Value *&Return,
+    Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads,
+    Value *HostPtr, ArrayRef<Value *> KernelArgs) {
   if (!updateToLocation(Loc))
     return Loc.IP;
 
+  Builder.restoreIP(AllocaIP);
   auto *KernelArgsPtr =
       Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs, nullptr, "kernel_args");
+  Builder.restoreIP(Loc.IP);
+
   for (unsigned I = 0, Size = KernelArgs.size(); I != Size; ++I) {
     llvm::Value *Arg =
         Builder.CreateStructGEP(OpenMPIRBuilder::KernelArgs, KernelArgsPtr, I);


        


More information about the llvm-commits mailing list