[clang] [libclc] [llvm] [libclc] Add initial LIT tests (PR #87989)

Mon Jul 7 05:04:31 PDT 2025

github-actions[bot] wrote:




:warning: The C/C++ code formatter, clang-format, found issues in your code. :warning:

<details>
<summary>
You can test this locally with the following command:
</summary>

``````````bash
git-clang-format --diff HEAD~1 HEAD --extensions h,cpp,cl -- clang/test/Driver/opencl-libclc.cl libclc/test/geometric/cross.cl libclc/test/integer/add_sat.cl libclc/test/integer/sub_sat.cl libclc/test/math/cos.cl libclc/test/math/fabs.cl libclc/test/math/rsqrt.cl libclc/test/misc/as_type.cl libclc/test/misc/convert.cl libclc/test/work-item/get_group_id.cl clang/include/clang/Driver/CommonArgs.h clang/lib/Driver/ToolChains/AMDGPU.cpp clang/lib/Driver/ToolChains/CommonArgs.cpp
``````````

</details>

<details>
<summary>
View the diff from clang-format here.
</summary>

``````````diff

diff --git a/libclc/test/geometric/cross.cl b/libclc/test/geometric/cross.cl
index 4cb8c53be..1a8462489 100644
--- a/libclc/test/geometric/cross.cl
+++ b/libclc/test/geometric/cross.cl
@@ -1,4 +1,5 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// UTC_ARGS: --version 5
 //===----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -7,38 +8,44 @@
 //
 //===----------------------------------------------------------------------===//
 
-// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s
+// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o -
+// --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s |
+// FileCheck %s
 
 // CHECK-LABEL: define protected amdgpu_kernel void @foo(
-// CHECK-SAME: ptr addrspace(1) noundef align 16 captures(none) [[F:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META9:![0-9]+]] !kernel_arg_type_qual [[META10:![0-9]+]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr addrspace(1) [[F]], align 16, !tbaa [[TBAA11:![0-9]+]]
-// CHECK-NEXT:    [[ARRAYIDX1_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[F]], i64 16
-// CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr addrspace(1) [[ARRAYIDX1_I]], align 16, !tbaa [[TBAA11]]
-// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
-// CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i64 2
-// CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
-// CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i64 1
-// CHECK-NEXT:    [[TMP6:%.*]] = fneg float [[TMP5]]
-// CHECK-NEXT:    [[NEG_I_I:%.*]] = fmul float [[TMP4]], [[TMP6]]
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP2]], float [[TMP3]], float [[NEG_I_I]])
+// CHECK-SAME: ptr addrspace(1) noundef align 16 captures(none) [[F:%.*]])
+// local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]]
+// !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]]
+// !kernel_arg_base_type [[META9:![0-9]+]] !kernel_arg_type_qual
+// [[META10:![0-9]+]] { CHECK-NEXT:  [[ENTRY:.*:]] CHECK-NEXT:    [[TMP0:%.*]] =
+// load <4 x float>, ptr addrspace(1) [[F]], align 16, !tbaa [[TBAA11:![0-9]+]]
+// CHECK-NEXT:    [[ARRAYIDX1_I:%.*]] = getelementptr inbounds nuw i8, ptr
+// addrspace(1) [[F]], i64 16 CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>,
+// ptr addrspace(1) [[ARRAYIDX1_I]], align 16, !tbaa [[TBAA11]] CHECK-NEXT:
+// [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 CHECK-NEXT:
+// [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i64 2 CHECK-NEXT:
+// [[TMP4:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 CHECK-NEXT:
+// [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i64 1 CHECK-NEXT:
+// [[TMP6:%.*]] = fneg float [[TMP5]] CHECK-NEXT:    [[NEG_I_I:%.*]] = fmul
+// float [[TMP4]], [[TMP6]] CHECK-NEXT:    [[TMP7:%.*]] = tail call float
+// @llvm.fmuladd.f32(float [[TMP2]], float [[TMP3]], float [[NEG_I_I]])
 // CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
 // CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
 // CHECK-NEXT:    [[TMP10:%.*]] = fneg float [[TMP3]]
 // CHECK-NEXT:    [[NEG3_I_I:%.*]] = fmul float [[TMP9]], [[TMP10]]
-// CHECK-NEXT:    [[TMP11:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP4]], float [[TMP8]], float [[NEG3_I_I]])
-// CHECK-NEXT:    [[TMP12:%.*]] = fneg float [[TMP8]]
-// CHECK-NEXT:    [[NEG6_I_I:%.*]] = fmul float [[TMP2]], [[TMP12]]
-// CHECK-NEXT:    [[TMP13:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP9]], float [[TMP5]], float [[NEG6_I_I]])
-// CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x float> <float poison, float poison, float poison, float 0.000000e+00>, float [[TMP7]], i64 0
-// CHECK-NEXT:    [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP11]], i64 1
-// CHECK-NEXT:    [[VECINIT8_I_I:%.*]] = insertelement <4 x float> [[TMP15]], float [[TMP13]], i64 2
-// CHECK-NEXT:    store <4 x float> [[VECINIT8_I_I]], ptr addrspace(1) [[F]], align 16, !tbaa [[TBAA11]]
-// CHECK-NEXT:    ret void
+// CHECK-NEXT:    [[TMP11:%.*]] = tail call float @llvm.fmuladd.f32(float
+// [[TMP4]], float [[TMP8]], float [[NEG3_I_I]]) CHECK-NEXT:    [[TMP12:%.*]] =
+// fneg float [[TMP8]] CHECK-NEXT:    [[NEG6_I_I:%.*]] = fmul float [[TMP2]],
+// [[TMP12]] CHECK-NEXT:    [[TMP13:%.*]] = tail call float
+// @llvm.fmuladd.f32(float [[TMP9]], float [[TMP5]], float [[NEG6_I_I]])
+// CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x float> <float poison, float
+// poison, float poison, float 0.000000e+00>, float [[TMP7]], i64 0 CHECK-NEXT:
+// [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP11]], i64 1
+// CHECK-NEXT:    [[VECINIT8_I_I:%.*]] = insertelement <4 x float> [[TMP15]],
+// float [[TMP13]], i64 2 CHECK-NEXT:    store <4 x float> [[VECINIT8_I_I]], ptr
+// addrspace(1) [[F]], align 16, !tbaa [[TBAA11]] CHECK-NEXT:    ret void
 //
-__kernel void foo(__global float4 *f) {
-  *f = cross(f[0], f[1]);
-}
+__kernel void foo(__global float4 *f) { *f = cross(f[0], f[1]); }
 //.
 // CHECK: [[META6]] = !{i32 1}
 // CHECK: [[META7]] = !{!"none"}
diff --git a/libclc/test/integer/add_sat.cl b/libclc/test/integer/add_sat.cl
index ef5bf77b6..8c16b2371 100644
--- a/libclc/test/integer/add_sat.cl
+++ b/libclc/test/integer/add_sat.cl
@@ -1,4 +1,5 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// UTC_ARGS: --version 5
 //===----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -7,15 +8,23 @@
 //
 //===----------------------------------------------------------------------===//
 
-// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s
+// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o -
+// --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s |
+// FileCheck %s
 
 // CHECK-LABEL: define protected amdgpu_kernel void @foo(
-// CHECK-SAME: ptr addrspace(1) noundef writeonly align 1 captures(none) initializes((0, 1)) [[A:%.*]], ptr addrspace(1) noundef readonly align 1 captures(none) [[B:%.*]], ptr addrspace(1) noundef readonly align 1 captures(none) [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META9:![0-9]+]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr addrspace(1) [[B]], align 1, !tbaa [[TBAA10:![0-9]+]]
-// CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr addrspace(1) [[C]], align 1, !tbaa [[TBAA10]]
-// CHECK-NEXT:    [[ELT_SAT_I_I:%.*]] = tail call noundef i8 @llvm.sadd.sat.i8(i8 [[TMP0]], i8 [[TMP1]])
-// CHECK-NEXT:    store i8 [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 1, !tbaa [[TBAA10]]
+// CHECK-SAME: ptr addrspace(1) noundef writeonly align 1 captures(none)
+// initializes((0, 1)) [[A:%.*]], ptr addrspace(1) noundef readonly align 1
+// captures(none) [[B:%.*]], ptr addrspace(1) noundef readonly align 1
+// captures(none) [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]]
+// !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual
+// [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type
+// [[META8]] !kernel_arg_type_qual [[META9:![0-9]+]] { CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr addrspace(1) [[B]], align 1, !tbaa
+// [[TBAA10:![0-9]+]] CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr addrspace(1)
+// [[C]], align 1, !tbaa [[TBAA10]] CHECK-NEXT:    [[ELT_SAT_I_I:%.*]] = tail
+// call noundef i8 @llvm.sadd.sat.i8(i8 [[TMP0]], i8 [[TMP1]]) CHECK-NEXT: store
+// i8 [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 1, !tbaa [[TBAA10]]
 // CHECK-NEXT:    ret void
 //
 __kernel void foo(__global char *a, __global char *b, __global char *c) {
diff --git a/libclc/test/integer/sub_sat.cl b/libclc/test/integer/sub_sat.cl
index 7c3f0a3aa..5da7c6f7c 100644
--- a/libclc/test/integer/sub_sat.cl
+++ b/libclc/test/integer/sub_sat.cl
@@ -1,4 +1,5 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// UTC_ARGS: --version 5
 //===----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -7,13 +8,20 @@
 //
 //===----------------------------------------------------------------------===//
 
-// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s
+// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o -
+// --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s |
+// FileCheck %s
 
 // CHECK-LABEL: define protected amdgpu_kernel void @test_subsat_char(
-// CHECK-SAME: ptr addrspace(1) noundef writeonly align 1 captures(none) initializes((0, 1)) [[A:%.*]], i8 noundef [[X:%.*]], i8 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META9:![0-9]+]] {
+// CHECK-SAME: ptr addrspace(1) noundef writeonly align 1 captures(none)
+// initializes((0, 1)) [[A:%.*]], i8 noundef [[X:%.*]], i8 noundef [[Y:%.*]])
+// local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]]
+// !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]]
+// !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META9:![0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[ELT_SAT_I_I:%.*]] = tail call noundef i8 @llvm.ssub.sat.i8(i8 [[X]], i8 [[Y]])
-// CHECK-NEXT:    store i8 [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 1, !tbaa [[TBAA10:![0-9]+]]
+// CHECK-NEXT:    [[ELT_SAT_I_I:%.*]] = tail call noundef i8
+// @llvm.ssub.sat.i8(i8 [[X]], i8 [[Y]]) CHECK-NEXT:    store i8
+// [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 1, !tbaa [[TBAA10:![0-9]+]]
 // CHECK-NEXT:    ret void
 //
 __kernel void test_subsat_char(__global char *a, char x, char y) {
@@ -22,10 +30,15 @@ __kernel void test_subsat_char(__global char *a, char x, char y) {
 }
 
 // CHECK-LABEL: define protected amdgpu_kernel void @test_subsat_uchar(
-// CHECK-SAME: ptr addrspace(1) noundef writeonly align 1 captures(none) initializes((0, 1)) [[A:%.*]], i8 noundef [[X:%.*]], i8 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META6]] !kernel_arg_access_qual [[META7]] !kernel_arg_type [[META13:![0-9]+]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META9]] {
+// CHECK-SAME: ptr addrspace(1) noundef writeonly align 1 captures(none)
+// initializes((0, 1)) [[A:%.*]], i8 noundef [[X:%.*]], i8 noundef [[Y:%.*]])
+// local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META6]]
+// !kernel_arg_access_qual [[META7]] !kernel_arg_type [[META13:![0-9]+]]
+// !kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META9]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[ELT_SAT_I_I:%.*]] = tail call noundef i8 @llvm.usub.sat.i8(i8 [[X]], i8 [[Y]])
-// CHECK-NEXT:    store i8 [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 1, !tbaa [[TBAA10]]
+// CHECK-NEXT:    [[ELT_SAT_I_I:%.*]] = tail call noundef i8
+// @llvm.usub.sat.i8(i8 [[X]], i8 [[Y]]) CHECK-NEXT:    store i8
+// [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 1, !tbaa [[TBAA10]]
 // CHECK-NEXT:    ret void
 //
 __kernel void test_subsat_uchar(__global uchar *a, uchar x, uchar y) {
@@ -34,10 +47,15 @@ __kernel void test_subsat_uchar(__global uchar *a, uchar x, uchar y) {
 }
 
 // CHECK-LABEL: define protected amdgpu_kernel void @test_subsat_long(
-// CHECK-SAME: ptr addrspace(1) noundef writeonly align 8 captures(none) initializes((0, 8)) [[A:%.*]], i64 noundef [[X:%.*]], i64 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META6]] !kernel_arg_access_qual [[META7]] !kernel_arg_type [[META14:![0-9]+]] !kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META9]] {
+// CHECK-SAME: ptr addrspace(1) noundef writeonly align 8 captures(none)
+// initializes((0, 8)) [[A:%.*]], i64 noundef [[X:%.*]], i64 noundef [[Y:%.*]])
+// local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META6]]
+// !kernel_arg_access_qual [[META7]] !kernel_arg_type [[META14:![0-9]+]]
+// !kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META9]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[ELT_SAT_I_I:%.*]] = tail call noundef i64 @llvm.ssub.sat.i64(i64 [[X]], i64 [[Y]])
-// CHECK-NEXT:    store i64 [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 8, !tbaa [[TBAA15:![0-9]+]]
+// CHECK-NEXT:    [[ELT_SAT_I_I:%.*]] = tail call noundef i64
+// @llvm.ssub.sat.i64(i64 [[X]], i64 [[Y]]) CHECK-NEXT:    store i64
+// [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 8, !tbaa [[TBAA15:![0-9]+]]
 // CHECK-NEXT:    ret void
 //
 __kernel void test_subsat_long(__global long *a, long x, long y) {
@@ -46,10 +64,15 @@ __kernel void test_subsat_long(__global long *a, long x, long y) {
 }
 
 // CHECK-LABEL: define protected amdgpu_kernel void @test_subsat_ulong(
-// CHECK-SAME: ptr addrspace(1) noundef writeonly align 8 captures(none) initializes((0, 8)) [[A:%.*]], i64 noundef [[X:%.*]], i64 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META6]] !kernel_arg_access_qual [[META7]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META9]] {
+// CHECK-SAME: ptr addrspace(1) noundef writeonly align 8 captures(none)
+// initializes((0, 8)) [[A:%.*]], i64 noundef [[X:%.*]], i64 noundef [[Y:%.*]])
+// local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META6]]
+// !kernel_arg_access_qual [[META7]] !kernel_arg_type [[META17:![0-9]+]]
+// !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META9]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[ELT_SAT_I_I:%.*]] = tail call noundef i64 @llvm.usub.sat.i64(i64 [[X]], i64 [[Y]])
-// CHECK-NEXT:    store i64 [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 8, !tbaa [[TBAA15]]
+// CHECK-NEXT:    [[ELT_SAT_I_I:%.*]] = tail call noundef i64
+// @llvm.usub.sat.i64(i64 [[X]], i64 [[Y]]) CHECK-NEXT:    store i64
+// [[ELT_SAT_I_I]], ptr addrspace(1) [[A]], align 8, !tbaa [[TBAA15]]
 // CHECK-NEXT:    ret void
 //
 __kernel void test_subsat_ulong(__global ulong *a, ulong x, ulong y) {
diff --git a/libclc/test/math/cos.cl b/libclc/test/math/cos.cl
index 79272cce4..4a95787e8 100644
--- a/libclc/test/math/cos.cl
+++ b/libclc/test/math/cos.cl
@@ -1,4 +1,5 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// UTC_ARGS: --version 5
 //===----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -7,283 +8,393 @@
 //
 //===----------------------------------------------------------------------===//
 
-// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s
+// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o -
+// --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s |
+// FileCheck %s
 
 // CHECK-LABEL: define protected amdgpu_kernel void @foo(
-// CHECK-SAME: ptr addrspace(1) noundef align 16 captures(none) [[F:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META9:![0-9]+]] !kernel_arg_type_qual [[META10:![0-9]+]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr addrspace(1) [[F]], align 16, !tbaa [[TBAA11:![0-9]+]]
-// CHECK-NEXT:    [[ELT_ABS_I_I_I:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
-// CHECK-NEXT:    [[CMP_I_I:%.*]] = fcmp olt <4 x float> [[ELT_ABS_I_I_I]], splat (float 0x4160000000000000)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[ELT_ABS_I_I_I]], <4 x float> splat (float 0x3FE45F3060000000), <4 x float> splat (float 5.000000e-01))
-// CHECK-NEXT:    [[ELT_TRUNC_I_I:%.*]] = tail call noundef <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP1]])
-// CHECK-NEXT:    [[MUL_I30_I_I_I_I:%.*]] = fmul <4 x float> [[ELT_TRUNC_I_I]], splat (float 0x3FF921FB40000000)
-// CHECK-NEXT:    [[FNEG_I31_I_I_I_I:%.*]] = fneg <4 x float> [[MUL_I30_I_I_I_I]]
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[ELT_TRUNC_I_I]], <4 x float> splat (float 0x3FF921FB40000000), <4 x float> [[FNEG_I31_I_I_I_I]])
-// CHECK-NEXT:    [[SUB_I_I_I_I:%.*]] = fsub <4 x float> [[ELT_ABS_I_I_I]], [[MUL_I30_I_I_I_I]]
-// CHECK-NEXT:    [[SUB2_I_I_I_I:%.*]] = fsub <4 x float> [[ELT_ABS_I_I_I]], [[SUB_I_I_I_I]]
-// CHECK-NEXT:    [[SUB3_I_I_I_I:%.*]] = fsub <4 x float> [[SUB2_I_I_I_I]], [[MUL_I30_I_I_I_I]]
-// CHECK-NEXT:    [[SUB4_I_I_I_I:%.*]] = fsub <4 x float> [[SUB3_I_I_I_I]], [[TMP2]]
-// CHECK-NEXT:    [[ADD_I_I_I_I:%.*]] = fadd <4 x float> [[SUB_I_I_I_I]], [[SUB4_I_I_I_I]]
-// CHECK-NEXT:    [[MUL_I27_I_I_I_I:%.*]] = fmul <4 x float> [[ELT_TRUNC_I_I]], splat (float 0x3E74442D00000000)
-// CHECK-NEXT:    [[FNEG_I28_I_I_I_I:%.*]] = fneg <4 x float> [[MUL_I27_I_I_I_I]]
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[ELT_TRUNC_I_I]], <4 x float> splat (float 0x3E74442D00000000), <4 x float> [[FNEG_I28_I_I_I_I]])
-// CHECK-NEXT:    [[SUB5_I_I_I_I:%.*]] = fsub <4 x float> [[ADD_I_I_I_I]], [[MUL_I27_I_I_I_I]]
-// CHECK-NEXT:    [[SUB6_I_I_I_I:%.*]] = fsub <4 x float> [[ADD_I_I_I_I]], [[SUB5_I_I_I_I]]
-// CHECK-NEXT:    [[SUB7_I_I_I_I:%.*]] = fsub <4 x float> [[SUB6_I_I_I_I]], [[MUL_I27_I_I_I_I]]
-// CHECK-NEXT:    [[SUB8_I_I_I_I:%.*]] = fsub <4 x float> [[SUB7_I_I_I_I]], [[TMP3]]
-// CHECK-NEXT:    [[ADD9_I_I_I_I:%.*]] = fadd <4 x float> [[SUB5_I_I_I_I]], [[SUB8_I_I_I_I]]
-// CHECK-NEXT:    [[MUL_I_I_I_I_I:%.*]] = fmul <4 x float> [[ELT_TRUNC_I_I]], splat (float 0x3CF8469880000000)
-// CHECK-NEXT:    [[FNEG_I_I_I_I_I:%.*]] = fneg <4 x float> [[MUL_I_I_I_I_I]]
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[ELT_TRUNC_I_I]], <4 x float> splat (float 0x3CF8469880000000), <4 x float> [[FNEG_I_I_I_I_I]])
-// CHECK-NEXT:    [[SUB10_I_I_I_I:%.*]] = fsub <4 x float> [[ADD9_I_I_I_I]], [[MUL_I_I_I_I_I]]
-// CHECK-NEXT:    [[SUB11_I_I_I_I:%.*]] = fsub <4 x float> [[ADD9_I_I_I_I]], [[SUB10_I_I_I_I]]
-// CHECK-NEXT:    [[SUB12_I_I_I_I:%.*]] = fsub <4 x float> [[SUB11_I_I_I_I]], [[MUL_I_I_I_I_I]]
-// CHECK-NEXT:    [[ADD13_I_I_I_I:%.*]] = fadd <4 x float> [[SUB10_I_I_I_I]], [[SUB12_I_I_I_I]]
-// CHECK-NEXT:    [[FNEG_I_I_I_I:%.*]] = fneg <4 x float> [[TMP4]]
-// CHECK-NEXT:    [[CONV_I_I_I:%.*]] = fptosi <4 x float> [[ELT_TRUNC_I_I]] to <4 x i32>
-// CHECK-NEXT:    [[ASTYPE_I_I_I:%.*]] = bitcast <4 x float> [[ELT_ABS_I_I_I]] to <4 x i32>
-// CHECK-NEXT:    [[SHR_I_I_I:%.*]] = lshr <4 x i32> [[ASTYPE_I_I_I]], splat (i32 23)
-// CHECK-NEXT:    [[AND_I11_I_I:%.*]] = and <4 x i32> [[ASTYPE_I_I_I]], splat (i32 8388607)
-// CHECK-NEXT:    [[OR_I_I_I:%.*]] = or disjoint <4 x i32> [[AND_I11_I_I]], splat (i32 8388608)
-// CHECK-NEXT:    [[MUL_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 -28220501)
-// CHECK-NEXT:    [[CONV_I1_I27_I_I:%.*]] = zext nneg <4 x i32> [[OR_I_I_I]] to <4 x i64>
-// CHECK-NEXT:    [[MUL_I28_I_I:%.*]] = mul nuw nsw <4 x i64> [[CONV_I1_I27_I_I]], splat (i64 4266746795)
-// CHECK-NEXT:    [[SHR_I29_I_I:%.*]] = lshr <4 x i64> [[MUL_I28_I_I]], splat (i64 32)
-// CHECK-NEXT:    [[CONV_I2_I30_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I29_I_I]] to <4 x i32>
-// CHECK-NEXT:    [[MUL2_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 1011060801)
-// CHECK-NEXT:    [[ADD_I_I_I:%.*]] = add <4 x i32> [[MUL2_I_I_I]], [[CONV_I2_I30_I_I]]
-// CHECK-NEXT:    [[MUL_I24_I_I:%.*]] = mul nuw nsw <4 x i64> [[CONV_I1_I27_I_I]], splat (i64 1011060801)
-// CHECK-NEXT:    [[SHR_I25_I_I:%.*]] = lshr <4 x i64> [[MUL_I24_I_I]], splat (i64 32)
-// CHECK-NEXT:    [[CONV_I2_I26_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I25_I_I]] to <4 x i32>
-// CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD_I_I_I]], [[CONV_I2_I30_I_I]]
-// CHECK-NEXT:    [[SEXT_I_I1_I:%.*]] = zext <4 x i1> [[CMP_I_I_I]] to <4 x i32>
-// CHECK-NEXT:    [[ADD5_I_I_I:%.*]] = add nuw nsw <4 x i32> [[SEXT_I_I1_I]], [[CONV_I2_I26_I_I]]
-// CHECK-NEXT:    [[MUL6_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 -614296167)
-// CHECK-NEXT:    [[ADD7_I_I_I:%.*]] = add <4 x i32> [[ADD5_I_I_I]], [[MUL6_I_I_I]]
-// CHECK-NEXT:    [[MUL_I20_I_I:%.*]] = mul nuw nsw <4 x i64> [[CONV_I1_I27_I_I]], splat (i64 3680671129)
-// CHECK-NEXT:    [[SHR_I21_I_I:%.*]] = lshr <4 x i64> [[MUL_I20_I_I]], splat (i64 32)
-// CHECK-NEXT:    [[CONV_I2_I22_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I21_I_I]] to <4 x i32>
-// CHECK-NEXT:    [[CMP9_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD7_I_I_I]], [[ADD5_I_I_I]]
-// CHECK-NEXT:    [[SEXT10_I_I_I:%.*]] = zext <4 x i1> [[CMP9_I_I_I]] to <4 x i32>
-// CHECK-NEXT:    [[ADD13_I_I_I:%.*]] = add nuw nsw <4 x i32> [[SEXT10_I_I_I]], [[CONV_I2_I22_I_I]]
-// CHECK-NEXT:    [[MUL14_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 -181084736)
-// CHECK-NEXT:    [[ADD15_I_I_I:%.*]] = add <4 x i32> [[ADD13_I_I_I]], [[MUL14_I_I_I]]
-// CHECK-NEXT:    [[MUL_I16_I_I:%.*]] = mul nuw nsw <4 x i64> [[CONV_I1_I27_I_I]], splat (i64 4113882560)
-// CHECK-NEXT:    [[SHR_I17_I_I:%.*]] = lshr <4 x i64> [[MUL_I16_I_I]], splat (i64 32)
-// CHECK-NEXT:    [[CONV_I2_I18_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I17_I_I]] to <4 x i32>
-// CHECK-NEXT:    [[CMP17_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD15_I_I_I]], [[ADD13_I_I_I]]
-// CHECK-NEXT:    [[SEXT18_I_I_I:%.*]] = zext <4 x i1> [[CMP17_I_I_I]] to <4 x i32>
-// CHECK-NEXT:    [[ADD21_I_I_I:%.*]] = add nuw nsw <4 x i32> [[SEXT18_I_I_I]], [[CONV_I2_I18_I_I]]
-// CHECK-NEXT:    [[MUL22_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 -64530479)
-// CHECK-NEXT:    [[ADD23_I_I_I:%.*]] = add <4 x i32> [[ADD21_I_I_I]], [[MUL22_I_I_I]]
-// CHECK-NEXT:    [[MUL_I12_I_I:%.*]] = mul nuw nsw <4 x i64> [[CONV_I1_I27_I_I]], splat (i64 4230436817)
-// CHECK-NEXT:    [[SHR_I13_I_I:%.*]] = lshr <4 x i64> [[MUL_I12_I_I]], splat (i64 32)
-// CHECK-NEXT:    [[CONV_I2_I14_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I13_I_I]] to <4 x i32>
-// CHECK-NEXT:    [[CMP25_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD23_I_I_I]], [[ADD21_I_I_I]]
-// CHECK-NEXT:    [[SEXT26_I_I_I:%.*]] = zext <4 x i1> [[CMP25_I_I_I]] to <4 x i32>
-// CHECK-NEXT:    [[ADD29_I_I_I:%.*]] = add nuw nsw <4 x i32> [[SEXT26_I_I_I]], [[CONV_I2_I14_I_I]]
-// CHECK-NEXT:    [[MUL30_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 1313084713)
-// CHECK-NEXT:    [[ADD31_I_I_I:%.*]] = add <4 x i32> [[ADD29_I_I_I]], [[MUL30_I_I_I]]
-// CHECK-NEXT:    [[MUL_I8_I_I:%.*]] = mul nuw nsw <4 x i64> [[CONV_I1_I27_I_I]], splat (i64 1313084713)
-// CHECK-NEXT:    [[SHR_I9_I_I:%.*]] = lshr <4 x i64> [[MUL_I8_I_I]], splat (i64 32)
-// CHECK-NEXT:    [[CONV_I2_I10_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I9_I_I]] to <4 x i32>
-// CHECK-NEXT:    [[CMP33_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD31_I_I_I]], [[ADD29_I_I_I]]
-// CHECK-NEXT:    [[SEXT34_I_I_I:%.*]] = zext <4 x i1> [[CMP33_I_I_I]] to <4 x i32>
-// CHECK-NEXT:    [[ADD37_I_I_I:%.*]] = add nuw nsw <4 x i32> [[SEXT34_I_I_I]], [[CONV_I2_I10_I_I]]
-// CHECK-NEXT:    [[MUL38_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 -1560706194)
-// CHECK-NEXT:    [[ADD39_I_I_I:%.*]] = add <4 x i32> [[ADD37_I_I_I]], [[MUL38_I_I_I]]
-// CHECK-NEXT:    [[MUL_I5_I_I:%.*]] = mul nuw nsw <4 x i64> [[CONV_I1_I27_I_I]], splat (i64 2734261102)
-// CHECK-NEXT:    [[SHR_I6_I_I:%.*]] = lshr <4 x i64> [[MUL_I5_I_I]], splat (i64 32)
-// CHECK-NEXT:    [[CONV_I2_I_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I6_I_I]] to <4 x i32>
-// CHECK-NEXT:    [[CMP41_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD39_I_I_I]], [[ADD37_I_I_I]]
-// CHECK-NEXT:    [[SEXT42_I_I_I:%.*]] = zext <4 x i1> [[CMP41_I_I_I]] to <4 x i32>
-// CHECK-NEXT:    [[ADD45_I_I_I:%.*]] = add nuw nsw <4 x i32> [[SEXT42_I_I_I]], [[CONV_I2_I_I_I]]
-// CHECK-NEXT:    [[SUB47_I_I_I:%.*]] = add nsw <4 x i32> [[SHR_I_I_I]], splat (i32 -120)
-// CHECK-NEXT:    [[CMP48_I_I_I:%.*]] = icmp ugt <4 x i32> [[SUB47_I_I_I]], splat (i32 31)
-// CHECK-NEXT:    [[COND51_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> [[ADD39_I_I_I]], <4 x i32> [[ADD45_I_I_I]]
-// CHECK-NEXT:    [[COND53_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> [[ADD31_I_I_I]], <4 x i32> [[ADD39_I_I_I]]
-// CHECK-NEXT:    [[COND55_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> [[ADD23_I_I_I]], <4 x i32> [[ADD31_I_I_I]]
-// CHECK-NEXT:    [[COND57_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> [[ADD15_I_I_I]], <4 x i32> [[ADD23_I_I_I]]
-// CHECK-NEXT:    [[COND59_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> [[ADD7_I_I_I]], <4 x i32> [[ADD15_I_I_I]]
-// CHECK-NEXT:    [[COND61_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> [[ADD_I_I_I]], <4 x i32> [[ADD7_I_I_I]]
-// CHECK-NEXT:    [[COND63_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> [[MUL_I_I_I]], <4 x i32> [[ADD_I_I_I]]
-// CHECK-NEXT:    [[DOTNEG_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32> splat (i32 -32), <4 x i32> zeroinitializer
-// CHECK-NEXT:    [[SUB66_I_I_I:%.*]] = add nsw <4 x i32> [[DOTNEG_I_I_I]], [[SUB47_I_I_I]]
-// CHECK-NEXT:    [[CMP67_I_I_I:%.*]] = icmp ugt <4 x i32> [[SUB66_I_I_I]], splat (i32 31)
-// CHECK-NEXT:    [[COND70_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x i32> [[COND53_I_I_I]], <4 x i32> [[COND51_I_I_I]]
-// CHECK-NEXT:    [[COND72_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x i32> [[COND55_I_I_I]], <4 x i32> [[COND53_I_I_I]]
-// CHECK-NEXT:    [[COND74_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x i32> [[COND57_I_I_I]], <4 x i32> [[COND55_I_I_I]]
-// CHECK-NEXT:    [[COND76_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x i32> [[COND59_I_I_I]], <4 x i32> [[COND57_I_I_I]]
-// CHECK-NEXT:    [[COND78_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x i32> [[COND61_I_I_I]], <4 x i32> [[COND59_I_I_I]]
-// CHECK-NEXT:    [[COND80_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x i32> [[COND63_I_I_I]], <4 x i32> [[COND61_I_I_I]]
-// CHECK-NEXT:    [[DOTNEG379_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x i32> splat (i32 -32), <4 x i32> zeroinitializer
-// CHECK-NEXT:    [[SUB83_I_I_I:%.*]] = add nsw <4 x i32> [[DOTNEG379_I_I_I]], [[SUB66_I_I_I]]
-// CHECK-NEXT:    [[CMP84_I_I_I:%.*]] = icmp ugt <4 x i32> [[SUB83_I_I_I]], splat (i32 31)
-// CHECK-NEXT:    [[COND87_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 x i32> [[COND72_I_I_I]], <4 x i32> [[COND70_I_I_I]]
-// CHECK-NEXT:    [[COND89_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 x i32> [[COND74_I_I_I]], <4 x i32> [[COND72_I_I_I]]
-// CHECK-NEXT:    [[COND91_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 x i32> [[COND76_I_I_I]], <4 x i32> [[COND74_I_I_I]]
-// CHECK-NEXT:    [[COND93_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 x i32> [[COND78_I_I_I]], <4 x i32> [[COND76_I_I_I]]
-// CHECK-NEXT:    [[COND95_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 x i32> [[COND80_I_I_I]], <4 x i32> [[COND78_I_I_I]]
-// CHECK-NEXT:    [[DOTNEG380_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 x i32> splat (i32 -32), <4 x i32> zeroinitializer
-// CHECK-NEXT:    [[SUB98_I_I_I:%.*]] = add nsw <4 x i32> [[DOTNEG380_I_I_I]], [[SUB83_I_I_I]]
-// CHECK-NEXT:    [[CMP99_I_I_I:%.*]] = icmp ugt <4 x i32> [[SUB98_I_I_I]], splat (i32 31)
-// CHECK-NEXT:    [[COND102_I_I_I:%.*]] = select <4 x i1> [[CMP99_I_I_I]], <4 x i32> [[COND89_I_I_I]], <4 x i32> [[COND87_I_I_I]]
-// CHECK-NEXT:    [[COND104_I_I_I:%.*]] = select <4 x i1> [[CMP99_I_I_I]], <4 x i32> [[COND91_I_I_I]], <4 x i32> [[COND89_I_I_I]]
-// CHECK-NEXT:    [[COND106_I_I_I:%.*]] = select <4 x i1> [[CMP99_I_I_I]], <4 x i32> [[COND93_I_I_I]], <4 x i32> [[COND91_I_I_I]]
-// CHECK-NEXT:    [[COND108_I_I_I:%.*]] = select <4 x i1> [[CMP99_I_I_I]], <4 x i32> [[COND95_I_I_I]], <4 x i32> [[COND93_I_I_I]]
-// CHECK-NEXT:    [[DOTNEG381_I_I_I:%.*]] = select <4 x i1> [[CMP99_I_I_I]], <4 x i32> splat (i32 -32), <4 x i32> zeroinitializer
-// CHECK-NEXT:    [[SUB111_I_I_I:%.*]] = sub nsw <4 x i32> zeroinitializer, [[SUB98_I_I_I]]
-// CHECK-NEXT:    [[CMP112_NOT_I_I_I:%.*]] = icmp eq <4 x i32> [[DOTNEG381_I_I_I]], [[SUB111_I_I_I]]
-// CHECK-NEXT:    [[SUB114_I_I_I:%.*]] = sub nsw <4 x i32> splat (i32 24), [[SHR_I_I_I]]
-// CHECK-NEXT:    [[SHL_MASK_I_I_I:%.*]] = and <4 x i32> [[SUB47_I_I_I]], splat (i32 31)
-// CHECK-NEXT:    [[SHL_I_I_I:%.*]] = shl <4 x i32> [[COND102_I_I_I]], [[SHL_MASK_I_I_I]]
-// CHECK-NEXT:    [[SHR_MASK_I_I_I:%.*]] = and <4 x i32> [[SUB114_I_I_I]], splat (i32 31)
-// CHECK-NEXT:    [[SHR116_I_I_I:%.*]] = lshr <4 x i32> [[COND104_I_I_I]], [[SHR_MASK_I_I_I]]
-// CHECK-NEXT:    [[OR117_I_I_I:%.*]] = or <4 x i32> [[SHL_I_I_I]], [[SHR116_I_I_I]]
-// CHECK-NEXT:    [[SHL120_I_I_I:%.*]] = shl <4 x i32> [[COND104_I_I_I]], [[SHL_MASK_I_I_I]]
-// CHECK-NEXT:    [[SHR122_I_I_I:%.*]] = lshr <4 x i32> [[COND106_I_I_I]], [[SHR_MASK_I_I_I]]
-// CHECK-NEXT:    [[OR123_I_I_I:%.*]] = or <4 x i32> [[SHL120_I_I_I]], [[SHR122_I_I_I]]
-// CHECK-NEXT:    [[SHL126_I_I_I:%.*]] = shl <4 x i32> [[COND106_I_I_I]], [[SHL_MASK_I_I_I]]
-// CHECK-NEXT:    [[SHR128_I_I_I:%.*]] = lshr <4 x i32> [[COND108_I_I_I]], [[SHR_MASK_I_I_I]]
-// CHECK-NEXT:    [[OR129_I_I_I:%.*]] = or <4 x i32> [[SHL126_I_I_I]], [[SHR128_I_I_I]]
-// CHECK-NEXT:    [[COND131_I_I_I:%.*]] = select <4 x i1> [[CMP112_NOT_I_I_I]], <4 x i32> [[COND102_I_I_I]], <4 x i32> [[OR117_I_I_I]]
-// CHECK-NEXT:    [[COND133_I_I_I:%.*]] = select <4 x i1> [[CMP112_NOT_I_I_I]], <4 x i32> [[COND104_I_I_I]], <4 x i32> [[OR123_I_I_I]]
-// CHECK-NEXT:    [[COND135_I_I_I:%.*]] = select <4 x i1> [[CMP112_NOT_I_I_I]], <4 x i32> [[COND106_I_I_I]], <4 x i32> [[OR129_I_I_I]]
-// CHECK-NEXT:    [[SHR136_I_I_I:%.*]] = lshr <4 x i32> [[COND131_I_I_I]], splat (i32 29)
-// CHECK-NEXT:    [[OR139_I_I_I:%.*]] = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> [[COND131_I_I_I]], <4 x i32> [[COND133_I_I_I]], <4 x i32> splat (i32 2))
-// CHECK-NEXT:    [[OR142_I_I_I:%.*]] = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> [[COND133_I_I_I]], <4 x i32> [[COND135_I_I_I]], <4 x i32> splat (i32 2))
-// CHECK-NEXT:    [[OR145_I_I_I:%.*]] = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> [[COND135_I_I_I]], <4 x i32> [[COND108_I_I_I]], <4 x i32> splat (i32 2))
-// CHECK-NEXT:    [[AND146_I_I_I:%.*]] = and <4 x i32> [[SHR136_I_I_I]], splat (i32 1)
-// CHECK-NEXT:    [[SEXT148_I_I_I:%.*]] = sub nsw <4 x i32> zeroinitializer, [[AND146_I_I_I]]
-// CHECK-NEXT:    [[TMP5:%.*]] = and <4 x i32> [[SEXT148_I_I_I]], splat (i32 -2147483648)
-// CHECK-NEXT:    [[XOR_I_I_I:%.*]] = xor <4 x i32> [[OR139_I_I_I]], [[SEXT148_I_I_I]]
-// CHECK-NEXT:    [[XOR156_I_I_I:%.*]] = xor <4 x i32> [[OR142_I_I_I]], [[SEXT148_I_I_I]]
-// CHECK-NEXT:    [[XOR157_I_I_I:%.*]] = xor <4 x i32> [[OR145_I_I_I]], [[SEXT148_I_I_I]]
-// CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[XOR_I_I_I]], i64 0
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP6]], i1 false)
-// CHECK-NEXT:    [[VECINIT_I1_I_I:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0
-// CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32> [[XOR_I_I_I]], i64 1
-// CHECK-NEXT:    [[TMP9:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP8]], i1 false)
-// CHECK-NEXT:    [[VECINIT2_I2_I_I:%.*]] = insertelement <4 x i32> [[VECINIT_I1_I_I]], i32 [[TMP9]], i64 1
-// CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i32> [[XOR_I_I_I]], i64 2
-// CHECK-NEXT:    [[TMP11:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP10]], i1 false)
-// CHECK-NEXT:    [[VECINIT4_I3_I_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I2_I_I]], i32 [[TMP11]], i64 2
-// CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i32> [[XOR_I_I_I]], i64 3
-// CHECK-NEXT:    [[TMP13:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP12]], i1 false)
-// CHECK-NEXT:    [[VECINIT6_I4_I_I:%.*]] = insertelement <4 x i32> [[VECINIT4_I3_I_I]], i32 [[TMP13]], i64 3
-// CHECK-NEXT:    [[ADD159_I_I_I:%.*]] = add nuw nsw <4 x i32> [[VECINIT6_I4_I_I]], splat (i32 1)
-// CHECK-NEXT:    [[SHL_MASK162_I_I_I:%.*]] = and <4 x i32> [[ADD159_I_I_I]], splat (i32 31)
-// CHECK-NEXT:    [[SHL163_I_I_I:%.*]] = shl <4 x i32> [[XOR_I_I_I]], [[SHL_MASK162_I_I_I]]
-// CHECK-NEXT:    [[TMP14:%.*]] = and <4 x i32> [[VECINIT6_I4_I_I]], splat (i32 31)
-// CHECK-NEXT:    [[SHR_MASK164_I_I_I:%.*]] = xor <4 x i32> [[TMP14]], splat (i32 31)
-// CHECK-NEXT:    [[SHR165_I_I_I:%.*]] = lshr <4 x i32> [[XOR156_I_I_I]], [[SHR_MASK164_I_I_I]]
-// CHECK-NEXT:    [[OR166_I_I_I:%.*]] = or <4 x i32> [[SHL163_I_I_I]], [[SHR165_I_I_I]]
-// CHECK-NEXT:    [[SHL169_I_I_I:%.*]] = shl <4 x i32> [[XOR156_I_I_I]], [[SHL_MASK162_I_I_I]]
-// CHECK-NEXT:    [[SHR171_I_I_I:%.*]] = lshr <4 x i32> [[XOR157_I_I_I]], [[SHR_MASK164_I_I_I]]
-// CHECK-NEXT:    [[OR172_I_I_I:%.*]] = or <4 x i32> [[SHL169_I_I_I]], [[SHR171_I_I_I]]
-// CHECK-NEXT:    [[SHR176_I_I_I:%.*]] = lshr <4 x i32> [[OR166_I_I_I]], splat (i32 9)
-// CHECK-NEXT:    [[TMP15:%.*]] = shl nuw nsw <4 x i32> [[VECINIT6_I4_I_I]], splat (i32 23)
-// CHECK-NEXT:    [[REASS_SUB:%.*]] = sub nsw <4 x i32> [[SHR176_I_I_I]], [[TMP15]]
-// CHECK-NEXT:    [[TMP16:%.*]] = add <4 x i32> [[REASS_SUB]], splat (i32 1056964608)
+// CHECK-SAME: ptr addrspace(1) noundef align 16 captures(none) [[F:%.*]])
+// local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]]
+// !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]]
+// !kernel_arg_base_type [[META9:![0-9]+]] !kernel_arg_type_qual
+// [[META10:![0-9]+]] { CHECK-NEXT:  [[ENTRY:.*:]] CHECK-NEXT:    [[TMP0:%.*]] =
+// load <4 x float>, ptr addrspace(1) [[F]], align 16, !tbaa [[TBAA11:![0-9]+]]
+// CHECK-NEXT:    [[ELT_ABS_I_I_I:%.*]] = tail call <4 x float>
+// @llvm.fabs.v4f32(<4 x float> [[TMP0]]) CHECK-NEXT:    [[CMP_I_I:%.*]] = fcmp
+// olt <4 x float> [[ELT_ABS_I_I_I]], splat (float 0x4160000000000000)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call noundef <4 x float>
+// @llvm.fmuladd.v4f32(<4 x float> [[ELT_ABS_I_I_I]], <4 x float> splat (float
+// 0x3FE45F3060000000), <4 x float> splat (float 5.000000e-01)) CHECK-NEXT:
+// [[ELT_TRUNC_I_I:%.*]] = tail call noundef <4 x float> @llvm.trunc.v4f32(<4 x
+// float> [[TMP1]]) CHECK-NEXT:    [[MUL_I30_I_I_I_I:%.*]] = fmul <4 x float>
+// [[ELT_TRUNC_I_I]], splat (float 0x3FF921FB40000000) CHECK-NEXT:
+// [[FNEG_I31_I_I_I_I:%.*]] = fneg <4 x float> [[MUL_I30_I_I_I_I]] CHECK-NEXT:
+// [[TMP2:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float>
+// [[ELT_TRUNC_I_I]], <4 x float> splat (float 0x3FF921FB40000000), <4 x float>
+// [[FNEG_I31_I_I_I_I]]) CHECK-NEXT:    [[SUB_I_I_I_I:%.*]] = fsub <4 x float>
+// [[ELT_ABS_I_I_I]], [[MUL_I30_I_I_I_I]] CHECK-NEXT:    [[SUB2_I_I_I_I:%.*]] =
+// fsub <4 x float> [[ELT_ABS_I_I_I]], [[SUB_I_I_I_I]] CHECK-NEXT:
+// [[SUB3_I_I_I_I:%.*]] = fsub <4 x float> [[SUB2_I_I_I_I]], [[MUL_I30_I_I_I_I]]
+// CHECK-NEXT:    [[SUB4_I_I_I_I:%.*]] = fsub <4 x float> [[SUB3_I_I_I_I]],
+// [[TMP2]] CHECK-NEXT:    [[ADD_I_I_I_I:%.*]] = fadd <4 x float>
+// [[SUB_I_I_I_I]], [[SUB4_I_I_I_I]] CHECK-NEXT:    [[MUL_I27_I_I_I_I:%.*]] =
+// fmul <4 x float> [[ELT_TRUNC_I_I]], splat (float 0x3E74442D00000000)
+// CHECK-NEXT:    [[FNEG_I28_I_I_I_I:%.*]] = fneg <4 x float>
+// [[MUL_I27_I_I_I_I]] CHECK-NEXT:    [[TMP3:%.*]] = tail call noundef <4 x
+// float> @llvm.fma.v4f32(<4 x float> [[ELT_TRUNC_I_I]], <4 x float> splat
+// (float 0x3E74442D00000000), <4 x float> [[FNEG_I28_I_I_I_I]]) CHECK-NEXT:
+// [[SUB5_I_I_I_I:%.*]] = fsub <4 x float> [[ADD_I_I_I_I]], [[MUL_I27_I_I_I_I]]
+// CHECK-NEXT:    [[SUB6_I_I_I_I:%.*]] = fsub <4 x float> [[ADD_I_I_I_I]],
+// [[SUB5_I_I_I_I]] CHECK-NEXT:    [[SUB7_I_I_I_I:%.*]] = fsub <4 x float>
+// [[SUB6_I_I_I_I]], [[MUL_I27_I_I_I_I]] CHECK-NEXT:    [[SUB8_I_I_I_I:%.*]] =
+// fsub <4 x float> [[SUB7_I_I_I_I]], [[TMP3]] CHECK-NEXT: [[ADD9_I_I_I_I:%.*]]
+// = fadd <4 x float> [[SUB5_I_I_I_I]], [[SUB8_I_I_I_I]] CHECK-NEXT:
+// [[MUL_I_I_I_I_I:%.*]] = fmul <4 x float> [[ELT_TRUNC_I_I]], splat (float
+// 0x3CF8469880000000) CHECK-NEXT:    [[FNEG_I_I_I_I_I:%.*]] = fneg <4 x float>
+// [[MUL_I_I_I_I_I]] CHECK-NEXT:    [[TMP4:%.*]] = tail call noundef <4 x float>
+// @llvm.fma.v4f32(<4 x float> [[ELT_TRUNC_I_I]], <4 x float> splat (float
+// 0x3CF8469880000000), <4 x float> [[FNEG_I_I_I_I_I]]) CHECK-NEXT:
+// [[SUB10_I_I_I_I:%.*]] = fsub <4 x float> [[ADD9_I_I_I_I]], [[MUL_I_I_I_I_I]]
+// CHECK-NEXT:    [[SUB11_I_I_I_I:%.*]] = fsub <4 x float> [[ADD9_I_I_I_I]],
+// [[SUB10_I_I_I_I]] CHECK-NEXT:    [[SUB12_I_I_I_I:%.*]] = fsub <4 x float>
+// [[SUB11_I_I_I_I]], [[MUL_I_I_I_I_I]] CHECK-NEXT:    [[ADD13_I_I_I_I:%.*]] =
+// fadd <4 x float> [[SUB10_I_I_I_I]], [[SUB12_I_I_I_I]] CHECK-NEXT:
+// [[FNEG_I_I_I_I:%.*]] = fneg <4 x float> [[TMP4]] CHECK-NEXT:
+// [[CONV_I_I_I:%.*]] = fptosi <4 x float> [[ELT_TRUNC_I_I]] to <4 x i32>
+// CHECK-NEXT:    [[ASTYPE_I_I_I:%.*]] = bitcast <4 x float> [[ELT_ABS_I_I_I]]
+// to <4 x i32> CHECK-NEXT:    [[SHR_I_I_I:%.*]] = lshr <4 x i32>
+// [[ASTYPE_I_I_I]], splat (i32 23) CHECK-NEXT:    [[AND_I11_I_I:%.*]] = and <4
+// x i32> [[ASTYPE_I_I_I]], splat (i32 8388607) CHECK-NEXT:    [[OR_I_I_I:%.*]]
+// = or disjoint <4 x i32> [[AND_I11_I_I]], splat (i32 8388608) CHECK-NEXT:
+// [[MUL_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 -28220501)
+// CHECK-NEXT:    [[CONV_I1_I27_I_I:%.*]] = zext nneg <4 x i32> [[OR_I_I_I]] to
+// <4 x i64> CHECK-NEXT:    [[MUL_I28_I_I:%.*]] = mul nuw nsw <4 x i64>
+// [[CONV_I1_I27_I_I]], splat (i64 4266746795) CHECK-NEXT: [[SHR_I29_I_I:%.*]] =
+// lshr <4 x i64> [[MUL_I28_I_I]], splat (i64 32) CHECK-NEXT:
+// [[CONV_I2_I30_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I29_I_I]] to <4 x
+// i32> CHECK-NEXT:    [[MUL2_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat
+// (i32 1011060801) CHECK-NEXT:    [[ADD_I_I_I:%.*]] = add <4 x i32>
+// [[MUL2_I_I_I]], [[CONV_I2_I30_I_I]] CHECK-NEXT:    [[MUL_I24_I_I:%.*]] = mul
+// nuw nsw <4 x i64> [[CONV_I1_I27_I_I]], splat (i64 1011060801) CHECK-NEXT:
+// [[SHR_I25_I_I:%.*]] = lshr <4 x i64> [[MUL_I24_I_I]], splat (i64 32)
+// CHECK-NEXT:    [[CONV_I2_I26_I_I:%.*]] = trunc nuw nsw <4 x i64>
+// [[SHR_I25_I_I]] to <4 x i32> CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult <4 x
+// i32> [[ADD_I_I_I]], [[CONV_I2_I30_I_I]] CHECK-NEXT:    [[SEXT_I_I1_I:%.*]] =
+// zext <4 x i1> [[CMP_I_I_I]] to <4 x i32> CHECK-NEXT:    [[ADD5_I_I_I:%.*]] =
+// add nuw nsw <4 x i32> [[SEXT_I_I1_I]], [[CONV_I2_I26_I_I]] CHECK-NEXT:
+// [[MUL6_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 -614296167)
+// CHECK-NEXT:    [[ADD7_I_I_I:%.*]] = add <4 x i32> [[ADD5_I_I_I]],
+// [[MUL6_I_I_I]] CHECK-NEXT:    [[MUL_I20_I_I:%.*]] = mul nuw nsw <4 x i64>
+// [[CONV_I1_I27_I_I]], splat (i64 3680671129) CHECK-NEXT: [[SHR_I21_I_I:%.*]] =
+// lshr <4 x i64> [[MUL_I20_I_I]], splat (i64 32) CHECK-NEXT:
+// [[CONV_I2_I22_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I21_I_I]] to <4 x
+// i32> CHECK-NEXT:    [[CMP9_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD7_I_I_I]],
+// [[ADD5_I_I_I]] CHECK-NEXT:    [[SEXT10_I_I_I:%.*]] = zext <4 x i1>
+// [[CMP9_I_I_I]] to <4 x i32> CHECK-NEXT:    [[ADD13_I_I_I:%.*]] = add nuw nsw
+// <4 x i32> [[SEXT10_I_I_I]], [[CONV_I2_I22_I_I]] CHECK-NEXT:
+// [[MUL14_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 -181084736)
+// CHECK-NEXT:    [[ADD15_I_I_I:%.*]] = add <4 x i32> [[ADD13_I_I_I]],
+// [[MUL14_I_I_I]] CHECK-NEXT:    [[MUL_I16_I_I:%.*]] = mul nuw nsw <4 x i64>
+// [[CONV_I1_I27_I_I]], splat (i64 4113882560) CHECK-NEXT: [[SHR_I17_I_I:%.*]] =
+// lshr <4 x i64> [[MUL_I16_I_I]], splat (i64 32) CHECK-NEXT:
+// [[CONV_I2_I18_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I17_I_I]] to <4 x
+// i32> CHECK-NEXT:    [[CMP17_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD15_I_I_I]],
+// [[ADD13_I_I_I]] CHECK-NEXT:    [[SEXT18_I_I_I:%.*]] = zext <4 x i1>
+// [[CMP17_I_I_I]] to <4 x i32> CHECK-NEXT:    [[ADD21_I_I_I:%.*]] = add nuw nsw
+// <4 x i32> [[SEXT18_I_I_I]], [[CONV_I2_I18_I_I]] CHECK-NEXT:
+// [[MUL22_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 -64530479)
+// CHECK-NEXT:    [[ADD23_I_I_I:%.*]] = add <4 x i32> [[ADD21_I_I_I]],
+// [[MUL22_I_I_I]] CHECK-NEXT:    [[MUL_I12_I_I:%.*]] = mul nuw nsw <4 x i64>
+// [[CONV_I1_I27_I_I]], splat (i64 4230436817) CHECK-NEXT: [[SHR_I13_I_I:%.*]] =
+// lshr <4 x i64> [[MUL_I12_I_I]], splat (i64 32) CHECK-NEXT:
+// [[CONV_I2_I14_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I13_I_I]] to <4 x
+// i32> CHECK-NEXT:    [[CMP25_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD23_I_I_I]],
+// [[ADD21_I_I_I]] CHECK-NEXT:    [[SEXT26_I_I_I:%.*]] = zext <4 x i1>
+// [[CMP25_I_I_I]] to <4 x i32> CHECK-NEXT:    [[ADD29_I_I_I:%.*]] = add nuw nsw
+// <4 x i32> [[SEXT26_I_I_I]], [[CONV_I2_I14_I_I]] CHECK-NEXT:
+// [[MUL30_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 1313084713)
+// CHECK-NEXT:    [[ADD31_I_I_I:%.*]] = add <4 x i32> [[ADD29_I_I_I]],
+// [[MUL30_I_I_I]] CHECK-NEXT:    [[MUL_I8_I_I:%.*]] = mul nuw nsw <4 x i64>
+// [[CONV_I1_I27_I_I]], splat (i64 1313084713) CHECK-NEXT:    [[SHR_I9_I_I:%.*]]
+// = lshr <4 x i64> [[MUL_I8_I_I]], splat (i64 32) CHECK-NEXT:
+// [[CONV_I2_I10_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I9_I_I]] to <4 x i32>
+// CHECK-NEXT:    [[CMP33_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD31_I_I_I]],
+// [[ADD29_I_I_I]] CHECK-NEXT:    [[SEXT34_I_I_I:%.*]] = zext <4 x i1>
+// [[CMP33_I_I_I]] to <4 x i32> CHECK-NEXT:    [[ADD37_I_I_I:%.*]] = add nuw nsw
+// <4 x i32> [[SEXT34_I_I_I]], [[CONV_I2_I10_I_I]] CHECK-NEXT:
+// [[MUL38_I_I_I:%.*]] = mul <4 x i32> [[OR_I_I_I]], splat (i32 -1560706194)
+// CHECK-NEXT:    [[ADD39_I_I_I:%.*]] = add <4 x i32> [[ADD37_I_I_I]],
+// [[MUL38_I_I_I]] CHECK-NEXT:    [[MUL_I5_I_I:%.*]] = mul nuw nsw <4 x i64>
+// [[CONV_I1_I27_I_I]], splat (i64 2734261102) CHECK-NEXT:    [[SHR_I6_I_I:%.*]]
+// = lshr <4 x i64> [[MUL_I5_I_I]], splat (i64 32) CHECK-NEXT:
+// [[CONV_I2_I_I_I:%.*]] = trunc nuw nsw <4 x i64> [[SHR_I6_I_I]] to <4 x i32>
+// CHECK-NEXT:    [[CMP41_I_I_I:%.*]] = icmp ult <4 x i32> [[ADD39_I_I_I]],
+// [[ADD37_I_I_I]] CHECK-NEXT:    [[SEXT42_I_I_I:%.*]] = zext <4 x i1>
+// [[CMP41_I_I_I]] to <4 x i32> CHECK-NEXT:    [[ADD45_I_I_I:%.*]] = add nuw nsw
+// <4 x i32> [[SEXT42_I_I_I]], [[CONV_I2_I_I_I]] CHECK-NEXT: [[SUB47_I_I_I:%.*]]
+// = add nsw <4 x i32> [[SHR_I_I_I]], splat (i32 -120) CHECK-NEXT:
+// [[CMP48_I_I_I:%.*]] = icmp ugt <4 x i32> [[SUB47_I_I_I]], splat (i32 31)
+// CHECK-NEXT:    [[COND51_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x
+// i32> [[ADD39_I_I_I]], <4 x i32> [[ADD45_I_I_I]] CHECK-NEXT:
+// [[COND53_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32>
+// [[ADD31_I_I_I]], <4 x i32> [[ADD39_I_I_I]] CHECK-NEXT: [[COND55_I_I_I:%.*]] =
+// select <4 x i1> [[CMP48_I_I_I]], <4 x i32> [[ADD23_I_I_I]], <4 x i32>
+// [[ADD31_I_I_I]] CHECK-NEXT:    [[COND57_I_I_I:%.*]] = select <4 x i1>
+// [[CMP48_I_I_I]], <4 x i32> [[ADD15_I_I_I]], <4 x i32> [[ADD23_I_I_I]]
+// CHECK-NEXT:    [[COND59_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x
+// i32> [[ADD7_I_I_I]], <4 x i32> [[ADD15_I_I_I]] CHECK-NEXT:
+// [[COND61_I_I_I:%.*]] = select <4 x i1> [[CMP48_I_I_I]], <4 x i32>
+// [[ADD_I_I_I]], <4 x i32> [[ADD7_I_I_I]] CHECK-NEXT:    [[COND63_I_I_I:%.*]] =
+// select <4 x i1> [[CMP48_I_I_I]], <4 x i32> [[MUL_I_I_I]], <4 x i32>
+// [[ADD_I_I_I]] CHECK-NEXT:    [[DOTNEG_I_I_I:%.*]] = select <4 x i1>
+// [[CMP48_I_I_I]], <4 x i32> splat (i32 -32), <4 x i32> zeroinitializer
+// CHECK-NEXT:    [[SUB66_I_I_I:%.*]] = add nsw <4 x i32> [[DOTNEG_I_I_I]],
+// [[SUB47_I_I_I]] CHECK-NEXT:    [[CMP67_I_I_I:%.*]] = icmp ugt <4 x i32>
+// [[SUB66_I_I_I]], splat (i32 31) CHECK-NEXT:    [[COND70_I_I_I:%.*]] = select
+// <4 x i1> [[CMP67_I_I_I]], <4 x i32> [[COND53_I_I_I]], <4 x i32>
+// [[COND51_I_I_I]] CHECK-NEXT:    [[COND72_I_I_I:%.*]] = select <4 x i1>
+// [[CMP67_I_I_I]], <4 x i32> [[COND55_I_I_I]], <4 x i32> [[COND53_I_I_I]]
+// CHECK-NEXT:    [[COND74_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x
+// i32> [[COND57_I_I_I]], <4 x i32> [[COND55_I_I_I]] CHECK-NEXT:
+// [[COND76_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4 x i32>
+// [[COND59_I_I_I]], <4 x i32> [[COND57_I_I_I]] CHECK-NEXT: [[COND78_I_I_I:%.*]]
+// = select <4 x i1> [[CMP67_I_I_I]], <4 x i32> [[COND61_I_I_I]], <4 x i32>
+// [[COND59_I_I_I]] CHECK-NEXT:    [[COND80_I_I_I:%.*]] = select <4 x i1>
+// [[CMP67_I_I_I]], <4 x i32> [[COND63_I_I_I]], <4 x i32> [[COND61_I_I_I]]
+// CHECK-NEXT:    [[DOTNEG379_I_I_I:%.*]] = select <4 x i1> [[CMP67_I_I_I]], <4
+// x i32> splat (i32 -32), <4 x i32> zeroinitializer CHECK-NEXT:
+// [[SUB83_I_I_I:%.*]] = add nsw <4 x i32> [[DOTNEG379_I_I_I]], [[SUB66_I_I_I]]
+// CHECK-NEXT:    [[CMP84_I_I_I:%.*]] = icmp ugt <4 x i32> [[SUB83_I_I_I]],
+// splat (i32 31) CHECK-NEXT:    [[COND87_I_I_I:%.*]] = select <4 x i1>
+// [[CMP84_I_I_I]], <4 x i32> [[COND72_I_I_I]], <4 x i32> [[COND70_I_I_I]]
+// CHECK-NEXT:    [[COND89_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 x
+// i32> [[COND74_I_I_I]], <4 x i32> [[COND72_I_I_I]] CHECK-NEXT:
+// [[COND91_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4 x i32>
+// [[COND76_I_I_I]], <4 x i32> [[COND74_I_I_I]] CHECK-NEXT: [[COND93_I_I_I:%.*]]
+// = select <4 x i1> [[CMP84_I_I_I]], <4 x i32> [[COND78_I_I_I]], <4 x i32>
+// [[COND76_I_I_I]] CHECK-NEXT:    [[COND95_I_I_I:%.*]] = select <4 x i1>
+// [[CMP84_I_I_I]], <4 x i32> [[COND80_I_I_I]], <4 x i32> [[COND78_I_I_I]]
+// CHECK-NEXT:    [[DOTNEG380_I_I_I:%.*]] = select <4 x i1> [[CMP84_I_I_I]], <4
+// x i32> splat (i32 -32), <4 x i32> zeroinitializer CHECK-NEXT:
+// [[SUB98_I_I_I:%.*]] = add nsw <4 x i32> [[DOTNEG380_I_I_I]], [[SUB83_I_I_I]]
+// CHECK-NEXT:    [[CMP99_I_I_I:%.*]] = icmp ugt <4 x i32> [[SUB98_I_I_I]],
+// splat (i32 31) CHECK-NEXT:    [[COND102_I_I_I:%.*]] = select <4 x i1>
+// [[CMP99_I_I_I]], <4 x i32> [[COND89_I_I_I]], <4 x i32> [[COND87_I_I_I]]
+// CHECK-NEXT:    [[COND104_I_I_I:%.*]] = select <4 x i1> [[CMP99_I_I_I]], <4 x
+// i32> [[COND91_I_I_I]], <4 x i32> [[COND89_I_I_I]] CHECK-NEXT:
+// [[COND106_I_I_I:%.*]] = select <4 x i1> [[CMP99_I_I_I]], <4 x i32>
+// [[COND93_I_I_I]], <4 x i32> [[COND91_I_I_I]] CHECK-NEXT:
+// [[COND108_I_I_I:%.*]] = select <4 x i1> [[CMP99_I_I_I]], <4 x i32>
+// [[COND95_I_I_I]], <4 x i32> [[COND93_I_I_I]] CHECK-NEXT:
+// [[DOTNEG381_I_I_I:%.*]] = select <4 x i1> [[CMP99_I_I_I]], <4 x i32> splat
+// (i32 -32), <4 x i32> zeroinitializer CHECK-NEXT:    [[SUB111_I_I_I:%.*]] =
+// sub nsw <4 x i32> zeroinitializer, [[SUB98_I_I_I]] CHECK-NEXT:
+// [[CMP112_NOT_I_I_I:%.*]] = icmp eq <4 x i32> [[DOTNEG381_I_I_I]],
+// [[SUB111_I_I_I]] CHECK-NEXT:    [[SUB114_I_I_I:%.*]] = sub nsw <4 x i32>
+// splat (i32 24), [[SHR_I_I_I]] CHECK-NEXT:    [[SHL_MASK_I_I_I:%.*]] = and <4
+// x i32> [[SUB47_I_I_I]], splat (i32 31) CHECK-NEXT:    [[SHL_I_I_I:%.*]] = shl
+// <4 x i32> [[COND102_I_I_I]], [[SHL_MASK_I_I_I]] CHECK-NEXT:
+// [[SHR_MASK_I_I_I:%.*]] = and <4 x i32> [[SUB114_I_I_I]], splat (i32 31)
+// CHECK-NEXT:    [[SHR116_I_I_I:%.*]] = lshr <4 x i32> [[COND104_I_I_I]],
+// [[SHR_MASK_I_I_I]] CHECK-NEXT:    [[OR117_I_I_I:%.*]] = or <4 x i32>
+// [[SHL_I_I_I]], [[SHR116_I_I_I]] CHECK-NEXT:    [[SHL120_I_I_I:%.*]] = shl <4
+// x i32> [[COND104_I_I_I]], [[SHL_MASK_I_I_I]] CHECK-NEXT: [[SHR122_I_I_I:%.*]]
+// = lshr <4 x i32> [[COND106_I_I_I]], [[SHR_MASK_I_I_I]] CHECK-NEXT:
+// [[OR123_I_I_I:%.*]] = or <4 x i32> [[SHL120_I_I_I]], [[SHR122_I_I_I]]
+// CHECK-NEXT:    [[SHL126_I_I_I:%.*]] = shl <4 x i32> [[COND106_I_I_I]],
+// [[SHL_MASK_I_I_I]] CHECK-NEXT:    [[SHR128_I_I_I:%.*]] = lshr <4 x i32>
+// [[COND108_I_I_I]], [[SHR_MASK_I_I_I]] CHECK-NEXT:    [[OR129_I_I_I:%.*]] = or
+// <4 x i32> [[SHL126_I_I_I]], [[SHR128_I_I_I]] CHECK-NEXT:
+// [[COND131_I_I_I:%.*]] = select <4 x i1> [[CMP112_NOT_I_I_I]], <4 x i32>
+// [[COND102_I_I_I]], <4 x i32> [[OR117_I_I_I]] CHECK-NEXT:
+// [[COND133_I_I_I:%.*]] = select <4 x i1> [[CMP112_NOT_I_I_I]], <4 x i32>
+// [[COND104_I_I_I]], <4 x i32> [[OR123_I_I_I]] CHECK-NEXT:
+// [[COND135_I_I_I:%.*]] = select <4 x i1> [[CMP112_NOT_I_I_I]], <4 x i32>
+// [[COND106_I_I_I]], <4 x i32> [[OR129_I_I_I]] CHECK-NEXT: [[SHR136_I_I_I:%.*]]
+// = lshr <4 x i32> [[COND131_I_I_I]], splat (i32 29) CHECK-NEXT:
+// [[OR139_I_I_I:%.*]] = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32>
+// [[COND131_I_I_I]], <4 x i32> [[COND133_I_I_I]], <4 x i32> splat (i32 2))
+// CHECK-NEXT:    [[OR142_I_I_I:%.*]] = tail call <4 x i32> @llvm.fshl.v4i32(<4
+// x i32> [[COND133_I_I_I]], <4 x i32> [[COND135_I_I_I]], <4 x i32> splat (i32
+// 2)) CHECK-NEXT:    [[OR145_I_I_I:%.*]] = tail call <4 x i32>
+// @llvm.fshl.v4i32(<4 x i32> [[COND135_I_I_I]], <4 x i32> [[COND108_I_I_I]], <4
+// x i32> splat (i32 2)) CHECK-NEXT:    [[AND146_I_I_I:%.*]] = and <4 x i32>
+// [[SHR136_I_I_I]], splat (i32 1) CHECK-NEXT:    [[SEXT148_I_I_I:%.*]] = sub
+// nsw <4 x i32> zeroinitializer, [[AND146_I_I_I]] CHECK-NEXT:    [[TMP5:%.*]] =
+// and <4 x i32> [[SEXT148_I_I_I]], splat (i32 -2147483648) CHECK-NEXT:
+// [[XOR_I_I_I:%.*]] = xor <4 x i32> [[OR139_I_I_I]], [[SEXT148_I_I_I]]
+// CHECK-NEXT:    [[XOR156_I_I_I:%.*]] = xor <4 x i32> [[OR142_I_I_I]],
+// [[SEXT148_I_I_I]] CHECK-NEXT:    [[XOR157_I_I_I:%.*]] = xor <4 x i32>
+// [[OR145_I_I_I]], [[SEXT148_I_I_I]] CHECK-NEXT:    [[TMP6:%.*]] =
+// extractelement <4 x i32> [[XOR_I_I_I]], i64 0 CHECK-NEXT:    [[TMP7:%.*]] =
+// tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP6]], i1 false)
+// CHECK-NEXT:    [[VECINIT_I1_I_I:%.*]] = insertelement <4 x i32> poison, i32
+// [[TMP7]], i64 0 CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32>
+// [[XOR_I_I_I]], i64 1 CHECK-NEXT:    [[TMP9:%.*]] = tail call range(i32 0, 33)
+// i32 @llvm.ctlz.i32(i32 [[TMP8]], i1 false) CHECK-NEXT:
+// [[VECINIT2_I2_I_I:%.*]] = insertelement <4 x i32> [[VECINIT_I1_I_I]], i32
+// [[TMP9]], i64 1 CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i32>
+// [[XOR_I_I_I]], i64 2 CHECK-NEXT:    [[TMP11:%.*]] = tail call range(i32 0,
+// 33) i32 @llvm.ctlz.i32(i32 [[TMP10]], i1 false) CHECK-NEXT:
+// [[VECINIT4_I3_I_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I2_I_I]], i32
+// [[TMP11]], i64 2 CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i32>
+// [[XOR_I_I_I]], i64 3 CHECK-NEXT:    [[TMP13:%.*]] = tail call range(i32 0,
+// 33) i32 @llvm.ctlz.i32(i32 [[TMP12]], i1 false) CHECK-NEXT:
+// [[VECINIT6_I4_I_I:%.*]] = insertelement <4 x i32> [[VECINIT4_I3_I_I]], i32
+// [[TMP13]], i64 3 CHECK-NEXT:    [[ADD159_I_I_I:%.*]] = add nuw nsw <4 x i32>
+// [[VECINIT6_I4_I_I]], splat (i32 1) CHECK-NEXT:    [[SHL_MASK162_I_I_I:%.*]] =
+// and <4 x i32> [[ADD159_I_I_I]], splat (i32 31) CHECK-NEXT:
+// [[SHL163_I_I_I:%.*]] = shl <4 x i32> [[XOR_I_I_I]], [[SHL_MASK162_I_I_I]]
+// CHECK-NEXT:    [[TMP14:%.*]] = and <4 x i32> [[VECINIT6_I4_I_I]], splat (i32
+// 31) CHECK-NEXT:    [[SHR_MASK164_I_I_I:%.*]] = xor <4 x i32> [[TMP14]], splat
+// (i32 31) CHECK-NEXT:    [[SHR165_I_I_I:%.*]] = lshr <4 x i32>
+// [[XOR156_I_I_I]], [[SHR_MASK164_I_I_I]] CHECK-NEXT:    [[OR166_I_I_I:%.*]] =
+// or <4 x i32> [[SHL163_I_I_I]], [[SHR165_I_I_I]] CHECK-NEXT:
+// [[SHL169_I_I_I:%.*]] = shl <4 x i32> [[XOR156_I_I_I]], [[SHL_MASK162_I_I_I]]
+// CHECK-NEXT:    [[SHR171_I_I_I:%.*]] = lshr <4 x i32> [[XOR157_I_I_I]],
+// [[SHR_MASK164_I_I_I]] CHECK-NEXT:    [[OR172_I_I_I:%.*]] = or <4 x i32>
+// [[SHL169_I_I_I]], [[SHR171_I_I_I]] CHECK-NEXT:    [[SHR176_I_I_I:%.*]] = lshr
+// <4 x i32> [[OR166_I_I_I]], splat (i32 9) CHECK-NEXT:    [[TMP15:%.*]] = shl
+// nuw nsw <4 x i32> [[VECINIT6_I4_I_I]], splat (i32 23) CHECK-NEXT:
+// [[REASS_SUB:%.*]] = sub nsw <4 x i32> [[SHR176_I_I_I]], [[TMP15]] CHECK-NEXT:
+// [[TMP16:%.*]] = add <4 x i32> [[REASS_SUB]], splat (i32 1056964608)
 // CHECK-NEXT:    [[OR177_I_I_I:%.*]] = or <4 x i32> [[TMP16]], [[TMP5]]
-// CHECK-NEXT:    [[ASTYPE178_I_I_I:%.*]] = bitcast <4 x i32> [[OR177_I_I_I]] to <4 x float>
-// CHECK-NEXT:    [[OR181_I_I_I:%.*]] = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> [[OR166_I_I_I]], <4 x i32> [[OR172_I_I_I]], <4 x i32> splat (i32 23))
-// CHECK-NEXT:    [[TMP17:%.*]] = extractelement <4 x i32> [[OR181_I_I_I]], i64 0
-// CHECK-NEXT:    [[TMP18:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP17]], i1 false)
-// CHECK-NEXT:    [[VECINIT_I_I_I:%.*]] = insertelement <4 x i32> poison, i32 [[TMP18]], i64 0
-// CHECK-NEXT:    [[TMP19:%.*]] = extractelement <4 x i32> [[OR181_I_I_I]], i64 1
-// CHECK-NEXT:    [[TMP20:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP19]], i1 false)
-// CHECK-NEXT:    [[VECINIT2_I_I_I:%.*]] = insertelement <4 x i32> [[VECINIT_I_I_I]], i32 [[TMP20]], i64 1
-// CHECK-NEXT:    [[TMP21:%.*]] = extractelement <4 x i32> [[OR181_I_I_I]], i64 2
-// CHECK-NEXT:    [[TMP22:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP21]], i1 false)
-// CHECK-NEXT:    [[VECINIT4_I_I_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I_I_I]], i32 [[TMP22]], i64 2
-// CHECK-NEXT:    [[TMP23:%.*]] = extractelement <4 x i32> [[OR181_I_I_I]], i64 3
-// CHECK-NEXT:    [[TMP24:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP23]], i1 false)
-// CHECK-NEXT:    [[VECINIT6_I_I_I:%.*]] = insertelement <4 x i32> [[VECINIT4_I_I_I]], i32 [[TMP24]], i64 3
-// CHECK-NEXT:    [[ADD183_I_I_NEG_I:%.*]] = xor <4 x i32> [[VECINIT6_I_I_I]], splat (i32 -1)
-// CHECK-NEXT:    [[ADD183_I_I_I:%.*]] = add nuw nsw <4 x i32> [[VECINIT6_I_I_I]], splat (i32 1)
-// CHECK-NEXT:    [[SHL_MASK186_I_I_I:%.*]] = and <4 x i32> [[ADD183_I_I_I]], splat (i32 31)
-// CHECK-NEXT:    [[SHL187_I_I_I:%.*]] = shl <4 x i32> [[OR181_I_I_I]], [[SHL_MASK186_I_I_I]]
-// CHECK-NEXT:    [[TMP25:%.*]] = and <4 x i32> [[VECINIT6_I_I_I]], splat (i32 31)
-// CHECK-NEXT:    [[SHR_MASK189_I_I_I:%.*]] = xor <4 x i32> [[TMP25]], splat (i32 31)
-// CHECK-NEXT:    [[SHR190_I_I_I:%.*]] = lshr <4 x i32> [[OR172_I_I_I]], [[SHR_MASK189_I_I_I]]
-// CHECK-NEXT:    [[OR191_I_I_I:%.*]] = or <4 x i32> [[SHL187_I_I_I]], [[SHR190_I_I_I]]
-// CHECK-NEXT:    [[REASS_SUB10_I:%.*]] = sub nuw nsw <4 x i32> [[ADD183_I_I_NEG_I]], [[VECINIT6_I4_I_I]]
-// CHECK-NEXT:    [[ADD193_I_I_NEG_I:%.*]] = shl <4 x i32> [[REASS_SUB10_I]], splat (i32 23)
-// CHECK-NEXT:    [[SHR197_I_I_I:%.*]] = lshr <4 x i32> [[OR191_I_I_I]], splat (i32 9)
-// CHECK-NEXT:    [[REASS_SUB16_I_I:%.*]] = add <4 x i32> [[ADD193_I_I_NEG_I]], splat (i32 864026624)
-// CHECK-NEXT:    [[TMP26:%.*]] = or disjoint <4 x i32> [[SHR197_I_I_I]], [[REASS_SUB16_I_I]]
-// CHECK-NEXT:    [[OR198_I_I_I:%.*]] = or <4 x i32> [[TMP26]], [[TMP5]]
-// CHECK-NEXT:    [[ASTYPE199_I_I_I:%.*]] = bitcast <4 x i32> [[OR198_I_I_I]] to <4 x float>
-// CHECK-NEXT:    [[MUL200_I_I_I:%.*]] = fmul <4 x float> [[ASTYPE178_I_I_I]], splat (float 0x3FF921FB40000000)
-// CHECK-NEXT:    [[FNEG_I_I_I:%.*]] = fneg <4 x float> [[MUL200_I_I_I]]
-// CHECK-NEXT:    [[TMP27:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[ASTYPE178_I_I_I]], <4 x float> splat (float 0x3FF921FB40000000), <4 x float> [[FNEG_I_I_I]])
-// CHECK-NEXT:    [[TMP28:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[ASTYPE178_I_I_I]], <4 x float> splat (float 0x3E74442D00000000), <4 x float> [[TMP27]])
-// CHECK-NEXT:    [[TMP29:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[ASTYPE199_I_I_I]], <4 x float> splat (float 0x3FF921FB40000000), <4 x float> [[TMP28]])
-// CHECK-NEXT:    [[ADD204_I_I_I:%.*]] = fadd <4 x float> [[MUL200_I_I_I]], [[TMP29]]
-// CHECK-NEXT:    [[SUB205_I_I_I:%.*]] = fsub <4 x float> [[ADD204_I_I_I]], [[MUL200_I_I_I]]
-// CHECK-NEXT:    [[SUB206_I_I_I:%.*]] = fsub <4 x float> [[TMP29]], [[SUB205_I_I_I]]
-// CHECK-NEXT:    [[SHR207_I_I_I:%.*]] = lshr <4 x i32> [[COND131_I_I_I]], splat (i32 30)
-// CHECK-NEXT:    [[ADD209_I_I_I:%.*]] = add nuw nsw <4 x i32> [[AND146_I_I_I]], [[SHR207_I_I_I]]
-// CHECK-NEXT:    [[COND_V_I2_I:%.*]] = select <4 x i1> [[CMP_I_I]], <4 x float> [[ADD13_I_I_I_I]], <4 x float> [[ADD204_I_I_I]]
-// CHECK-NEXT:    [[COND4_V_I_I:%.*]] = select <4 x i1> [[CMP_I_I]], <4 x float> [[FNEG_I_I_I_I]], <4 x float> [[SUB206_I_I_I]]
-// CHECK-NEXT:    [[COND6_V_I_I:%.*]] = select <4 x i1> [[CMP_I_I]], <4 x i32> [[CONV_I_I_I]], <4 x i32> [[ADD209_I_I_I]]
-// CHECK-NEXT:    [[COND6_I_I:%.*]] = and <4 x i32> [[COND6_V_I_I]], splat (i32 2)
-// CHECK-NEXT:    [[MUL_I_I:%.*]] = fmul <4 x float> [[COND_V_I2_I]], [[COND_V_I2_I]]
-// CHECK-NEXT:    [[MUL1_I_I:%.*]] = fmul <4 x float> [[COND_V_I2_I]], [[MUL_I_I]]
-// CHECK-NEXT:    [[TMP30:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> splat (float 0x3DE5D93A60000000), <4 x float> splat (float 0xBE5AE5E680000000))
-// CHECK-NEXT:    [[TMP31:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP30]], <4 x float> splat (float 0x3EC6DBE4A0000000))
-// CHECK-NEXT:    [[TMP32:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP31]], <4 x float> splat (float 0xBF2A013A80000000))
-// CHECK-NEXT:    [[TMP33:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP32]], <4 x float> splat (float 0x3F811110E0000000))
-// CHECK-NEXT:    [[FNEG_I3_I:%.*]] = fneg <4 x float> [[MUL1_I_I]]
-// CHECK-NEXT:    [[MUL5_I_I:%.*]] = fmul <4 x float> [[TMP33]], [[FNEG_I3_I]]
-// CHECK-NEXT:    [[TMP34:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[COND4_V_I_I]], <4 x float> splat (float 5.000000e-01), <4 x float> [[MUL5_I_I]])
-// CHECK-NEXT:    [[FNEG7_I_I:%.*]] = fneg <4 x float> [[COND4_V_I_I]]
-// CHECK-NEXT:    [[TMP35:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP34]], <4 x float> [[FNEG7_I_I]])
-// CHECK-NEXT:    [[TMP36:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL1_I_I]], <4 x float> splat (float 0x3FC5555560000000), <4 x float> [[TMP35]])
-// CHECK-NEXT:    [[SUB_I_I:%.*]] = fsub <4 x float> [[COND_V_I2_I]], [[TMP36]]
-// CHECK-NEXT:    [[FNEG_I_I:%.*]] = fneg <4 x float> [[SUB_I_I]]
-// CHECK-NEXT:    [[TMP37:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> splat (float 0xBDA8FAE9C0000000), <4 x float> splat (float 0x3E21EE9EC0000000))
-// CHECK-NEXT:    [[TMP38:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP37]], <4 x float> splat (float 0xBE92524740000000))
-// CHECK-NEXT:    [[TMP39:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP38]], <4 x float> splat (float 0x3EFA015C40000000))
-// CHECK-NEXT:    [[TMP40:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP39]], <4 x float> splat (float 0xBF56C16C00000000))
-// CHECK-NEXT:    [[TMP41:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP40]], <4 x float> splat (float 0x3FA5555560000000))
-// CHECK-NEXT:    [[MUL5_I5_I:%.*]] = fmul <4 x float> [[MUL_I_I]], [[TMP41]]
-// CHECK-NEXT:    [[TMP42:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[COND_V_I2_I]])
-// CHECK-NEXT:    [[AND_I_I:%.*]] = bitcast <4 x float> [[TMP42]] to <4 x i32>
-// CHECK-NEXT:    [[SUB_I6_I:%.*]] = add nsw <4 x i32> [[AND_I_I]], splat (i32 -16777216)
-// CHECK-NEXT:    [[TMP43:%.*]] = add nsw <4 x i32> [[AND_I_I]], splat (i32 -1050253722)
-// CHECK-NEXT:    [[AND938_I_I:%.*]] = icmp ult <4 x i32> [[TMP43]], splat (i32 11429479)
-// CHECK-NEXT:    [[TMP44:%.*]] = select <4 x i1> [[AND938_I_I]], <4 x i32> [[SUB_I6_I]], <4 x i32> zeroinitializer
-// CHECK-NEXT:    [[CMP11_I_I:%.*]] = icmp samesign ugt <4 x i32> [[AND_I_I]], splat (i32 1061683200)
-// CHECK-NEXT:    [[COND14_I_I:%.*]] = select <4 x i1> [[CMP11_I_I]], <4 x i32> splat (i32 1049624576), <4 x i32> [[TMP44]]
-// CHECK-NEXT:    [[TMP45:%.*]] = bitcast <4 x i32> [[COND14_I_I]] to <4 x float>
-// CHECK-NEXT:    [[FNEG_I7_I:%.*]] = fneg <4 x float> [[TMP45]]
-// CHECK-NEXT:    [[TMP46:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> splat (float 5.000000e-01), <4 x float> [[FNEG_I7_I]])
-// CHECK-NEXT:    [[SUB16_I_I:%.*]] = fsub <4 x float> splat (float 1.000000e+00), [[TMP45]]
+// CHECK-NEXT:    [[ASTYPE178_I_I_I:%.*]] = bitcast <4 x i32> [[OR177_I_I_I]] to
+// <4 x float> CHECK-NEXT:    [[OR181_I_I_I:%.*]] = tail call <4 x i32>
+// @llvm.fshl.v4i32(<4 x i32> [[OR166_I_I_I]], <4 x i32> [[OR172_I_I_I]], <4 x
+// i32> splat (i32 23)) CHECK-NEXT:    [[TMP17:%.*]] = extractelement <4 x i32>
+// [[OR181_I_I_I]], i64 0 CHECK-NEXT:    [[TMP18:%.*]] = tail call range(i32 0,
+// 33) i32 @llvm.ctlz.i32(i32 [[TMP17]], i1 false) CHECK-NEXT:
+// [[VECINIT_I_I_I:%.*]] = insertelement <4 x i32> poison, i32 [[TMP18]], i64 0
+// CHECK-NEXT:    [[TMP19:%.*]] = extractelement <4 x i32> [[OR181_I_I_I]], i64
+// 1 CHECK-NEXT:    [[TMP20:%.*]] = tail call range(i32 0, 33) i32
+// @llvm.ctlz.i32(i32 [[TMP19]], i1 false) CHECK-NEXT:    [[VECINIT2_I_I_I:%.*]]
+// = insertelement <4 x i32> [[VECINIT_I_I_I]], i32 [[TMP20]], i64 1 CHECK-NEXT:
+// [[TMP21:%.*]] = extractelement <4 x i32> [[OR181_I_I_I]], i64 2 CHECK-NEXT:
+// [[TMP22:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP21]],
+// i1 false) CHECK-NEXT:    [[VECINIT4_I_I_I:%.*]] = insertelement <4 x i32>
+// [[VECINIT2_I_I_I]], i32 [[TMP22]], i64 2 CHECK-NEXT:    [[TMP23:%.*]] =
+// extractelement <4 x i32> [[OR181_I_I_I]], i64 3 CHECK-NEXT:    [[TMP24:%.*]]
+// = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP23]], i1 false)
+// CHECK-NEXT:    [[VECINIT6_I_I_I:%.*]] = insertelement <4 x i32>
+// [[VECINIT4_I_I_I]], i32 [[TMP24]], i64 3 CHECK-NEXT: [[ADD183_I_I_NEG_I:%.*]]
+// = xor <4 x i32> [[VECINIT6_I_I_I]], splat (i32 -1) CHECK-NEXT:
+// [[ADD183_I_I_I:%.*]] = add nuw nsw <4 x i32> [[VECINIT6_I_I_I]], splat (i32
+// 1) CHECK-NEXT:    [[SHL_MASK186_I_I_I:%.*]] = and <4 x i32> [[ADD183_I_I_I]],
+// splat (i32 31) CHECK-NEXT:    [[SHL187_I_I_I:%.*]] = shl <4 x i32>
+// [[OR181_I_I_I]], [[SHL_MASK186_I_I_I]] CHECK-NEXT:    [[TMP25:%.*]] = and <4
+// x i32> [[VECINIT6_I_I_I]], splat (i32 31) CHECK-NEXT:
+// [[SHR_MASK189_I_I_I:%.*]] = xor <4 x i32> [[TMP25]], splat (i32 31)
+// CHECK-NEXT:    [[SHR190_I_I_I:%.*]] = lshr <4 x i32> [[OR172_I_I_I]],
+// [[SHR_MASK189_I_I_I]] CHECK-NEXT:    [[OR191_I_I_I:%.*]] = or <4 x i32>
+// [[SHL187_I_I_I]], [[SHR190_I_I_I]] CHECK-NEXT:    [[REASS_SUB10_I:%.*]] = sub
+// nuw nsw <4 x i32> [[ADD183_I_I_NEG_I]], [[VECINIT6_I4_I_I]] CHECK-NEXT:
+// [[ADD193_I_I_NEG_I:%.*]] = shl <4 x i32> [[REASS_SUB10_I]], splat (i32 23)
+// CHECK-NEXT:    [[SHR197_I_I_I:%.*]] = lshr <4 x i32> [[OR191_I_I_I]], splat
+// (i32 9) CHECK-NEXT:    [[REASS_SUB16_I_I:%.*]] = add <4 x i32>
+// [[ADD193_I_I_NEG_I]], splat (i32 864026624) CHECK-NEXT:    [[TMP26:%.*]] = or
+// disjoint <4 x i32> [[SHR197_I_I_I]], [[REASS_SUB16_I_I]] CHECK-NEXT:
+// [[OR198_I_I_I:%.*]] = or <4 x i32> [[TMP26]], [[TMP5]] CHECK-NEXT:
+// [[ASTYPE199_I_I_I:%.*]] = bitcast <4 x i32> [[OR198_I_I_I]] to <4 x float>
+// CHECK-NEXT:    [[MUL200_I_I_I:%.*]] = fmul <4 x float> [[ASTYPE178_I_I_I]],
+// splat (float 0x3FF921FB40000000) CHECK-NEXT:    [[FNEG_I_I_I:%.*]] = fneg <4
+// x float> [[MUL200_I_I_I]] CHECK-NEXT:    [[TMP27:%.*]] = tail call noundef <4
+// x float> @llvm.fma.v4f32(<4 x float> [[ASTYPE178_I_I_I]], <4 x float> splat
+// (float 0x3FF921FB40000000), <4 x float> [[FNEG_I_I_I]]) CHECK-NEXT:
+// [[TMP28:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float>
+// [[ASTYPE178_I_I_I]], <4 x float> splat (float 0x3E74442D00000000), <4 x
+// float> [[TMP27]]) CHECK-NEXT:    [[TMP29:%.*]] = tail call noundef <4 x
+// float> @llvm.fma.v4f32(<4 x float> [[ASTYPE199_I_I_I]], <4 x float> splat
+// (float 0x3FF921FB40000000), <4 x float> [[TMP28]]) CHECK-NEXT:
+// [[ADD204_I_I_I:%.*]] = fadd <4 x float> [[MUL200_I_I_I]], [[TMP29]]
+// CHECK-NEXT:    [[SUB205_I_I_I:%.*]] = fsub <4 x float> [[ADD204_I_I_I]],
+// [[MUL200_I_I_I]] CHECK-NEXT:    [[SUB206_I_I_I:%.*]] = fsub <4 x float>
+// [[TMP29]], [[SUB205_I_I_I]] CHECK-NEXT:    [[SHR207_I_I_I:%.*]] = lshr <4 x
+// i32> [[COND131_I_I_I]], splat (i32 30) CHECK-NEXT:    [[ADD209_I_I_I:%.*]] =
+// add nuw nsw <4 x i32> [[AND146_I_I_I]], [[SHR207_I_I_I]] CHECK-NEXT:
+// [[COND_V_I2_I:%.*]] = select <4 x i1> [[CMP_I_I]], <4 x float>
+// [[ADD13_I_I_I_I]], <4 x float> [[ADD204_I_I_I]] CHECK-NEXT:
+// [[COND4_V_I_I:%.*]] = select <4 x i1> [[CMP_I_I]], <4 x float>
+// [[FNEG_I_I_I_I]], <4 x float> [[SUB206_I_I_I]] CHECK-NEXT:
+// [[COND6_V_I_I:%.*]] = select <4 x i1> [[CMP_I_I]], <4 x i32> [[CONV_I_I_I]],
+// <4 x i32> [[ADD209_I_I_I]] CHECK-NEXT:    [[COND6_I_I:%.*]] = and <4 x i32>
+// [[COND6_V_I_I]], splat (i32 2) CHECK-NEXT:    [[MUL_I_I:%.*]] = fmul <4 x
+// float> [[COND_V_I2_I]], [[COND_V_I2_I]] CHECK-NEXT:    [[MUL1_I_I:%.*]] =
+// fmul <4 x float> [[COND_V_I2_I]], [[MUL_I_I]] CHECK-NEXT:    [[TMP30:%.*]] =
+// tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4
+// x float> splat (float 0x3DE5D93A60000000), <4 x float> splat (float
+// 0xBE5AE5E680000000)) CHECK-NEXT:    [[TMP31:%.*]] = tail call noundef <4 x
+// float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP30]], <4
+// x float> splat (float 0x3EC6DBE4A0000000)) CHECK-NEXT:    [[TMP32:%.*]] =
+// tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4
+// x float> [[TMP31]], <4 x float> splat (float 0xBF2A013A80000000)) CHECK-NEXT:
+// [[TMP33:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float>
+// [[MUL_I_I]], <4 x float> [[TMP32]], <4 x float> splat (float
+// 0x3F811110E0000000)) CHECK-NEXT:    [[FNEG_I3_I:%.*]] = fneg <4 x float>
+// [[MUL1_I_I]] CHECK-NEXT:    [[MUL5_I_I:%.*]] = fmul <4 x float> [[TMP33]],
+// [[FNEG_I3_I]] CHECK-NEXT:    [[TMP34:%.*]] = tail call noundef <4 x float>
+// @llvm.fmuladd.v4f32(<4 x float> [[COND4_V_I_I]], <4 x float> splat
+// (float 5.000000e-01), <4 x float> [[MUL5_I_I]]) CHECK-NEXT: [[FNEG7_I_I:%.*]]
+// = fneg <4 x float> [[COND4_V_I_I]] CHECK-NEXT:    [[TMP35:%.*]] = tail call
+// noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float>
+// [[TMP34]], <4 x float> [[FNEG7_I_I]]) CHECK-NEXT:    [[TMP36:%.*]] = tail
+// call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL1_I_I]], <4 x
+// float> splat (float 0x3FC5555560000000), <4 x float> [[TMP35]]) CHECK-NEXT:
+// [[SUB_I_I:%.*]] = fsub <4 x float> [[COND_V_I2_I]], [[TMP36]] CHECK-NEXT:
+// [[FNEG_I_I:%.*]] = fneg <4 x float> [[SUB_I_I]] CHECK-NEXT:    [[TMP37:%.*]]
+// = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]],
+// <4 x float> splat (float 0xBDA8FAE9C0000000), <4 x float> splat (float
+// 0x3E21EE9EC0000000)) CHECK-NEXT:    [[TMP38:%.*]] = tail call noundef <4 x
+// float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP37]], <4
+// x float> splat (float 0xBE92524740000000)) CHECK-NEXT:    [[TMP39:%.*]] =
+// tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4
+// x float> [[TMP38]], <4 x float> splat (float 0x3EFA015C40000000)) CHECK-NEXT:
+// [[TMP40:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float>
+// [[MUL_I_I]], <4 x float> [[TMP39]], <4 x float> splat (float
+// 0xBF56C16C00000000)) CHECK-NEXT:    [[TMP41:%.*]] = tail call noundef <4 x
+// float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[TMP40]], <4
+// x float> splat (float 0x3FA5555560000000)) CHECK-NEXT:    [[MUL5_I5_I:%.*]] =
+// fmul <4 x float> [[MUL_I_I]], [[TMP41]] CHECK-NEXT:    [[TMP42:%.*]] = tail
+// call <4 x float> @llvm.fabs.v4f32(<4 x float> [[COND_V_I2_I]]) CHECK-NEXT:
+// [[AND_I_I:%.*]] = bitcast <4 x float> [[TMP42]] to <4 x i32> CHECK-NEXT:
+// [[SUB_I6_I:%.*]] = add nsw <4 x i32> [[AND_I_I]], splat (i32 -16777216)
+// CHECK-NEXT:    [[TMP43:%.*]] = add nsw <4 x i32> [[AND_I_I]], splat (i32
+// -1050253722) CHECK-NEXT:    [[AND938_I_I:%.*]] = icmp ult <4 x i32>
+// [[TMP43]], splat (i32 11429479) CHECK-NEXT:    [[TMP44:%.*]] = select <4 x
+// i1> [[AND938_I_I]], <4 x i32> [[SUB_I6_I]], <4 x i32> zeroinitializer
+// CHECK-NEXT:    [[CMP11_I_I:%.*]] = icmp samesign ugt <4 x i32> [[AND_I_I]],
+// splat (i32 1061683200) CHECK-NEXT:    [[COND14_I_I:%.*]] = select <4 x i1>
+// [[CMP11_I_I]], <4 x i32> splat (i32 1049624576), <4 x i32> [[TMP44]]
+// CHECK-NEXT:    [[TMP45:%.*]] = bitcast <4 x i32> [[COND14_I_I]] to <4 x
+// float> CHECK-NEXT:    [[FNEG_I7_I:%.*]] = fneg <4 x float> [[TMP45]]
+// CHECK-NEXT:    [[TMP46:%.*]] = tail call noundef <4 x float>
+// @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> splat
+// (float 5.000000e-01), <4 x float> [[FNEG_I7_I]]) CHECK-NEXT:
+// [[SUB16_I_I:%.*]] = fsub <4 x float> splat (float 1.000000e+00), [[TMP45]]
 // CHECK-NEXT:    [[FNEG17_I_I:%.*]] = fneg <4 x float> [[COND_V_I2_I]]
-// CHECK-NEXT:    [[MUL18_I_I:%.*]] = fmul <4 x float> [[COND4_V_I_I]], [[FNEG17_I_I]]
-// CHECK-NEXT:    [[TMP47:%.*]] = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[MUL5_I5_I]], <4 x float> [[MUL18_I_I]])
-// CHECK-NEXT:    [[TMP48:%.*]] = fsub <4 x float> [[TMP47]], [[TMP46]]
-// CHECK-NEXT:    [[SUB21_I_I:%.*]] = fadd <4 x float> [[SUB16_I_I]], [[TMP48]]
-// CHECK-NEXT:    [[TMP49:%.*]] = and <4 x i32> [[COND6_V_I_I]], splat (i32 1)
-// CHECK-NEXT:    [[TMP50:%.*]] = icmp eq <4 x i32> [[TMP49]], zeroinitializer
-// CHECK-NEXT:    [[COND_V_I_I:%.*]] = select <4 x i1> [[TMP50]], <4 x float> [[SUB21_I_I]], <4 x float> [[FNEG_I_I]]
-// CHECK-NEXT:    [[COND_I_I:%.*]] = bitcast <4 x float> [[COND_V_I_I]] to <4 x i32>
-// CHECK-NEXT:    [[CMP5_I_I:%.*]] = icmp ne <4 x i32> [[COND6_I_I]], zeroinitializer
-// CHECK-NEXT:    [[SEXT6_I_I:%.*]] = sext <4 x i1> [[CMP5_I_I]] to <4 x i32>
-// CHECK-NEXT:    [[SHL_I_I:%.*]] = shl nsw <4 x i32> [[SEXT6_I_I]], splat (i32 31)
-// CHECK-NEXT:    [[XOR_I_I:%.*]] = xor <4 x i32> [[SHL_I_I]], [[COND_I_I]]
-// CHECK-NEXT:    [[ASTYPE7_I_I:%.*]] = bitcast <4 x i32> [[XOR_I_I]] to <4 x float>
-// CHECK-NEXT:    [[TMP51:%.*]] = fcmp ueq <4 x float> [[ELT_ABS_I_I_I]], splat (float 0x7FF0000000000000)
-// CHECK-NEXT:    [[COND_V_I_I_I:%.*]] = select <4 x i1> [[TMP51]], <4 x float> splat (float 0x7FF8000000000000), <4 x float> [[ASTYPE7_I_I]]
-// CHECK-NEXT:    store <4 x float> [[COND_V_I_I_I]], ptr addrspace(1) [[F]], align 16, !tbaa [[TBAA11]]
+// CHECK-NEXT:    [[MUL18_I_I:%.*]] = fmul <4 x float> [[COND4_V_I_I]],
+// [[FNEG17_I_I]] CHECK-NEXT:    [[TMP47:%.*]] = tail call noundef <4 x float>
+// @llvm.fmuladd.v4f32(<4 x float> [[MUL_I_I]], <4 x float> [[MUL5_I5_I]], <4 x
+// float> [[MUL18_I_I]]) CHECK-NEXT:    [[TMP48:%.*]] = fsub <4 x float>
+// [[TMP47]], [[TMP46]] CHECK-NEXT:    [[SUB21_I_I:%.*]] = fadd <4 x float>
+// [[SUB16_I_I]], [[TMP48]] CHECK-NEXT:    [[TMP49:%.*]] = and <4 x i32>
+// [[COND6_V_I_I]], splat (i32 1) CHECK-NEXT:    [[TMP50:%.*]] = icmp eq <4 x
+// i32> [[TMP49]], zeroinitializer CHECK-NEXT:    [[COND_V_I_I:%.*]] = select <4
+// x i1> [[TMP50]], <4 x float> [[SUB21_I_I]], <4 x float> [[FNEG_I_I]]
+// CHECK-NEXT:    [[COND_I_I:%.*]] = bitcast <4 x float> [[COND_V_I_I]] to <4 x
+// i32> CHECK-NEXT:    [[CMP5_I_I:%.*]] = icmp ne <4 x i32> [[COND6_I_I]],
+// zeroinitializer CHECK-NEXT:    [[SEXT6_I_I:%.*]] = sext <4 x i1> [[CMP5_I_I]]
+// to <4 x i32> CHECK-NEXT:    [[SHL_I_I:%.*]] = shl nsw <4 x i32>
+// [[SEXT6_I_I]], splat (i32 31) CHECK-NEXT:    [[XOR_I_I:%.*]] = xor <4 x i32>
+// [[SHL_I_I]], [[COND_I_I]] CHECK-NEXT:    [[ASTYPE7_I_I:%.*]] = bitcast <4 x
+// i32> [[XOR_I_I]] to <4 x float> CHECK-NEXT:    [[TMP51:%.*]] = fcmp ueq <4 x
+// float> [[ELT_ABS_I_I_I]], splat (float 0x7FF0000000000000) CHECK-NEXT:
+// [[COND_V_I_I_I:%.*]] = select <4 x i1> [[TMP51]], <4 x float> splat (float
+// 0x7FF8000000000000), <4 x float> [[ASTYPE7_I_I]] CHECK-NEXT:    store <4 x
+// float> [[COND_V_I_I_I]], ptr addrspace(1) [[F]], align 16, !tbaa [[TBAA11]]
 // CHECK-NEXT:    ret void
 //
-__kernel void foo(__global float4 *f) {
-  *f = cos(*f);
-}
+__kernel void foo(__global float4 *f) { *f = cos(*f); }
 //.
 // CHECK: [[META6]] = !{i32 1}
 // CHECK: [[META7]] = !{!"none"}
diff --git a/libclc/test/math/fabs.cl b/libclc/test/math/fabs.cl
index 5935fc9f6..1ee1074b0 100644
--- a/libclc/test/math/fabs.cl
+++ b/libclc/test/math/fabs.cl
@@ -1,4 +1,5 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// UTC_ARGS: --version 5
 //===----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -7,19 +8,23 @@
 //
 //===----------------------------------------------------------------------===//
 
-// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s
+// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o -
+// --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s |
+// FileCheck %s
 
 // CHECK-LABEL: define protected amdgpu_kernel void @foo(
-// CHECK-SAME: ptr addrspace(1) noundef align 4 captures(none) [[F:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META9:![0-9]+]] {
+// CHECK-SAME: ptr addrspace(1) noundef align 4 captures(none) [[F:%.*]])
+// local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]]
+// !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]]
+// !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META9:![0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr addrspace(1) [[F]], align 4, !tbaa [[TBAA10:![0-9]+]]
-// CHECK-NEXT:    [[ELT_ABS_I_I:%.*]] = tail call noundef float @llvm.fabs.f32(float [[TMP0]])
-// CHECK-NEXT:    store float [[ELT_ABS_I_I]], ptr addrspace(1) [[F]], align 4, !tbaa [[TBAA10]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr addrspace(1) [[F]], align 4,
+// !tbaa [[TBAA10:![0-9]+]] CHECK-NEXT:    [[ELT_ABS_I_I:%.*]] = tail call
+// noundef float @llvm.fabs.f32(float [[TMP0]]) CHECK-NEXT:    store float
+// [[ELT_ABS_I_I]], ptr addrspace(1) [[F]], align 4, !tbaa [[TBAA10]]
 // CHECK-NEXT:    ret void
 //
-__kernel void foo(__global float *f) {
-  *f = fabs(*f);
-}
+__kernel void foo(__global float *f) { *f = fabs(*f); }
 //.
 // CHECK: [[META6]] = !{i32 1}
 // CHECK: [[META7]] = !{!"none"}
diff --git a/libclc/test/math/rsqrt.cl b/libclc/test/math/rsqrt.cl
index fa5e13f27..ce5c54485 100644
--- a/libclc/test/math/rsqrt.cl
+++ b/libclc/test/math/rsqrt.cl
@@ -1,4 +1,5 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// UTC_ARGS: --version 5
 //===----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -7,25 +8,36 @@
 //
 //===----------------------------------------------------------------------===//
 
-// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s
+// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o -
+// --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s |
+// FileCheck %s
 
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 
 #if defined(cl_khr_fp64)
 
 // CHECK-LABEL: define protected amdgpu_kernel void @foo(
-// CHECK-SAME: ptr addrspace(1) noundef align 16 captures(none) initializes((16, 32)) [[X:%.*]], ptr addrspace(1) noundef align 32 captures(none) initializes((32, 64)) [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META9:![0-9]+]] !kernel_arg_type_qual [[META10:![0-9]+]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr addrspace(1) [[X]], align 16, !tbaa [[TBAA11:![0-9]+]]
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]]), !fpmath [[META14:![0-9]+]]
-// CHECK-NEXT:    [[DIV_I_I:%.*]] = fdiv contract <4 x float> splat (float 1.000000e+00), [[TMP1]], !fpmath [[META15:![0-9]+]]
-// CHECK-NEXT:    [[ARRAYIDX1_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[X]], i64 16
-// CHECK-NEXT:    store <4 x float> [[DIV_I_I]], ptr addrspace(1) [[ARRAYIDX1_I]], align 16, !tbaa [[TBAA11]]
-// CHECK-NEXT:    [[TMP2:%.*]] = load <4 x double>, ptr addrspace(1) [[Y]], align 32, !tbaa [[TBAA11]]
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call contract <4 x double> @llvm.sqrt.v4f64(<4 x double> [[TMP2]])
-// CHECK-NEXT:    [[DIV_I_I1:%.*]] = fdiv contract <4 x double> splat (double 1.000000e+00), [[TMP3]]
-// CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[Y]], i64 32
-// CHECK-NEXT:    store <4 x double> [[DIV_I_I1]], ptr addrspace(1) [[ARRAYIDX4_I]], align 32, !tbaa [[TBAA11]]
+// CHECK-SAME: ptr addrspace(1) noundef align 16 captures(none) initializes((16,
+// 32)) [[X:%.*]], ptr addrspace(1) noundef align 32 captures(none)
+// initializes((32, 64)) [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]]
+// !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual
+// [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type
+// [[META9:![0-9]+]] !kernel_arg_type_qual [[META10:![0-9]+]] { CHECK-NEXT:
+// [[ENTRY:.*:]] CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr
+// addrspace(1) [[X]], align 16, !tbaa [[TBAA11:![0-9]+]] CHECK-NEXT:
+// [[TMP1:%.*]] = tail call contract <4 x float> @llvm.sqrt.v4f32(<4 x float>
+// [[TMP0]]), !fpmath [[META14:![0-9]+]] CHECK-NEXT:    [[DIV_I_I:%.*]] = fdiv
+// contract <4 x float> splat (float 1.000000e+00), [[TMP1]], !fpmath
+// [[META15:![0-9]+]] CHECK-NEXT:    [[ARRAYIDX1_I:%.*]] = getelementptr
+// inbounds nuw i8, ptr addrspace(1) [[X]], i64 16 CHECK-NEXT:    store <4 x
+// float> [[DIV_I_I]], ptr addrspace(1) [[ARRAYIDX1_I]], align 16, !tbaa
+// [[TBAA11]] CHECK-NEXT:    [[TMP2:%.*]] = load <4 x double>, ptr addrspace(1)
+// [[Y]], align 32, !tbaa [[TBAA11]] CHECK-NEXT:    [[TMP3:%.*]] = tail call
+// contract <4 x double> @llvm.sqrt.v4f64(<4 x double> [[TMP2]]) CHECK-NEXT:
+// [[DIV_I_I1:%.*]] = fdiv contract <4 x double> splat (double 1.000000e+00),
+// [[TMP3]] CHECK-NEXT:    [[ARRAYIDX4_I:%.*]] = getelementptr inbounds nuw i8,
+// ptr addrspace(1) [[Y]], i64 32 CHECK-NEXT:    store <4 x double>
+// [[DIV_I_I1]], ptr addrspace(1) [[ARRAYIDX4_I]], align 32, !tbaa [[TBAA11]]
 // CHECK-NEXT:    ret void
 //
 __kernel void foo(__global float4 *x, __global double4 *y) {
@@ -38,11 +50,10 @@ __kernel void foo(__global float4 *x, __global double4 *y) {
 // CHECK: [[META6]] = !{i32 1, i32 1}
 // CHECK: [[META7]] = !{!"none", !"none"}
 // CHECK: [[META8]] = !{!"float4*", !"double4*"}
-// CHECK: [[META9]] = !{!"float __attribute__((ext_vector_type(4)))*", !"double __attribute__((ext_vector_type(4)))*"}
-// CHECK: [[META10]] = !{!"", !""}
-// CHECK: [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0}
-// CHECK: [[META12]] = !{!"omnipotent char", [[META13:![0-9]+]], i64 0}
-// CHECK: [[META13]] = !{!"Simple C/C++ TBAA"}
-// CHECK: [[META14]] = !{float 3.000000e+00}
-// CHECK: [[META15]] = !{float 2.500000e+00}
+// CHECK: [[META9]] = !{!"float __attribute__((ext_vector_type(4)))*", !"double
+// __attribute__((ext_vector_type(4)))*"} CHECK: [[META10]] = !{!"", !""} CHECK:
+// [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0} CHECK: [[META12]] =
+// !{!"omnipotent char", [[META13:![0-9]+]], i64 0} CHECK: [[META13]] =
+// !{!"Simple C/C++ TBAA"} CHECK: [[META14]] = !{float 3.000000e+00} CHECK:
+// [[META15]] = !{float 2.500000e+00}
 //.
diff --git a/libclc/test/misc/as_type.cl b/libclc/test/misc/as_type.cl
index a475956e6..fa00577f3 100644
--- a/libclc/test/misc/as_type.cl
+++ b/libclc/test/misc/as_type.cl
@@ -1,4 +1,5 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// UTC_ARGS: --version 5
 //===----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -7,25 +8,30 @@
 //
 //===----------------------------------------------------------------------===//
 
-// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s
+// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o -
+// --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s |
+// FileCheck %s
 
 // CHECK-LABEL: define protected amdgpu_kernel void @foo(
-// CHECK-SAME: ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[X:%.*]], ptr addrspace(1) noundef readonly align 16 captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META9:![0-9]+]] !kernel_arg_type_qual [[META10:![0-9]+]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr addrspace(1) [[Y]], align 16, !tbaa [[TBAA11:![0-9]+]]
-// CHECK-NEXT:    store <4 x i32> [[TMP0]], ptr addrspace(1) [[X]], align 16, !tbaa [[TBAA11]]
-// CHECK-NEXT:    ret void
+// CHECK-SAME: ptr addrspace(1) noundef writeonly align 16 captures(none)
+// initializes((0, 16)) [[X:%.*]], ptr addrspace(1) noundef readonly align 16
+// captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]]
+// !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual
+// [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type
+// [[META9:![0-9]+]] !kernel_arg_type_qual [[META10:![0-9]+]] { CHECK-NEXT:
+// [[ENTRY:.*:]] CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr addrspace(1)
+// [[Y]], align 16, !tbaa [[TBAA11:![0-9]+]] CHECK-NEXT:    store <4 x i32>
+// [[TMP0]], ptr addrspace(1) [[X]], align 16, !tbaa [[TBAA11]] CHECK-NEXT: ret
+// void
 //
-__kernel void foo(__global int4 *x, __global float4 *y) {
-  *x = as_int4(*y);
-}
+__kernel void foo(__global int4 *x, __global float4 *y) { *x = as_int4(*y); }
 //.
 // CHECK: [[META6]] = !{i32 1, i32 1}
 // CHECK: [[META7]] = !{!"none", !"none"}
 // CHECK: [[META8]] = !{!"int4*", !"float4*"}
-// CHECK: [[META9]] = !{!"int __attribute__((ext_vector_type(4)))*", !"float __attribute__((ext_vector_type(4)))*"}
-// CHECK: [[META10]] = !{!"", !""}
-// CHECK: [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0}
-// CHECK: [[META12]] = !{!"omnipotent char", [[META13:![0-9]+]], i64 0}
-// CHECK: [[META13]] = !{!"Simple C/C++ TBAA"}
+// CHECK: [[META9]] = !{!"int __attribute__((ext_vector_type(4)))*", !"float
+// __attribute__((ext_vector_type(4)))*"} CHECK: [[META10]] = !{!"", !""} CHECK:
+// [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0} CHECK: [[META12]] =
+// !{!"omnipotent char", [[META13:![0-9]+]], i64 0} CHECK: [[META13]] =
+// !{!"Simple C/C++ TBAA"}
 //.
diff --git a/libclc/test/misc/convert.cl b/libclc/test/misc/convert.cl
index cd8c41465..2959ab1e8 100644
--- a/libclc/test/misc/convert.cl
+++ b/libclc/test/misc/convert.cl
@@ -1,4 +1,5 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// UTC_ARGS: --version 5
 //===----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -7,14 +8,21 @@
 //
 //===----------------------------------------------------------------------===//
 
-// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s
+// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o -
+// --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s |
+// FileCheck %s
 
 // CHECK-LABEL: define protected amdgpu_kernel void @foo(
-// CHECK-SAME: ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[X:%.*]], ptr addrspace(1) noundef readonly align 16 captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META9:![0-9]+]] !kernel_arg_type_qual [[META10:![0-9]+]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr addrspace(1) [[Y]], align 16, !tbaa [[TBAA11:![0-9]+]]
-// CHECK-NEXT:    [[CONV_I_I:%.*]] = fptosi <4 x float> [[TMP0]] to <4 x i32>
-// CHECK-NEXT:    store <4 x i32> [[CONV_I_I]], ptr addrspace(1) [[X]], align 16, !tbaa [[TBAA11]]
+// CHECK-SAME: ptr addrspace(1) noundef writeonly align 16 captures(none)
+// initializes((0, 16)) [[X:%.*]], ptr addrspace(1) noundef readonly align 16
+// captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]]
+// !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual
+// [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type
+// [[META9:![0-9]+]] !kernel_arg_type_qual [[META10:![0-9]+]] { CHECK-NEXT:
+// [[ENTRY:.*:]] CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr
+// addrspace(1) [[Y]], align 16, !tbaa [[TBAA11:![0-9]+]] CHECK-NEXT:
+// [[CONV_I_I:%.*]] = fptosi <4 x float> [[TMP0]] to <4 x i32> CHECK-NEXT: store
+// <4 x i32> [[CONV_I_I]], ptr addrspace(1) [[X]], align 16, !tbaa [[TBAA11]]
 // CHECK-NEXT:    ret void
 //
 __kernel void foo(__global int4 *x, __global float4 *y) {
@@ -24,9 +32,9 @@ __kernel void foo(__global int4 *x, __global float4 *y) {
 // CHECK: [[META6]] = !{i32 1, i32 1}
 // CHECK: [[META7]] = !{!"none", !"none"}
 // CHECK: [[META8]] = !{!"int4*", !"float4*"}
-// CHECK: [[META9]] = !{!"int __attribute__((ext_vector_type(4)))*", !"float __attribute__((ext_vector_type(4)))*"}
-// CHECK: [[META10]] = !{!"", !""}
-// CHECK: [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0}
-// CHECK: [[META12]] = !{!"omnipotent char", [[META13:![0-9]+]], i64 0}
-// CHECK: [[META13]] = !{!"Simple C/C++ TBAA"}
+// CHECK: [[META9]] = !{!"int __attribute__((ext_vector_type(4)))*", !"float
+// __attribute__((ext_vector_type(4)))*"} CHECK: [[META10]] = !{!"", !""} CHECK:
+// [[TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0} CHECK: [[META12]] =
+// !{!"omnipotent char", [[META13:![0-9]+]], i64 0} CHECK: [[META13]] =
+// !{!"Simple C/C++ TBAA"}
 //.
diff --git a/libclc/test/work-item/get_group_id.cl b/libclc/test/work-item/get_group_id.cl
index f73f8f76c..6a51a2280 100644
--- a/libclc/test/work-item/get_group_id.cl
+++ b/libclc/test/work-item/get_group_id.cl
@@ -1,4 +1,5 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// UTC_ARGS: --version 5
 //===----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -7,20 +8,23 @@
 //
 //===----------------------------------------------------------------------===//
 
-// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o - --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s | FileCheck %s
+// RUN: %clang -target amdgcn-mesa-mesa3d -O1 -emit-llvm -S -o -
+// --libclc-lib=tahiti-amdgcn-mesa-mesa3d -fno-builtin --no-offloadlib %s |
+// FileCheck %s
 
 // CHECK-LABEL: define protected amdgpu_kernel void @foo(
-// CHECK-SAME: ptr addrspace(1) noundef writeonly align 4 captures(none) [[I:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META9:![0-9]+]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.workgroup.id.x()
-// CHECK-NEXT:    [[RETVAL_0_I:%.*]] = zext i32 [[TMP0]] to i64
-// CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(1) [[I]], i64 [[RETVAL_0_I]]
-// CHECK-NEXT:    store i32 1, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa [[TBAA10:![0-9]+]]
-// CHECK-NEXT:    ret void
+// CHECK-SAME: ptr addrspace(1) noundef writeonly align 4 captures(none)
+// [[I:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space
+// [[META6:![0-9]+]] !kernel_arg_access_qual [[META7:![0-9]+]] !kernel_arg_type
+// [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual
+// [[META9:![0-9]+]] { CHECK-NEXT:  [[ENTRY:.*:]] CHECK-NEXT:    [[TMP0:%.*]] =
+// tail call i32 @llvm.amdgcn.workgroup.id.x() CHECK-NEXT:    [[RETVAL_0_I:%.*]]
+// = zext i32 [[TMP0]] to i64 CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr
+// inbounds nuw i32, ptr addrspace(1) [[I]], i64 [[RETVAL_0_I]] CHECK-NEXT:
+// store i32 1, ptr addrspace(1) [[ARRAYIDX_I]], align 4, !tbaa
+// [[TBAA10:![0-9]+]] CHECK-NEXT:    ret void
 //
-__kernel void foo(__global int *i) {
-  i[get_group_id(0)] = 1;
-}
+__kernel void foo(__global int *i) { i[get_group_id(0)] = 1; }
 //.
 // CHECK: [[META6]] = !{i32 1}
 // CHECK: [[META7]] = !{!"none"}

``````````

</details>


https://github.com/llvm/llvm-project/pull/87989