[clang] [CIR][AMDGPU] Adds lowering for amdgcn extended image sample/gather4 builtins (PR #201761)

Thu Jun 4 23:25:34 PDT 2026

https://github.com/ranapratap55 created https://github.com/llvm/llvm-project/pull/201761

Support for lowering of` __builtin_amdgcn_image_sample/gather4` for AMDGPU builtins to clangIR.
Followed similar lowering from clang->llvmir: `clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp`.

Upstreaming clangIR PR: [llvm/clangir#2083](https://github.com/llvm/clangir/pull/2083)

>From 70fca464c4c7e20c3202cd2880af523bddc5a7da Mon Sep 17 00:00:00 2001
From: ranapratap55 <RanaPratapReddy.Nimmakayala at amd.com>
Date: Fri, 5 Jun 2026 11:40:44 +0530
Subject: [PATCH] [CIR][AMDGPU] Adds lowering for amdgcn extended image
 sample/gather4 builtins

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp |  62 +++-
 .../builtins-amdgcn-extended-image.hip        | 350 ++++++++++++++++++
 2 files changed, 400 insertions(+), 12 deletions(-)
 create mode 100644 clang/test/CIR/CodeGenHIP/builtins-amdgcn-extended-image.hip

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
index c22d7d8f8e3b1..4506eeb61a4fb 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
@@ -650,69 +650,107 @@ CIRGenFunction::emitAMDGPUBuiltinExpr(unsigned builtinId,
         *this, expr, "amdgcn.image.store.mip.cube", true);
   case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.sample.1d", false);
   case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, expr, "amdgcn.image.sample.1darray", false);
   case AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.sample.2d", false);
   case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, expr, "amdgcn.image.sample.2darray", false);
   case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.sample.3d", false);
   case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.sample.cube", false);
   case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f16_f32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.sample.lz.1d", false);
   case AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f16_f32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.sample.l.1d", false);
   case AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f16_f32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.sample.d.1d", false);
   case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f16_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_f32_f32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.sample.lz.2d", false);
   case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f16_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_f32_f32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.sample.l.2d", false);
   case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f16_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_f32_f32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.sample.d.2d", false);
   case AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f16_f32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.sample.lz.3d", false);
   case AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f16_f32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.sample.l.3d", false);
   case AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f16_f32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.sample.d.3d", false);
   case AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f16_f32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.sample.lz.cube", false);
   case AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f16_f32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.sample.l.cube", false);
   case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.sample.lz.1darray", false);
   case AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f16_f32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.sample.l.1darray", false);
   case AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f16_f32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.sample.d.1darray", false);
   case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_f32_f32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.sample.lz.2darray", false);
   case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f16_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_f32_f32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.sample.l.2darray", false);
   case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f16_f32:
-  case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32: {
-    cgm.errorNYI(expr->getSourceRange(),
-                 std::string("unimplemented AMDGPU builtin call: ") +
-                     getContext().BuiltinInfo.getName(builtinId));
-    return mlir::Value{};
-  }
-  case AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32: {
-    cgm.errorNYI(expr->getSourceRange(),
-                 std::string("unimplemented AMDGPU builtin call: ") +
-                     getContext().BuiltinInfo.getName(builtinId));
-    return mlir::Value{};
-  }
+  case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.sample.d.2darray", false);
+  case AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.gather4.lz.2d", false);
   case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
   case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
     cgm.errorNYI(expr->getSourceRange(),
diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-extended-image.hip b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-extended-image.hip
new file mode 100644
index 0000000000000..c69925399d900
--- /dev/null
+++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-extended-image.hip
@@ -0,0 +1,350 @@
+#include "../CodeGenCUDA/Inputs/cuda.h"
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1100 -target-feature +extended-image-insts \
+// RUN:            -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1100 -target-feature +extended-image-insts \
+// RUN:            -fcuda-is-device -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx1100 -target-feature +extended-image-insts \
+// RUN:            -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef int int4 __attribute__((ext_vector_type(4)));
+typedef _Float16 half;
+typedef half half4 __attribute__((ext_vector_type(4)));
+
+// CIR-LABEL: @_Z24test_gather4_lz_2d_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.gather4.lz.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float>
+// LLVM: define{{.*}} <4 x float> @_Z24test_gather4_lz_2d_v4f32ffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float4 test_gather4_lz_2d_v4f32(float s, float t, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(1, s, t, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z23test_sample_lz_1d_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.lz.1d" {{.*}} : (!s32i, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float>
+// LLVM: define{{.*}} <4 x float> @_Z23test_sample_lz_1d_v4f32fu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float4 test_sample_lz_1d_v4f32(float s, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_lz_1d_v4f32_f32(15, s, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z22test_sample_l_1d_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.l.1d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float>
+// LLVM: define{{.*}} <4 x float> @_Z22test_sample_l_1d_v4f32ffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float4 test_sample_l_1d_v4f32(float s, float lod, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_l_1d_v4f32_f32(15, s, lod, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z22test_sample_d_1d_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.d.1d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float>
+// LLVM: define{{.*}} <4 x float> @_Z22test_sample_d_1d_v4f32fffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float4 test_sample_d_1d_v4f32(float dsdx, float dsdy, float s, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_d_1d_v4f32_f32(15, dsdx, dsdy, s, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z23test_sample_lz_2d_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.lz.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float>
+// LLVM: define{{.*}} <4 x float> @_Z23test_sample_lz_2d_v4f32ffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float4 test_sample_lz_2d_v4f32(float s, float t, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_lz_2d_v4f32_f32(15, s, t, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z22test_sample_l_2d_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.l.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float>
+// LLVM: define{{.*}} <4 x float> @_Z22test_sample_l_2d_v4f32fffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float4 test_sample_l_2d_v4f32(float s, float t, float lod, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_l_2d_v4f32_f32(10, s, t, lod, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z22test_sample_d_2d_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.d.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float>
+// LLVM: define{{.*}} <4 x float> @_Z22test_sample_d_2d_v4f32ffffffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float4 test_sample_d_2d_v4f32(float dsdx, float dtdx, float dsdy, float dtdy, float s, float t, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_d_2d_v4f32_f32(15, dsdx, dtdx, dsdy, dtdy, s, t, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z23test_sample_lz_3d_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.lz.3d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float>
+// LLVM: define{{.*}} <4 x float> @_Z23test_sample_lz_3d_v4f32fffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.lz.3d.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float4 test_sample_lz_3d_v4f32(float s, float t, float r, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_lz_3d_v4f32_f32(15, s, t, r, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z22test_sample_l_3d_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.l.3d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float>
+// LLVM: define{{.*}} <4 x float> @_Z22test_sample_l_3d_v4f32ffffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.l.3d.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float4 test_sample_l_3d_v4f32(float s, float t, float r, float lod, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_l_3d_v4f32_f32(1, s, t, r, lod, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z22test_sample_d_3d_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.d.3d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float>
+// LLVM: define{{.*}} <4 x float> @_Z22test_sample_d_3d_v4f32fffffffffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float4 test_sample_d_3d_v4f32(float dsdx, float dtdx, float drdx, float dsdy, float dtdy, float drdy, float s, float t, float r, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_d_3d_v4f32_f32(1, dsdx, dtdx, drdx, dsdy, dtdy, drdy, s, t, r, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z25test_sample_lz_cube_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.lz.cube" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float>
+// LLVM: define{{.*}} <4 x float> @_Z25test_sample_lz_cube_v4f32fffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.lz.cube.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float4 test_sample_lz_cube_v4f32(float s, float t, float face, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_lz_cube_v4f32_f32(1, s, t, face, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z24test_sample_l_cube_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.l.cube" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float>
+// LLVM: define{{.*}} <4 x float> @_Z24test_sample_l_cube_v4f32ffffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.l.cube.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float4 test_sample_l_cube_v4f32(float s, float t, float face, float lod, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_l_cube_v4f32_f32(1, s, t, face, lod, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z28test_sample_lz_1darray_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.lz.1darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float>
+// LLVM: define{{.*}} <4 x float> @_Z28test_sample_lz_1darray_v4f32ffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.lz.1darray.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float4 test_sample_lz_1darray_v4f32(float s, float slice, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_lz_1darray_v4f32_f32(1, s, slice, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z27test_sample_l_1darray_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.l.1darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float>
+// LLVM: define{{.*}} <4 x float> @_Z27test_sample_l_1darray_v4f32fffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.l.1darray.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float4 test_sample_l_1darray_v4f32(float s, float slice, float lod, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_l_1darray_v4f32_f32(1, s, slice, lod, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z27test_sample_d_1darray_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.d.1darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float>
+// LLVM: define{{.*}} <4 x float> @_Z27test_sample_d_1darray_v4f32ffffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.d.1darray.v4f32.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float4 test_sample_d_1darray_v4f32(float dsdx, float dsdy, float s, float slice, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_d_1darray_v4f32_f32(1, dsdx, dsdy, s, slice, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z28test_sample_lz_2darray_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.lz.2darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float>
+// LLVM: define{{.*}} <4 x float> @_Z28test_sample_lz_2darray_v4f32fffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.lz.2darray.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float4 test_sample_lz_2darray_v4f32(float s, float t, float slice, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_lz_2darray_v4f32_f32(1, s, t, slice, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z27test_sample_l_2darray_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.l.2darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float>
+// LLVM: define{{.*}} <4 x float> @_Z27test_sample_l_2darray_v4f32ffffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.l.2darray.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float4 test_sample_l_2darray_v4f32(float s, float t, float slice, float lod, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_l_2darray_v4f32_f32(1, s, t, slice, lod, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z27test_sample_d_2darray_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.d.2darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.float>
+// LLVM: define{{.*}} <4 x float> @_Z27test_sample_d_2darray_v4f32fffffffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.sample.d.2darray.v4f32.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float4 test_sample_d_2darray_v4f32(float dsdx, float dtdx, float dsdy, float dtdy, float s, float t, float slice, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_d_2darray_v4f32_f32(1, dsdx, dtdx, dsdy, dtdy, s, t, slice, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z23test_sample_lz_1d_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.lz.1d" {{.*}} : (!s32i, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16>
+// LLVM: define{{.*}} <4 x half> @_Z23test_sample_lz_1d_v4f16fu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.lz.1d.v4f16.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ half4 test_sample_lz_1d_v4f16(float s, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_lz_1d_v4f16_f32(15, s, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z22test_sample_l_1d_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.l.1d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16>
+// LLVM: define{{.*}} <4 x half> @_Z22test_sample_l_1d_v4f16ffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.l.1d.v4f16.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ half4 test_sample_l_1d_v4f16(float s, float lod, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_l_1d_v4f16_f32(15, s, lod, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z22test_sample_d_1d_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.d.1d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16>
+// LLVM: define{{.*}} <4 x half> @_Z22test_sample_d_1d_v4f16fffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.d.1d.v4f16.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ half4 test_sample_d_1d_v4f16(float dsdx, float dsdy, float s, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_d_1d_v4f16_f32(15, dsdx, dsdy, s, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z23test_sample_lz_2d_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.lz.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16>
+// LLVM: define{{.*}} <4 x half> @_Z23test_sample_lz_2d_v4f16ffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.lz.2d.v4f16.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ half4 test_sample_lz_2d_v4f16(float s, float t, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_lz_2d_v4f16_f32(15, s, t, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z22test_sample_l_2d_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.l.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16>
+// LLVM: define{{.*}} <4 x half> @_Z22test_sample_l_2d_v4f16fffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.l.2d.v4f16.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ half4 test_sample_l_2d_v4f16(float s, float t, float lod, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_l_2d_v4f16_f32(15, s, t, lod, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z22test_sample_d_2d_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.d.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16>
+// LLVM: define{{.*}} <4 x half> @_Z22test_sample_d_2d_v4f16ffffffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.d.2d.v4f16.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ half4 test_sample_d_2d_v4f16(float dsdx, float dtdx, float dsdy, float dtdy, float s, float t, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_d_2d_v4f16_f32(15, dsdx, dtdx, dsdy, dtdy, s, t, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z23test_sample_lz_3d_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.lz.3d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16>
+// LLVM: define{{.*}} <4 x half> @_Z23test_sample_lz_3d_v4f16fffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.lz.3d.v4f16.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ half4 test_sample_lz_3d_v4f16(float s, float t, float r, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_lz_3d_v4f16_f32(15, s, t, r, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z22test_sample_l_3d_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.l.3d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16>
+// LLVM: define{{.*}} <4 x half> @_Z22test_sample_l_3d_v4f16ffffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.l.3d.v4f16.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ half4 test_sample_l_3d_v4f16(float s, float t, float r, float lod, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_l_3d_v4f16_f32(15, s, t, r, lod, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z22test_sample_d_3d_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.d.3d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16>
+// LLVM: define{{.*}} <4 x half> @_Z22test_sample_d_3d_v4f16fffffffffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.d.3d.v4f16.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ half4 test_sample_d_3d_v4f16(float dsdx, float dtdx, float drdx, float dsdy, float dtdy, float drdy, float s, float t, float r, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_d_3d_v4f16_f32(15, dsdx, dtdx, drdx, dsdy, dtdy, drdy, s, t, r, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z25test_sample_lz_cube_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.lz.cube" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16>
+// LLVM: define{{.*}} <4 x half> @_Z25test_sample_lz_cube_v4f16fffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.lz.cube.v4f16.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ half4 test_sample_lz_cube_v4f16(float s, float t, float face, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_lz_cube_v4f16_f32(15, s, t, face, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z24test_sample_l_cube_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.l.cube" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16>
+// LLVM: define{{.*}} <4 x half> @_Z24test_sample_l_cube_v4f16ffffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.l.cube.v4f16.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ half4 test_sample_l_cube_v4f16(float s, float t, float face, float lod, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_l_cube_v4f16_f32(15, s, t, face, lod, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z28test_sample_lz_1darray_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.lz.1darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16>
+// LLVM: define{{.*}} <4 x half> @_Z28test_sample_lz_1darray_v4f16ffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.lz.1darray.v4f16.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ half4 test_sample_lz_1darray_v4f16(float s, float slice, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_lz_1darray_v4f16_f32(15, s, slice, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z27test_sample_l_1darray_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.l.1darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16>
+// LLVM: define{{.*}} <4 x half> @_Z27test_sample_l_1darray_v4f16fffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.l.1darray.v4f16.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ half4 test_sample_l_1darray_v4f16(float s, float slice, float lod, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_l_1darray_v4f16_f32(15, s, slice, lod, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z27test_sample_d_1darray_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.d.1darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16>
+// LLVM: define{{.*}} <4 x half> @_Z27test_sample_d_1darray_v4f16ffffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.d.1darray.v4f16.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ half4 test_sample_d_1darray_v4f16(float dsdx, float dsdy, float s, float slice, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_d_1darray_v4f16_f32(15, dsdx, dsdy, s, slice, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z28test_sample_lz_2darray_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.lz.2darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16>
+// LLVM: define{{.*}} <4 x half> @_Z28test_sample_lz_2darray_v4f16fffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.lz.2darray.v4f16.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ half4 test_sample_lz_2darray_v4f16(float s, float t, float slice, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_lz_2darray_v4f16_f32(15, s, t, slice, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z27test_sample_l_2darray_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.l.2darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16>
+// LLVM: define{{.*}} <4 x half> @_Z27test_sample_l_2darray_v4f16ffffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.l.2darray.v4f16.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ half4 test_sample_l_2darray_v4f16(float s, float t, float slice, float lod, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_l_2darray_v4f16_f32(15, s, t, slice, lod, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z27test_sample_d_2darray_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.d.2darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.vector<4 x !cir.f16>
+// LLVM: define{{.*}} <4 x half> @_Z27test_sample_d_2darray_v4f16fffffffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.sample.d.2darray.v4f16.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ half4 test_sample_d_2darray_v4f16(float dsdx, float dtdx, float dsdy, float dtdy, float s, float t, float slice, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_d_2darray_v4f16_f32(15, dsdx, dtdx, dsdy, dtdy, s, t, slice, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z21test_sample_lz_2d_f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.lz.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.float
+// LLVM: define{{.*}} float @_Z21test_sample_lz_2d_f32ffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float test_sample_lz_2d_f32(float s, float t, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_lz_2d_f32_f32(1, s, t, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z20test_sample_l_2d_f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.l.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.float
+// LLVM: define{{.*}} float @_Z20test_sample_l_2d_f32fffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}float @llvm.amdgcn.image.sample.l.2d.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float test_sample_l_2d_f32(float s, float t, float lod, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_l_2d_f32_f32(1, s, t, lod, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z20test_sample_d_2d_f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.d.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.float
+// LLVM: define{{.*}} float @_Z20test_sample_d_2d_f32ffffffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}float @llvm.amdgcn.image.sample.d.2d.f32.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float test_sample_d_2d_f32(float dsdx, float dtdx, float dsdy, float dtdy, float s, float t, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_d_2d_f32_f32(1, dsdx, dtdx, dsdy, dtdy, s, t, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z26test_sample_lz_2darray_f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.lz.2darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.float
+// LLVM: define{{.*}} float @_Z26test_sample_lz_2darray_f32fffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}float @llvm.amdgcn.image.sample.lz.2darray.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float test_sample_lz_2darray_f32(float s, float t, float slice, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_lz_2darray_f32_f32(1, s, t, slice, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z25test_sample_l_2darray_f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.l.2darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.float
+// LLVM: define{{.*}} float @_Z25test_sample_l_2darray_f32ffffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}float @llvm.amdgcn.image.sample.l.2darray.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float test_sample_l_2darray_f32(float s, float t, float slice, float lod, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_l_2darray_f32_f32(1, s, t, slice, lod, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z25test_sample_d_2darray_f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.sample.d.2darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<8 x !s32i>, !cir.vector<4 x !s32i>, !cir.bool, !s32i, !s32i) -> !cir.float
+// LLVM: define{{.*}} float @_Z25test_sample_d_2darray_f32fffffffu18__amdgpu_texture_tDv4_i(
+// LLVM: call {{.*}}float @llvm.amdgcn.image.sample.d.2darray.f32.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float test_sample_d_2darray_f32(float dsdx, float dtdx, float dsdy, float dtdy, float s, float t, float slice, __amdgpu_texture_t tex, int4 samp) {
+  return __builtin_amdgcn_image_sample_d_2darray_f32_f32(1, dsdx, dtdx, dsdy, dtdy, s, t, slice, tex, samp, 0, 120, 110);
+}