[clang] [AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)
Rana Pratap Reddy via cfe-commits
cfe-commits at lists.llvm.org
Tue Oct 7 00:58:43 PDT 2025
https://github.com/ranapratap55 updated https://github.com/llvm/llvm-project/pull/140210
>From 4fbfa90618ec4902fa46f400a4f734927fb2c441 Mon Sep 17 00:00:00 2001
From: ranapratap55 <RanaPratapReddy.Nimmakayala at amd.com>
Date: Fri, 16 May 2025 12:50:09 +0530
Subject: [PATCH 1/7] [WIP][AMDGPU] Support for type inferring image load/store
builtins for AMDGPU
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 74 ++
clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 145 ++
clang/lib/Sema/SemaAMDGPU.cpp | 100 ++
.../test/CodeGen/builtins-image-load-2d-f32.c | 31 +
clang/test/CodeGen/builtins-image-load.c | 1162 +++++++++++++++++
clang/test/CodeGen/builtins-image-store.c | 703 ++++++++++
.../SemaOpenCL/builtins-image-load-param.cl | 222 ++++
.../SemaOpenCL/builtins-image-store-param.cl | 145 ++
8 files changed, 2582 insertions(+)
create mode 100644 clang/test/CodeGen/builtins-image-load-2d-f32.c
create mode 100644 clang/test/CodeGen/builtins-image-load.c
create mode 100644 clang/test/CodeGen/builtins-image-store.c
create mode 100644 clang/test/SemaOpenCL/builtins-image-load-param.cl
create mode 100644 clang/test/SemaOpenCL/builtins-image-store-param.cl
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 3e45c04687a64..970eb68355c08 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -885,5 +885,79 @@ TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_store_16x8B, "vV2i*V2iIicC*",
TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_load_8x16B, "V4iV4i*IicC*", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_store_8x16B, "vV4i*V4iIicC*", "nc", "gfx1250-insts,wavefrontsize32")
+//===----------------------------------------------------------------------===//
+// Image builtins
+//===----------------------------------------------------------------------===//
+TARGET_BUILTIN(__builtin_amdgcn_image_load_1d_v4f32_i32, "V4fiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_1d_v4f16_i32, "V4hiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_1darray_v4f32_i32, "V4fiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_1darray_v4f16_i32, "V4hiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2d_f32_i32, "fiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2d_v4f32_i32, "V4fiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2d_v4f16_i32, "V4hiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2darray_f32_i32, "fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2darray_v4f32_i32, "V4fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2darray_v4f16_i32, "V4hiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_3d_v4f32_i32, "V4fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_3d_v4f16_i32, "V4hiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_cube_v4f32_i32, "V4fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_cube_v4f16_i32, "V4hiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1d_v4f32_i32, "V4fiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1d_v4f16_i32, "V4hiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1darray_v4f32_i32, "V4fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1darray_v4f16_i32, "V4hiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2d_f32_i32, "fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2d_v4f32_i32, "V4fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2d_v4f16_i32, "V4hiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2darray_f32_i32, "fiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2darray_v4f32_i32, "V4fiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2darray_v4f16_i32, "V4hiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_3d_v4f32_i32, "V4fiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_3d_v4f16_i32, "V4hiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_cube_v4f32_i32, "V4fiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_cube_v4f16_i32, "V4hiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_1d_v4f32_i32, "vV4fiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_1d_v4f16_i32, "vV4hiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_1darray_v4f32_i32, "vV4fiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_1darray_v4f16_i32, "vV4hiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_f32_i32, "vfiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_v4f32_i32, "vV4fiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_v4f16_i32, "vV4hiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_f32_i32, "vfiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_v4f32_i32, "vV4fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_v4f16_i32, "vV4hiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_3d_v4f32_i32, "vV4fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_3d_v4f16_i32, "vV4hiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_cube_v4f32_i32, "vV4fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_cube_v4f16_i32, "vV4hiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1d_v4f32_i32, "vV4fiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1d_v4f16_i32, "vV4hiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1darray_v4f32_i32, "vV4fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1darray_v4f16_i32, "vV4hiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_f32_i32, "vfiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_v4f32_i32, "vV4fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_v4f16_i32, "vV4hiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_f32_i32, "vfiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_v4f32_i32, "vV4fiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_v4f16_i32, "vV4hiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_3d_v4f32_i32, "vV4fiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_3d_v4f16_i32, "vV4hiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_cube_v4f32_i32, "vV4fiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_cube_v4f16_i32, "vV4hiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_1d_v4f32_f32, "V4fifV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_1d_v4f16_f32, "V4hifV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_1darray_v4f32_f32, "V4fiffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_1darray_v4f16_f32, "V4hiffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_2d_f32_f32, "fiffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_2d_v4f32_f32, "V4fiffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_2d_v4f16_f32, "V4hiffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_2darray_f32_f32, "fifffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_2darray_v4f32_f32, "V4fifffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_2darray_v4f16_f32, "V4hifffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_3d_v4f32_f32, "V4fifffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_3d_v4f16_f32, "V4hifffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_cube_v4f32_f32, "V4fifffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_cube_v4f16_f32, "V4hifffV8iV4ibii", "nc", "image-insts")
+
#undef BUILTIN
#undef TARGET_BUILTIN
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 6596ec06199dc..69e5201ff7e9e 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -181,6 +181,21 @@ static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
return Call;
}
+llvm::CallInst *EmitAMDGCNImageOverloadedReturnType(clang::CodeGen::CodeGenFunction &CGF,
+ const clang::CallExpr *E,
+ unsigned IntrinsicID,
+ bool IsImageStore) {
+ clang::SmallVector<llvm::Value *, 10> Args;
+ for (unsigned I = 0; I < E->getNumArgs(); ++I)
+ Args.push_back(CGF.EmitScalarExpr(E->getArg(I)));
+
+ llvm::Type *RetTy = CGF.ConvertType(E->getType());
+ if (IsImageStore)
+ RetTy = CGF.VoidTy;
+ llvm::CallInst *Call = CGF.Builder.CreateIntrinsic(RetTy, IntrinsicID, Args);
+ return Call;
+}
+
// Emit an intrinsic that has 1 float or double operand, and 1 integer.
static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
const CallExpr *E,
@@ -937,6 +952,136 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
return Builder.CreateInsertElement(I0, A, 1);
}
+ case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_1d, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_1darray, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_2d, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_2darray, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_3d, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_cube, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_mip_1d, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_mip_1darray, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_mip_2d, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_mip_2darray, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_mip_3d, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_mip_cube, false);
+ case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_1d, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_1darray, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_2d, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_2darray, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_3d, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_cube, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_mip_1d, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_mip_1darray, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_mip_2d, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_mip_2darray, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_mip_3d, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_mip_cube, true);
+ case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_1d, false);
+ case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_1darray, false);
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_2d, false);
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_2darray, false);
+ case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_3d, false);
+ case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_cube, false);
case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8);
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index 3a0c23187d45d..1eaaa838fa3ce 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -111,6 +111,106 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_16x8B:
case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_store_8x16B:
return checkCoopAtomicFunctionCall(TheCall, /*IsStore=*/true);
+ case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32: {
+ StringRef FeatureList(getASTContext().BuiltinInfo.getRequiredFeatures(BuiltinID));
+ if (!Builtin::evaluateRequiredTargetFeatures(
+ FeatureList, CallerFeatureMap)){
+ Diag(TheCall->getBeginLoc(), diag::err_builtin_needs_feature)
+ << FD->getDeclName() << FeatureList;
+ return false;
+ }
+
+ unsigned ArgCount = TheCall->getNumArgs() - 1;
+ llvm::APSInt Result;
+
+ return ((SemaRef.BuiltinConstantArg(TheCall, 0, Result)) ||
+ (SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) ||
+ (SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result)));
+ }
+ case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32: {
+ StringRef FeatureList(getASTContext().BuiltinInfo.getRequiredFeatures(BuiltinID));
+ if (!Builtin::evaluateRequiredTargetFeatures(
+ FeatureList, CallerFeatureMap)){
+ Diag(TheCall->getBeginLoc(), diag::err_builtin_needs_feature)
+ << FD->getDeclName() << FeatureList;
+ return false;
+ }
+
+ unsigned ArgCount = TheCall->getNumArgs() - 1;
+ llvm::APSInt Result;
+
+ return ((SemaRef.BuiltinConstantArg(TheCall, 1, Result)) ||
+ (SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) ||
+ (SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result)));
+ }
default:
return false;
}
diff --git a/clang/test/CodeGen/builtins-image-load-2d-f32.c b/clang/test/CodeGen/builtins-image-load-2d-f32.c
new file mode 100644
index 0000000000000..78dab461c1f38
--- /dev/null
+++ b/clang/test/CodeGen/builtins-image-load-2d-f32.c
@@ -0,0 +1,31 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown %s -emit-llvm -o - | FileCheck %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
+
+typedef int v8i __attribute__((ext_vector_type(8)));
+
+// CHECK-LABEL: define dso_local float @test_builtin_image_load_2d(
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VECI32:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VECI32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VECI32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VECI32_ADDR]] to ptr
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VECI32]], ptr [[VECI32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VECI32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 1, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 0, i32 0)
+// CHECK-NEXT: ret float [[TMP3]]
+//
+float test_builtin_image_load_2d(float f32, int i32, v8i veci32) {
+
+ return __builtin_amdgcn_image_load_2d_f32_i32(i32, i32, veci32);
+}
diff --git a/clang/test/CodeGen/builtins-image-load.c b/clang/test/CodeGen/builtins-image-load.c
new file mode 100644
index 0000000000000..67548a567723e
--- /dev/null
+++ b/clang/test/CodeGen/builtins-image-load.c
@@ -0,0 +1,1162 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1100 %s -emit-llvm -o - | FileCheck %s
+
+typedef int int8 __attribute__((ext_vector_type(8)));
+typedef int int4 __attribute__((ext_vector_type(4)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef _Float16 half;
+typedef half half4 __attribute__((ext_vector_type(4)));
+
+// CHECK-LABEL: define dso_local float @test_builtin_image_load_2d(
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 12, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 106, i32 103)
+// CHECK-NEXT: ret float [[TMP3]]
+//
+float test_builtin_image_load_2d(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2d_f32_i32(12, i32, i32, vec8i32, 106, 103);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_2d_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP3]]
+//
+float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2d_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP3]]
+//
+half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local float @test_builtin_image_load_2darray(
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.load.2darray.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret float [[TMP4]]
+//
+float test_builtin_image_load_2darray(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_2darray_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP4]]
+//
+float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2darray_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2darray_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP4]]
+//
+half4 test_builtin_image_load_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2darray_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_1d_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], <8 x i32> [[TMP1]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
+//
+float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_1d_v4f32_i32(100, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1d_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], <8 x i32> [[TMP1]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP2]]
+//
+half4 test_builtin_image_load_1d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_1d_v4f16_i32(100, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_1darray_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP3]]
+//
+float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_1darray_v4f32_i32(100, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1darray_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP3]]
+//
+half4 test_builtin_image_load_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_1darray_v4f16_i32(100, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_3d_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP4]]
+//
+float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_3d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_3d_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP4]]
+//
+half4 test_builtin_image_load_3d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_3d_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_cube_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP4]]
+//
+float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_cube_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_cube_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.cube.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP4]]
+//
+half4 test_builtin_image_load_cube_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_cube_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_1d_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP3]]
+//
+float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(100, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1d_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP3]]
+//
+half4 test_builtin_image_load_mip_1d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_1d_v4f16_i32(100, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_1darray_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP4]]
+//
+float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1darray_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP4]]
+//
+half4 test_builtin_image_load_mip_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_1darray_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local float @test_builtin_image_load_mip_2d(
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.load.mip.2d.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret float [[TMP4]]
+//
+float test_builtin_image_load_mip_2d(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2d_f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2d_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP4]]
+//
+float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2d_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP4]]
+//
+half4 test_builtin_image_load_mip_2d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2d_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local float @test_builtin_image_load_mip_2darray(
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.image.load.mip.2darray.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret float [[TMP5]]
+//
+float test_builtin_image_load_mip_2darray(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2darray_f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2darray_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
+//
+float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2darray_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP5]]
+//
+half4 test_builtin_image_load_mip_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2darray_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_3d_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
+//
+float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_3d_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.3d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP5]]
+//
+half4 test_builtin_image_load_mip_3d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_3d_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_cube_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
+//
+float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_cube_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.cube.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP5]]
+//
+half4 test_builtin_image_load_mip_cube_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_cube_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_1d_v4f32_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], <8 x i32> [[TMP1]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP3]]
+//
+float4 test_builtin_amdgcn_image_sample_1d_v4f32_f32(float4 v4f32, int i32, float f32, int8 vec8i32, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_1d_v4f32_f32(100, f32, vec8i32, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_1d_v4f16_f32(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.1d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], <8 x i32> [[TMP1]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP3]]
+//
+half4 test_builtin_amdgcn_image_sample_1d_v4f16_f32(half4 v4f16, int i32, float f32, int8 vec8i32, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_1d_v4f16_f32(100, f32, vec8i32, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_1darray_v4f32_f32(
+// CHECK-SAME: i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP4]]
+//
+float4 test_builtin_amdgcn_image_sample_1darray_v4f32_f32(int i32, float f32, int8 vec8i32, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_1darray_v4f32_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_1darray_v4f16_f32(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.1darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP4]]
+//
+half4 test_builtin_amdgcn_image_sample_1darray_v4f16_f32(half4 v4f16, int i32, float f32, int8 vec8i32, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_1darray_v4f16_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local float @test_builtin_amdgcn_image_sample_2d_f32_f32(
+// CHECK-SAME: i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.sample.2d.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret float [[TMP4]]
+//
+float test_builtin_amdgcn_image_sample_2d_f32_f32(int i32, float f32, int8 vec8i32, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_2d_f32_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_2d_v4f32_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP4]]
+//
+float4 test_builtin_amdgcn_image_sample_2d_v4f32_f32(float4 v4f32, int i32, float f32, int8 vec8i32, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_2d_v4f32_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_2d_v4f16_f32(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.2d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP4]]
+//
+half4 test_builtin_amdgcn_image_sample_2d_v4f16_f32(half4 v4f16, int i32, float f32, int8 vec8i32, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_2d_v4f16_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local float @test_builtin_amdgcn_image_sample_2darray_f32_f32(
+// CHECK-SAME: i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.image.sample.2darray.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret float [[TMP5]]
+//
+float test_builtin_amdgcn_image_sample_2darray_f32_f32(int i32, float f32, int8 vec8i32, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_2darray_f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_2darray_v4f32_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
+//
+float4 test_builtin_amdgcn_image_sample_2darray_v4f32_f32(float4 v4f32, int i32, float f32, int8 vec8i32, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_2darray_v4f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_2darray_v4f16_f32(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.2darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP5]]
+//
+half4 test_builtin_amdgcn_image_sample_2darray_v4f16_f32(half4 v4f16, int i32, float f32, int8 vec8i32, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_2darray_v4f16_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_3d_v4f32_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
+//
+float4 test_builtin_amdgcn_image_sample_3d_v4f32_f32(float4 v4f32, int i32, float f32, int8 vec8i32, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_3d_v4f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_3d_v4f16_f32(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.3d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP5]]
+//
+half4 test_builtin_amdgcn_image_sample_3d_v4f16_f32(half4 v4f16, int i32, float f32, int8 vec8i32, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_3d_v4f16_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_cube_v4f32_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
+//
+float4 test_builtin_amdgcn_image_sample_cube_v4f32_f32(float4 v4f32, int i32, float f32, int8 vec8i32, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_cube_v4f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_cube_v4f16_f32(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.cube.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP5]]
+//
+half4 test_builtin_amdgcn_image_sample_cube_v4f16_f32(half4 v4f16, int i32, float f32, int8 vec8i32, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_cube_v4f16_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+}
diff --git a/clang/test/CodeGen/builtins-image-store.c b/clang/test/CodeGen/builtins-image-store.c
new file mode 100644
index 0000000000000..cd2b09e074c59
--- /dev/null
+++ b/clang/test/CodeGen/builtins-image-store.c
@@ -0,0 +1,703 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 %s -emit-llvm -o - | FileCheck %s
+
+typedef int int8 __attribute__((ext_vector_type(8)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef _Float16 half;
+typedef half half4 __attribute__((ext_vector_type(4)));
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_2d(
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.2d.f32.i32.v8i32(float [[TMP0]], i32 12, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 106, i32 103)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_2d(float f32, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_2d_f32_i32(f32, 12, i32, i32, vec8i32, 106, 103);
+ }
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_2d_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.2d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_2d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_2d_v4f32_i32(v4f32, 100, i32, i32, vec8i32, 120, 110);
+ }
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_2d_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.2d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_2d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_2d_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, 110);
+ }
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_2darray(
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.2darray.f32.i32.v8i32(float [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_2darray(float f32, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_2darray_f32_i32(f32, 100, i32, i32, i32, vec8i32, 120, 110);
+ }
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_2darray_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.2darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110);
+ }
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_2darray_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.2darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110);
+ }
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_1d_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_1d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_1d_v4f32_i32(v4f32, 100, i32, vec8i32, 120, 110);
+ }
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_1d_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_1d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_1d_v4f16_i32(v4f16, 100, i32, vec8i32, 120, 110);
+ }
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_1darray_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.1darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_1darray_v4f32_i32(v4f32, 100, i32, i32, vec8i32, 120, 110);
+ }
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_1darray_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.1darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_1darray_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, 110);
+ }
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_3d_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.3d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_3d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_3d_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110);
+ }
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_3d_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.3d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_3d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_3d_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110);
+ }
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_cube_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.cube.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_cube_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_cube_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110);
+ }
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_cube_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.cube.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_cube_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_cube_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110);
+ }
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_1d_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_mip_1d_v4f32_i32(v4f32, 100, i32, i32, vec8i32, 120, 110);
+ }
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_1d_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.1d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_mip_1d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_mip_1d_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, 110);
+ }
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_1darray_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_mip_1darray_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110);
+ }
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_1darray_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.1darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_mip_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_mip_1darray_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110);
+ }
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2d(
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2d.f32.i32.v8i32(float [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_mip_2d(float f32, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_mip_2d_f32_i32(f32, 100, i32, i32, i32, vec8i32, 120, 110);
+ }
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2d_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_mip_2d_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110);
+ }
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2d_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_mip_2d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_mip_2d_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110);
+ }
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2darray(
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2darray.f32.i32.v8i32(float [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_mip_2darray(float f32, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_mip_2darray_f32_i32(f32, 100, i32, i32, i32, i32, vec8i32, 120, 110);
+ }
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2darray_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_mip_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, i32, vec8i32, 120, 110);
+ }
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2darray_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_mip_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_mip_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, i32, vec8i32, 120, 110);
+ }
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_3d_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_mip_3d_v4f32_i32(v4f32, 100, i32, i32, i32, i32, vec8i32, 120, 110);
+ }
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_3d_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.3d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_mip_3d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_mip_3d_v4f16_i32(v4f16, 100, i32, i32, i32, i32, vec8i32, 120, 110);
+ }
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_cube_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_mip_cube_v4f32_i32(v4f32, 100, i32, i32, i32, i32, vec8i32, 120, 110);
+ }
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_cube_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.cube.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_mip_cube_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_mip_cube_v4f16_i32(v4f16, 100, i32, i32, i32, i32, vec8i32, 120, 110);
+ }
diff --git a/clang/test/SemaOpenCL/builtins-image-load-param.cl b/clang/test/SemaOpenCL/builtins-image-load-param.cl
new file mode 100644
index 0000000000000..249bb9211ab75
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-image-load-param.cl
@@ -0,0 +1,222 @@
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1100 -S -verify=expected -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -S -verify=expected -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx942 -S -verify=GFX94 -o - %s
+// REQUIRES: amdgpu-registered-target
+
+typedef int int8 __attribute__((ext_vector_type(8)));
+typedef int int4 __attribute__((ext_vector_type(4)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef half half4 __attribute__((ext_vector_type(4)));
+
+float test_builtin_image_load_2d(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2d_f32_i32(i32, i32, i32, vec8i32, 106, 103); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_f32_i32' must be a constant integer}}
+}
+float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_v4f16_i32' must be a constant integer}}
+}
+
+
+float test_builtin_image_load_2darray(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_f32_i32' must be a constant integer}}
+}
+float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2darray_v4f32_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2darray_v4f16_i32(100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_1d_v4f32_i32(i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_1d_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_1d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_1d_v4f16_i32(100, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_1d_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_1darray_v4f32_i32(100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_1darray_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_1darray_v4f16_i32(100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_1darray_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_3d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_3d_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_3d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_3d_v4f16_i32(i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_3d_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_cube_v4f32_i32(i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_cube_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_cube_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_cube_v4f16_i32(i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_cube_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1d_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_mip_1d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_1d_v4f16_i32(100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1d_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1darray_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_mip_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_1darray_v4f16_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1darray_v4f16_i32' must be a constant integer}}
+}
+
+float test_builtin_image_load_mip_2d(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2d_f32_i32(i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_f32_i32' must be a constant integer}}
+}
+float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_mip_2d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2d_v4f16_i32(i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_v4f16_i32' must be a constant integer}}
+}
+
+float test_builtin_image_load_mip_2darray(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2darray_f32_i32(i32, i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_f32_i32' must be a constant integer}}
+}
+float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_mip_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2darray_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(i32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_3d_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_mip_3d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_3d_v4f16_i32(i32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_3d_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(i32, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_cube_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_mip_cube_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_cube_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_cube_v4f16_i32' must be a constant integer}}
+}
+
+float test_builtin_image_load_2d_gfx(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2d_f32_i32(12, i32, i32, vec8i32, 106, 103); //GFX94-error{{'__builtin_amdgcn_image_load_2d_f32_i32' needs target feature image-insts}}
+}
+float4 test_builtin_image_load_2d_gfx_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, vec8i32, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_load_2d_v4f32_i32' needs target feature image-insts}}
+}
+half4 test_builtin_image_load_2d_gfx_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, vec8i32, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_load_2d_v4f16_i32' needs target feature image-insts}}
+}
+
+float test_builtin_image_sample_2d(float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_2d_f32_f32(i32, f32, f32, vec8i32, vec4i32, 0, 106, 103); //expected-error{{argument to '__builtin_amdgcn_image_sample_2d_f32_f32' must be a constant integer}}
+}
+float4 test_builtin_image_sample_2d_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_2d_v4f32_f32(100, f32, f32, vec8i32, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_2d_v4f32_f32' must be a constant integer}}
+}
+half4 test_builtin_image_sample_2d_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_2d_v4f16_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_2d_v4f16_f32' must be a constant integer}}
+}
+
+float test_builtin_image_sample_2darray(float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_2darray_f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_2darray_f32_f32' must be a constant integer}}
+}
+float4 test_builtin_image_sample_2darray_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_2darray_v4f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_2darray_v4f32_f32' must be a constant integer}}
+}
+half4 test_builtin_image_sample_2darray_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_2darray_v4f16_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_2darray_v4f16_f32' must be a constant integer}}
+}
+
+float4 test_builtin_image_sample_1d_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_1d_v4f32_f32(i32, f32, vec8i32, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_1d_v4f32_f32' must be a constant integer}}
+}
+half4 test_builtin_image_sample_1d_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_1d_v4f16_f32(100, f32, vec8i32, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_1d_v4f16_f32' must be a constant integer}}
+}
+
+float4 test_builtin_image_sample_1darray_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_1darray_v4f32_f32(100, f32, f32, vec8i32, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_1darray_v4f32_f32' must be a constant integer}}
+}
+half4 test_builtin_image_sample_1darray_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_1darray_v4f16_f32(100, f32, f32, vec8i32, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_1darray_v4f16_f32' must be a constant integer}}
+}
+
+float4 test_builtin_image_sample_3d_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_3d_v4f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_3d_v4f32_f32' must be a constant integer}}
+}
+half4 test_builtin_image_sample_3d_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_3d_v4f16_f32(i32, f32, f32, f32, vec8i32, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_3d_v4f16_f32' must be a constant integer}}
+}
+
+float4 test_builtin_image_sample_cube_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_cube_v4f32_f32(i32, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_cube_v4f32_f32' must be a constant integer}}
+}
+half4 test_builtin_image_sample_cube_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_cube_v4f16_f32(i32, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_cube_v4f16_f32' must be a constant integer}}
+}
+
+float test_builtin_image_sample_2d_gfx(float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_2d_f32_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_sample_2d_f32_f32' needs target feature image-insts}}
+}
+float4 test_builtin_image_sample_2d_gfx_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_2d_v4f32_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_sample_2d_v4f32_f32' needs target feature image-insts}}
+}
+half4 test_builtin_image_sample_2d_gfx_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_2d_v4f16_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_sample_2d_v4f16_f32' needs target feature image-insts}}
+}
\ No newline at end of file
diff --git a/clang/test/SemaOpenCL/builtins-image-store-param.cl b/clang/test/SemaOpenCL/builtins-image-store-param.cl
new file mode 100644
index 0000000000000..f84df77171098
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-image-store-param.cl
@@ -0,0 +1,145 @@
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -S -verify=expected -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx942 -S -verify=GFX94 -o - %s
+// REQUIRES: amdgpu-registered-target
+
+typedef int int8 __attribute__((ext_vector_type(8)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef half half4 __attribute__((ext_vector_type(4)));
+
+
+void test_builtin_image_store_2d(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_2d_f32_i32(f32, i32, i32, i32, vec8i32, 106, 103); //expected-error{{argument to '__builtin_amdgcn_image_store_2d_f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_2d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_2d_v4f32_i32(v4f32, 100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_2d_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_2d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_2d_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_2d_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_2darray(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_2darray_f32_i32(f32, 100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_2darray_f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_2darray_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_2darray_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_1d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_1d_v4f32_i32(v4f32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_1d_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_1d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_1d_v4f16_i32(v4f16, 100, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_1d_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_1darray_v4f32_i32(v4f32, 100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_1darray_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_1darray_v4f16_i32(v4f16, 100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_1darray_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_3d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_3d_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_3d_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_3d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_3d_v4f16_i32(v4f16, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_3d_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_cube_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_cube_v4f32_i32(v4f32, i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_cube_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_cube_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_cube_v4f16_i32(v4f16, i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_cube_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_1d_v4f32_i32(v4f32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_1d_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_mip_1d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_1d_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_1d_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_1darray_v4f32_i32(v4f32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_1darray_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_mip_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_1darray_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_1darray_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_mip_2d(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_2d_f32_i32(f32, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2d_f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_2d_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2d_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_mip_2d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_2d_v4f16_i32(v4f16, i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2d_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_mip_2darray(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_2darray_f32_i32(f32, i32, i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2darray_f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2darray_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_mip_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2darray_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_3d_v4f32_i32(v4f32, i32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_3d_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_mip_3d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_3d_v4f16_i32(v4f16, i32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_3d_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_cube_v4f32_i32(v4f32, i32, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_cube_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_mip_cube_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_cube_v4f16_i32(v4f16, 100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_cube_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_2d_gfx(float f32, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_2d_f32_i32(f32, 12, i32, i32, vec8i32, 106, 103); //GFX94-error{{'__builtin_amdgcn_image_store_2d_f32_i32' needs target feature image-insts}}
+}
+void test_builtin_image_store_2d_gfx_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_2d_v4f32_i32(v4f32, 100, i32, i32, vec8i32, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_store_2d_v4f32_i32' needs target feature image-insts}}
+ }
+ void test_builtin_image_store_2d_gfx_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_2d_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_store_2d_v4f16_i32' needs target feature image-insts}}
+ }
>From 90065eeb87fb8f5bb831eb4f9659844afc4a4b44 Mon Sep 17 00:00:00 2001
From: ranapratap55 <RanaPratapReddy.Nimmakayala at amd.com>
Date: Wed, 18 Jun 2025 10:55:22 +0530
Subject: [PATCH 2/7] [AMDGPU] Adds builtins for image load and sema checking
for image load
---
clang/include/clang/Sema/SemaAMDGPU.h | 2 +
clang/lib/Sema/SemaAMDGPU.cpp | 32 +
.../test/CodeGen/builtins-image-load-2d-f32.c | 715 +++++++++++++++++-
.../builtins-image-load-2d-f32-param.cl | 132 ++++
4 files changed, 869 insertions(+), 12 deletions(-)
create mode 100644 clang/test/SemaOpenCL/builtins-image-load-2d-f32-param.cl
diff --git a/clang/include/clang/Sema/SemaAMDGPU.h b/clang/include/clang/Sema/SemaAMDGPU.h
index bac812a9d4fcf..43dea8445ef3d 100644
--- a/clang/include/clang/Sema/SemaAMDGPU.h
+++ b/clang/include/clang/Sema/SemaAMDGPU.h
@@ -31,6 +31,8 @@ class SemaAMDGPU : public SemaBase {
bool checkMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs,
unsigned NumDataArgs);
+ bool checkImageImmArgFunctionCall(CallExpr *TheCall, unsigned ArgCount);
+
/// Create an AMDGPUWavesPerEUAttr attribute.
AMDGPUFlatWorkGroupSizeAttr *
CreateAMDGPUFlatWorkGroupSizeAttr(const AttributeCommonInfo &CI, Expr *Min,
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index 1eaaa838fa3ce..666645628decc 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -211,6 +211,38 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
(SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) ||
(SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result)));
}
+ case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: {
+ unsigned ArgCount = TheCall->getNumArgs() - 1;
+
+ return checkImageImmArgFunctionCall(TheCall, ArgCount);
+ }
default:
return false;
}
diff --git a/clang/test/CodeGen/builtins-image-load-2d-f32.c b/clang/test/CodeGen/builtins-image-load-2d-f32.c
index 78dab461c1f38..aee97af37aaf0 100644
--- a/clang/test/CodeGen/builtins-image-load-2d-f32.c
+++ b/clang/test/CodeGen/builtins-image-load-2d-f32.c
@@ -1,31 +1,722 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa %s -emit-llvm -o - | FileCheck %s
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-
-typedef int v8i __attribute__((ext_vector_type(8)));
+typedef int int8 __attribute__((ext_vector_type(8)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef _Float16 half;
+typedef half half4 __attribute__((ext_vector_type(4)));
// CHECK-LABEL: define dso_local float @test_builtin_image_load_2d(
-// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VECI32:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VECI32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VECI32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VECI32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VECI32]], ptr [[VECI32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VECI32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 1, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 0, i32 0)
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 12, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 106, i32 103)
// CHECK-NEXT: ret float [[TMP3]]
//
-float test_builtin_image_load_2d(float f32, int i32, v8i veci32) {
+float test_builtin_image_load_2d(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2d_f32_i32(12, i32, i32, vec8i32, 106, 103);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_2d_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP3]]
+//
+float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2d_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP3]]
+//
+half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local float @test_builtin_image_load_2darray(
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.load.2darray.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret float [[TMP4]]
+//
+float test_builtin_image_load_2darray(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_2darray_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP4]]
+//
+float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2darray_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2darray_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP4]]
+//
+half4 test_builtin_image_load_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2darray_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_1d_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], <8 x i32> [[TMP2]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP3]]
+//
+float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_1d_v4f32_i32(100, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1d_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], <8 x i32> [[TMP2]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP3]]
+//
+half4 test_builtin_image_load_1d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_1d_v4f16_i32(100, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_1darray_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP4]]
+//
+float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_1darray_v4f32_i32(100, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1darray_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP4]]
+//
+half4 test_builtin_image_load_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_1darray_v4f16_i32(100, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_3d_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP4]]
+//
+float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_3d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_3d_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP4]]
+//
+half4 test_builtin_image_load_3d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_3d_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_cube_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP4]]
+//
+float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_cube_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_cube_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.cube.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP4]]
+//
+half4 test_builtin_image_load_cube_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_cube_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_1d_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP4]]
+//
+float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(100, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1d_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP4]]
+//
+half4 test_builtin_image_load_mip_1d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_1d_v4f16_i32(100, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_1darray_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP4]]
+//
+float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1darray_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP4]]
+//
+half4 test_builtin_image_load_mip_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_1darray_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local float @test_builtin_image_load_mip_2d(
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.load.mip.2d.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret float [[TMP4]]
+//
+float test_builtin_image_load_mip_2d(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2d_f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2d_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP4]]
+//
+float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2d_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP4]]
+//
+half4 test_builtin_image_load_mip_2d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2d_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local float @test_builtin_image_load_mip_2darray(
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.image.load.mip.2darray.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret float [[TMP5]]
+//
+float test_builtin_image_load_mip_2darray(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2darray_f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2darray_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
+//
+float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2darray_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP5]]
+//
+half4 test_builtin_image_load_mip_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2darray_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_3d_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
+//
+float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_3d_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.3d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP5]]
+//
+half4 test_builtin_image_load_mip_3d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_3d_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_cube_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
+//
+float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_cube_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.cube.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP5]]
+//
+half4 test_builtin_image_load_mip_cube_2(half4 v4f16, int i32, int8 vec8i32) {
- return __builtin_amdgcn_image_load_2d_f32_i32(i32, i32, veci32);
+ return __builtin_amdgcn_image_load_mip_cube_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
}
diff --git a/clang/test/SemaOpenCL/builtins-image-load-2d-f32-param.cl b/clang/test/SemaOpenCL/builtins-image-load-2d-f32-param.cl
new file mode 100644
index 0000000000000..7b5aab4011da9
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-image-load-2d-f32-param.cl
@@ -0,0 +1,132 @@
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -S -verify=expected -o - %s
+// REQUIRES: amdgpu-registered-target
+
+typedef int int8 __attribute__((ext_vector_type(8)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+//typedef _Float16 half;
+typedef half half4 __attribute__((ext_vector_type(4)));
+
+
+float test_builtin_image_load_2d(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2d_f32_i32(i32, i32, i32, vec8i32, 106, 103); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_f32_i32' must be a constant integer}}
+}
+float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_v4f16_i32' must be a constant integer}}
+}
+
+float test_builtin_image_load_2darray(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_f32_i32' must be a constant integer}}
+}
+float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2darray_v4f32_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2darray_v4f16_i32(100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_1d_v4f32_i32(i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_1d_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_1d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_1d_v4f16_i32(100, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_1d_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_1darray_v4f32_i32(100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_1darray_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_1darray_v4f16_i32(100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_1darray_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_3d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_3d_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_3d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_3d_v4f16_i32(i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_3d_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_cube_v4f32_i32(i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_cube_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_cube_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_cube_v4f16_i32(i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_cube_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1d_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_mip_1d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_1d_v4f16_i32(100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1d_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1darray_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_mip_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_1darray_v4f16_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1darray_v4f16_i32' must be a constant integer}}
+}
+
+float test_builtin_image_load_mip_2d(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2d_f32_i32(i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_f32_i32' must be a constant integer}}
+}
+float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_mip_2d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2d_v4f16_i32(i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_v4f16_i32' must be a constant integer}}
+}
+
+float test_builtin_image_load_mip_2darray(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2darray_f32_i32(i32, i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_f32_i32' must be a constant integer}}
+}
+float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_mip_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2darray_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(i32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_3d_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_mip_3d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_3d_v4f16_i32(i32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_3d_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(i32, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_cube_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_mip_cube_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_cube_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_cube_v4f16_i32' must be a constant integer}}
+}
>From 21c682925d8c7d6ff2e5b286fc6ee6ef09f5e016 Mon Sep 17 00:00:00 2001
From: ranapratap55 <RanaPratapReddy.Nimmakayala at amd.com>
Date: Thu, 26 Jun 2025 14:36:33 +0530
Subject: [PATCH 3/7] [AMDGPU] Adds builtins for image store and sema checking
for image store
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 29 +
clang/include/clang/Sema/SemaAMDGPU.h | 2 -
clang/lib/Sema/SemaAMDGPU.cpp | 50 +-
.../test/CodeGen/builtins-image-load-2d-f32.c | 722 ------------------
clang/test/CodeGen/builtins-image-load.c | 15 -
.../builtins-image-load-2d-f32-param.cl | 132 ----
.../SemaOpenCL/builtins-image-load-param.cl | 3 +-
7 files changed, 78 insertions(+), 875 deletions(-)
delete mode 100644 clang/test/CodeGen/builtins-image-load-2d-f32.c
delete mode 100644 clang/test/SemaOpenCL/builtins-image-load-2d-f32-param.cl
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 970eb68355c08..7202a69c1f2cf 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -959,5 +959,34 @@ TARGET_BUILTIN(__builtin_amdgcn_image_sample_3d_v4f16_f32, "V4hifffV8iV4ibii", "
TARGET_BUILTIN(__builtin_amdgcn_image_sample_cube_v4f32_f32, "V4fifffV8iV4ibii", "nc", "image-insts")
TARGET_BUILTIN(__builtin_amdgcn_image_sample_cube_v4f16_f32, "V4hifffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_1d_v4f32_i32, "vV4fiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_1d_v4f16_i32, "vV4hiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_1darray_v4f32_i32, "vV4fiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_1darray_v4f16_i32, "vV4hiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_f32_i32, "vfiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_v4f32_i32, "vV4fiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_v4f16_i32, "vV4hiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_f32_i32, "vfiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_v4f32_i32, "vV4fiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_v4f16_i32, "vV4hiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_3d_v4f32_i32, "vV4fiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_3d_v4f16_i32, "vV4hiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_cube_v4f32_i32, "vV4fiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_cube_v4f16_i32, "vV4hiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1d_v4f32_i32, "vV4fiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1d_v4f16_i32, "vV4hiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1darray_v4f32_i32, "vV4fiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1darray_v4f16_i32, "vV4hiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_f32_i32, "vfiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_v4f32_i32, "vV4fiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_v4f16_i32, "vV4hiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_f32_i32, "vfiiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_v4f32_i32, "vV4fiiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_v4f16_i32, "vV4hiiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_3d_v4f32_i32, "vV4fiiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_3d_v4f16_i32, "vV4hiiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_cube_v4f32_i32, "vV4fiiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_cube_v4f16_i32, "vV4hiiiiiV8iii", "nc", "")
+
#undef BUILTIN
#undef TARGET_BUILTIN
diff --git a/clang/include/clang/Sema/SemaAMDGPU.h b/clang/include/clang/Sema/SemaAMDGPU.h
index 43dea8445ef3d..bac812a9d4fcf 100644
--- a/clang/include/clang/Sema/SemaAMDGPU.h
+++ b/clang/include/clang/Sema/SemaAMDGPU.h
@@ -31,8 +31,6 @@ class SemaAMDGPU : public SemaBase {
bool checkMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs,
unsigned NumDataArgs);
- bool checkImageImmArgFunctionCall(CallExpr *TheCall, unsigned ArgCount);
-
/// Create an AMDGPUWavesPerEUAttr attribute.
AMDGPUFlatWorkGroupSizeAttr *
CreateAMDGPUFlatWorkGroupSizeAttr(const AttributeCommonInfo &CI, Expr *Min,
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index 666645628decc..2f2d6ec94de23 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -240,8 +240,54 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: {
unsigned ArgCount = TheCall->getNumArgs() - 1;
-
- return checkImageImmArgFunctionCall(TheCall, ArgCount);
+ llvm::APSInt Result;
+ bool isImmArg =
+ (!(SemaRef.BuiltinConstantArg(TheCall, 0, Result)) &&
+ !(SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) &&
+ !(SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result)))
+ ? false
+ : true;
+
+ return isImmArg;
+ }
+ case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32: {
+ unsigned ArgCount = TheCall->getNumArgs() - 1;
+ llvm::APSInt Result;
+ bool isImmArg =
+ (!(SemaRef.BuiltinConstantArg(TheCall, 1, Result)) &&
+ !(SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) &&
+ !(SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result)))
+ ? false
+ : true;
+
+ return isImmArg;
}
default:
return false;
diff --git a/clang/test/CodeGen/builtins-image-load-2d-f32.c b/clang/test/CodeGen/builtins-image-load-2d-f32.c
deleted file mode 100644
index aee97af37aaf0..0000000000000
--- a/clang/test/CodeGen/builtins-image-load-2d-f32.c
+++ /dev/null
@@ -1,722 +0,0 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa %s -emit-llvm -o - | FileCheck %s
-
-typedef int int8 __attribute__((ext_vector_type(8)));
-typedef float float4 __attribute__((ext_vector_type(4)));
-typedef _Float16 half;
-typedef half half4 __attribute__((ext_vector_type(4)));
-
-// CHECK-LABEL: define dso_local float @test_builtin_image_load_2d(
-// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
-// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
-// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 12, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 106, i32 103)
-// CHECK-NEXT: ret float [[TMP3]]
-//
-float test_builtin_image_load_2d(float f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_2d_f32_i32(12, i32, i32, vec8i32, 106, 103);
-}
-
-// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_2d_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
-// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
-// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
-// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
-// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
-// CHECK-NEXT: ret <4 x float> [[TMP3]]
-//
-float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, vec8i32, 120, 110);
-}
-// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2d_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
-// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
-// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
-// CHECK-NEXT: ret <4 x half> [[TMP3]]
-//
-half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, vec8i32, 120, 110);
-}
-
-// CHECK-LABEL: define dso_local float @test_builtin_image_load_2darray(
-// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
-// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
-// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.load.2darray.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
-// CHECK-NEXT: ret float [[TMP4]]
-//
-float test_builtin_image_load_2darray(float f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
-}
-// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_2darray_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
-// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
-// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
-// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
-// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
-// CHECK-NEXT: ret <4 x float> [[TMP4]]
-//
-float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_2darray_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
-}
-// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2darray_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
-// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
-// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
-// CHECK-NEXT: ret <4 x half> [[TMP4]]
-//
-half4 test_builtin_image_load_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_2darray_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110);
-}
-
-// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_1d_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
-// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
-// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
-// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
-// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], <8 x i32> [[TMP2]], i32 120, i32 110)
-// CHECK-NEXT: ret <4 x float> [[TMP3]]
-//
-float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_1d_v4f32_i32(100, i32, vec8i32, 120, 110);
-}
-// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1d_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
-// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
-// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], <8 x i32> [[TMP2]], i32 120, i32 110)
-// CHECK-NEXT: ret <4 x half> [[TMP3]]
-//
-half4 test_builtin_image_load_1d_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_1d_v4f16_i32(100, i32, vec8i32, 120, 110);
-}
-
-// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_1darray_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
-// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
-// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
-// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
-// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
-// CHECK-NEXT: ret <4 x float> [[TMP4]]
-//
-float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_1darray_v4f32_i32(100, i32, i32, vec8i32, 120, 110);
-}
-// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1darray_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
-// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
-// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
-// CHECK-NEXT: ret <4 x half> [[TMP4]]
-//
-half4 test_builtin_image_load_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_1darray_v4f16_i32(100, i32, i32, vec8i32, 120, 110);
-}
-
-// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_3d_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
-// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
-// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
-// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
-// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
-// CHECK-NEXT: ret <4 x float> [[TMP4]]
-//
-float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_3d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
-}
-// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_3d_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
-// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
-// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
-// CHECK-NEXT: ret <4 x half> [[TMP4]]
-//
-half4 test_builtin_image_load_3d_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_3d_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110);
-}
-
-// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_cube_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
-// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
-// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
-// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
-// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
-// CHECK-NEXT: ret <4 x float> [[TMP4]]
-//
-float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_cube_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
-}
-// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_cube_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
-// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
-// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.cube.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
-// CHECK-NEXT: ret <4 x half> [[TMP4]]
-//
-half4 test_builtin_image_load_cube_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_cube_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110);
-}
-
-// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_1d_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
-// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
-// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
-// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
-// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
-// CHECK-NEXT: ret <4 x float> [[TMP4]]
-//
-float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(100, i32, i32, vec8i32, 120, 110);
-}
-// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1d_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
-// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
-// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
-// CHECK-NEXT: ret <4 x half> [[TMP4]]
-//
-half4 test_builtin_image_load_mip_1d_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_1d_v4f16_i32(100, i32, i32, vec8i32, 120, 110);
-}
-
-// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_1darray_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
-// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
-// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
-// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
-// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
-// CHECK-NEXT: ret <4 x float> [[TMP4]]
-//
-float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
-}
-// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1darray_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
-// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
-// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
-// CHECK-NEXT: ret <4 x half> [[TMP4]]
-//
-half4 test_builtin_image_load_mip_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_1darray_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110);
-}
-
-// CHECK-LABEL: define dso_local float @test_builtin_image_load_mip_2d(
-// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
-// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
-// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.load.mip.2d.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
-// CHECK-NEXT: ret float [[TMP4]]
-//
-float test_builtin_image_load_mip_2d(float f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_2d_f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
-}
-// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2d_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
-// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
-// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
-// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
-// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
-// CHECK-NEXT: ret <4 x float> [[TMP4]]
-//
-float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
-}
-// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2d_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
-// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
-// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
-// CHECK-NEXT: ret <4 x half> [[TMP4]]
-//
-half4 test_builtin_image_load_mip_2d_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_2d_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110);
-}
-
-// CHECK-LABEL: define dso_local float @test_builtin_image_load_mip_2darray(
-// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
-// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
-// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.image.load.mip.2darray.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
-// CHECK-NEXT: ret float [[TMP5]]
-//
-float test_builtin_image_load_mip_2darray(float f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_2darray_f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
-}
-// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2darray_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
-// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
-// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
-// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
-// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
-// CHECK-NEXT: ret <4 x float> [[TMP5]]
-//
-float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
-}
-// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2darray_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
-// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
-// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
-// CHECK-NEXT: ret <4 x half> [[TMP5]]
-//
-half4 test_builtin_image_load_mip_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_2darray_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
-}
-
-// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_3d_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
-// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
-// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
-// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
-// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
-// CHECK-NEXT: ret <4 x float> [[TMP5]]
-//
-float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
-}
-// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_3d_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
-// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
-// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.3d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
-// CHECK-NEXT: ret <4 x half> [[TMP5]]
-//
-half4 test_builtin_image_load_mip_3d_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_3d_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
-}
-
-// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_cube_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
-// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
-// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
-// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
-// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
-// CHECK-NEXT: ret <4 x float> [[TMP5]]
-//
-float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
-}
-// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_cube_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
-// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
-// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
-// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
-// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.cube.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
-// CHECK-NEXT: ret <4 x half> [[TMP5]]
-//
-half4 test_builtin_image_load_mip_cube_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_cube_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
-}
diff --git a/clang/test/CodeGen/builtins-image-load.c b/clang/test/CodeGen/builtins-image-load.c
index 67548a567723e..3c3e1ec35806f 100644
--- a/clang/test/CodeGen/builtins-image-load.c
+++ b/clang/test/CodeGen/builtins-image-load.c
@@ -56,7 +56,6 @@ float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, vec8i32, 120, 110);
}
-
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2d_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -107,7 +106,6 @@ float test_builtin_image_load_2darray(float f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
}
-
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_2darray_1(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -133,7 +131,6 @@ float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_2darray_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
}
-
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2darray_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -183,7 +180,6 @@ float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_1d_v4f32_i32(100, i32, vec8i32, 120, 110);
}
-
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1d_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -232,7 +228,6 @@ float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_1darray_v4f32_i32(100, i32, i32, vec8i32, 120, 110);
}
-
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1darray_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -283,7 +278,6 @@ float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_3d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
}
-
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_3d_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -335,7 +329,6 @@ float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_cube_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
}
-
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_cube_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -386,7 +379,6 @@ float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(100, i32, i32, vec8i32, 120, 110);
}
-
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1d_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -437,7 +429,6 @@ float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32
return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
}
-
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1darray_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -489,7 +480,6 @@ float test_builtin_image_load_mip_2d(float f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_mip_2d_f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
}
-
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2d_1(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -515,7 +505,6 @@ float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
}
-
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2d_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -568,7 +557,6 @@ float test_builtin_image_load_mip_2darray(float f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_mip_2darray_f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
}
-
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2darray_1(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -595,7 +583,6 @@ float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32
return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
}
-
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2darray_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -649,7 +636,6 @@ float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
}
-
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_3d_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -703,7 +689,6 @@ float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
}
-
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_cube_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
diff --git a/clang/test/SemaOpenCL/builtins-image-load-2d-f32-param.cl b/clang/test/SemaOpenCL/builtins-image-load-2d-f32-param.cl
deleted file mode 100644
index 7b5aab4011da9..0000000000000
--- a/clang/test/SemaOpenCL/builtins-image-load-2d-f32-param.cl
+++ /dev/null
@@ -1,132 +0,0 @@
-// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -S -verify=expected -o - %s
-// REQUIRES: amdgpu-registered-target
-
-typedef int int8 __attribute__((ext_vector_type(8)));
-typedef float float4 __attribute__((ext_vector_type(4)));
-//typedef _Float16 half;
-typedef half half4 __attribute__((ext_vector_type(4)));
-
-
-float test_builtin_image_load_2d(float f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_2d_f32_i32(i32, i32, i32, vec8i32, 106, 103); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_f32_i32' must be a constant integer}}
-}
-float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_v4f32_i32' must be a constant integer}}
-}
-half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_v4f16_i32' must be a constant integer}}
-}
-
-float test_builtin_image_load_2darray(float f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_f32_i32' must be a constant integer}}
-}
-float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_2darray_v4f32_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_v4f32_i32' must be a constant integer}}
-}
-half4 test_builtin_image_load_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_2darray_v4f16_i32(100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_v4f16_i32' must be a constant integer}}
-}
-
-float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_1d_v4f32_i32(i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_1d_v4f32_i32' must be a constant integer}}
-}
-half4 test_builtin_image_load_1d_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_1d_v4f16_i32(100, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_1d_v4f16_i32' must be a constant integer}}
-}
-
-float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_1darray_v4f32_i32(100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_1darray_v4f32_i32' must be a constant integer}}
-}
-half4 test_builtin_image_load_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_1darray_v4f16_i32(100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_1darray_v4f16_i32' must be a constant integer}}
-}
-
-float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_3d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_3d_v4f32_i32' must be a constant integer}}
-}
-half4 test_builtin_image_load_3d_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_3d_v4f16_i32(i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_3d_v4f16_i32' must be a constant integer}}
-}
-
-float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_cube_v4f32_i32(i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_cube_v4f32_i32' must be a constant integer}}
-}
-half4 test_builtin_image_load_cube_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_cube_v4f16_i32(i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_cube_v4f16_i32' must be a constant integer}}
-}
-
-float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1d_v4f32_i32' must be a constant integer}}
-}
-half4 test_builtin_image_load_mip_1d_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_1d_v4f16_i32(100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1d_v4f16_i32' must be a constant integer}}
-}
-
-float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1darray_v4f32_i32' must be a constant integer}}
-}
-half4 test_builtin_image_load_mip_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_1darray_v4f16_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1darray_v4f16_i32' must be a constant integer}}
-}
-
-float test_builtin_image_load_mip_2d(float f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_2d_f32_i32(i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_f32_i32' must be a constant integer}}
-}
-float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_v4f32_i32' must be a constant integer}}
-}
-half4 test_builtin_image_load_mip_2d_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_2d_v4f16_i32(i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_v4f16_i32' must be a constant integer}}
-}
-
-float test_builtin_image_load_mip_2darray(float f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_2darray_f32_i32(i32, i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_f32_i32' must be a constant integer}}
-}
-float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_v4f32_i32' must be a constant integer}}
-}
-half4 test_builtin_image_load_mip_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_2darray_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_v4f16_i32' must be a constant integer}}
-}
-
-float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(i32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_3d_v4f32_i32' must be a constant integer}}
-}
-half4 test_builtin_image_load_mip_3d_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_3d_v4f16_i32(i32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_3d_v4f16_i32' must be a constant integer}}
-}
-
-float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(i32, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_cube_v4f32_i32' must be a constant integer}}
-}
-half4 test_builtin_image_load_mip_cube_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_cube_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_cube_v4f16_i32' must be a constant integer}}
-}
diff --git a/clang/test/SemaOpenCL/builtins-image-load-param.cl b/clang/test/SemaOpenCL/builtins-image-load-param.cl
index 249bb9211ab75..03afbde19a359 100644
--- a/clang/test/SemaOpenCL/builtins-image-load-param.cl
+++ b/clang/test/SemaOpenCL/builtins-image-load-param.cl
@@ -21,7 +21,6 @@ half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_v4f16_i32' must be a constant integer}}
}
-
float test_builtin_image_load_2darray(float f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_f32_i32' must be a constant integer}}
@@ -219,4 +218,4 @@ float4 test_builtin_image_sample_2d_gfx_1(float4 v4f32, float f32, int i32, int8
half4 test_builtin_image_sample_2d_gfx_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) {
return __builtin_amdgcn_image_sample_2d_v4f16_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_sample_2d_v4f16_f32' needs target feature image-insts}}
-}
\ No newline at end of file
+}
>From 78caf4feae75859bcdb12d6f187a1e06c920a7dc Mon Sep 17 00:00:00 2001
From: ranapratap55 <RanaPratapReddy.Nimmakayala at amd.com>
Date: Tue, 12 Aug 2025 13:19:37 +0530
Subject: [PATCH 4/7] [AMDGPU] Adds EmitAMDGCNImageOverloadedReturnType for
amdgcn_image_load/store and adds 'image-insts' feature
---
clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 3 +
clang/lib/Sema/SemaAMDGPU.cpp | 79 +--------------------
clang/test/CodeGen/builtins-image-load.c | 15 ++++
3 files changed, 19 insertions(+), 78 deletions(-)
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 69e5201ff7e9e..47a975ca78df4 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -10,9 +10,12 @@
//
//===----------------------------------------------------------------------===//
+#include "CodeGenFunction.h"
#include "CGBuiltin.h"
#include "clang/Basic/TargetBuiltins.h"
+#include "clang/Frontend/FrontendDiagnostic.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index 2f2d6ec94de23..d08778f2d1f79 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -13,6 +13,7 @@
#include "clang/Sema/SemaAMDGPU.h"
#include "clang/Basic/DiagnosticSema.h"
#include "clang/Basic/TargetBuiltins.h"
+#include "clang/Frontend/FrontendDiagnostic.h"
#include "clang/Sema/Ownership.h"
#include "clang/Sema/Sema.h"
#include "llvm/Support/AMDGPUAddrSpace.h"
@@ -211,84 +212,6 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
(SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) ||
(SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result)));
}
- case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: {
- unsigned ArgCount = TheCall->getNumArgs() - 1;
- llvm::APSInt Result;
- bool isImmArg =
- (!(SemaRef.BuiltinConstantArg(TheCall, 0, Result)) &&
- !(SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) &&
- !(SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result)))
- ? false
- : true;
-
- return isImmArg;
- }
- case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32: {
- unsigned ArgCount = TheCall->getNumArgs() - 1;
- llvm::APSInt Result;
- bool isImmArg =
- (!(SemaRef.BuiltinConstantArg(TheCall, 1, Result)) &&
- !(SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) &&
- !(SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result)))
- ? false
- : true;
-
- return isImmArg;
- }
default:
return false;
}
diff --git a/clang/test/CodeGen/builtins-image-load.c b/clang/test/CodeGen/builtins-image-load.c
index 3c3e1ec35806f..67548a567723e 100644
--- a/clang/test/CodeGen/builtins-image-load.c
+++ b/clang/test/CodeGen/builtins-image-load.c
@@ -56,6 +56,7 @@ float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, vec8i32, 120, 110);
}
+
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2d_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -106,6 +107,7 @@ float test_builtin_image_load_2darray(float f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
}
+
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_2darray_1(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -131,6 +133,7 @@ float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_2darray_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
}
+
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2darray_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -180,6 +183,7 @@ float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_1d_v4f32_i32(100, i32, vec8i32, 120, 110);
}
+
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1d_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -228,6 +232,7 @@ float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_1darray_v4f32_i32(100, i32, i32, vec8i32, 120, 110);
}
+
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1darray_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -278,6 +283,7 @@ float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_3d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
}
+
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_3d_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -329,6 +335,7 @@ float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_cube_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
}
+
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_cube_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -379,6 +386,7 @@ float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(100, i32, i32, vec8i32, 120, 110);
}
+
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1d_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -429,6 +437,7 @@ float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32
return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
}
+
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1darray_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -480,6 +489,7 @@ float test_builtin_image_load_mip_2d(float f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_mip_2d_f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
}
+
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2d_1(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -505,6 +515,7 @@ float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
}
+
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2d_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -557,6 +568,7 @@ float test_builtin_image_load_mip_2darray(float f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_mip_2darray_f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
}
+
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2darray_1(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -583,6 +595,7 @@ float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32
return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
}
+
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2darray_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -636,6 +649,7 @@ float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
}
+
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_3d_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -689,6 +703,7 @@ float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
}
+
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_cube_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
>From f27e89abd9d70d8af6eec2b8571a78843700ae17 Mon Sep 17 00:00:00 2001
From: ranapratap55 <RanaPratapReddy.Nimmakayala at amd.com>
Date: Tue, 9 Sep 2025 10:37:23 +0530
Subject: [PATCH 5/7] [AMDGPU] Extends builtin support for amdgcn_image_sample
and adds sema checking tests
---
clang/test/SemaOpenCL/builtins-image-load-param.cl | 1 +
1 file changed, 1 insertion(+)
diff --git a/clang/test/SemaOpenCL/builtins-image-load-param.cl b/clang/test/SemaOpenCL/builtins-image-load-param.cl
index 03afbde19a359..0da4831e847f7 100644
--- a/clang/test/SemaOpenCL/builtins-image-load-param.cl
+++ b/clang/test/SemaOpenCL/builtins-image-load-param.cl
@@ -21,6 +21,7 @@ half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_v4f16_i32' must be a constant integer}}
}
+
float test_builtin_image_load_2darray(float f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_f32_i32' must be a constant integer}}
>From 46c833f037c2973c2e15437973e6b98f6e8a8da8 Mon Sep 17 00:00:00 2001
From: ranapratap55 <RanaPratapReddy.Nimmakayala at amd.com>
Date: Tue, 7 Oct 2025 11:26:08 +0530
Subject: [PATCH 6/7] Using image desc as opaque ptr type for image
load/store/sample
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 169 ++---
clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 74 +-
clang/test/CodeGen/builtins-image-load.c | 715 +++++++++---------
clang/test/CodeGen/builtins-image-store.c | 477 ++++++------
.../builtins-image-load-param-gfx1100.cl | 194 +++++
.../builtins-image-load-param-gfx942.cl | 219 ++++++
.../SemaOpenCL/builtins-image-load-param.cl | 222 ------
.../builtins-image-store-param-gfx1100.cl | 129 ++++
.../builtins-image-store-param-gfx942.cl | 129 ++++
.../SemaOpenCL/builtins-image-store-param.cl | 145 ----
10 files changed, 1443 insertions(+), 1030 deletions(-)
create mode 100644 clang/test/SemaOpenCL/builtins-image-load-param-gfx1100.cl
create mode 100644 clang/test/SemaOpenCL/builtins-image-load-param-gfx942.cl
delete mode 100644 clang/test/SemaOpenCL/builtins-image-load-param.cl
create mode 100644 clang/test/SemaOpenCL/builtins-image-store-param-gfx1100.cl
create mode 100644 clang/test/SemaOpenCL/builtins-image-store-param-gfx942.cl
delete mode 100644 clang/test/SemaOpenCL/builtins-image-store-param.cl
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 7202a69c1f2cf..8428fa97fe445 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -888,105 +888,76 @@ TARGET_BUILTIN(__builtin_amdgcn_cooperative_atomic_store_8x16B, "vV4i*V4iIicC*",
//===----------------------------------------------------------------------===//
// Image builtins
//===----------------------------------------------------------------------===//
-TARGET_BUILTIN(__builtin_amdgcn_image_load_1d_v4f32_i32, "V4fiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_1d_v4f16_i32, "V4hiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_1darray_v4f32_i32, "V4fiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_1darray_v4f16_i32, "V4hiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_2d_f32_i32, "fiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_2d_v4f32_i32, "V4fiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_2d_v4f16_i32, "V4hiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_2darray_f32_i32, "fiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_2darray_v4f32_i32, "V4fiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_2darray_v4f16_i32, "V4hiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_3d_v4f32_i32, "V4fiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_3d_v4f16_i32, "V4hiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_cube_v4f32_i32, "V4fiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_cube_v4f16_i32, "V4hiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1d_v4f32_i32, "V4fiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1d_v4f16_i32, "V4hiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1darray_v4f32_i32, "V4fiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1darray_v4f16_i32, "V4hiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2d_f32_i32, "fiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2d_v4f32_i32, "V4fiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2d_v4f16_i32, "V4hiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2darray_f32_i32, "fiiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2darray_v4f32_i32, "V4fiiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2darray_v4f16_i32, "V4hiiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_3d_v4f32_i32, "V4fiiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_3d_v4f16_i32, "V4hiiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_cube_v4f32_i32, "V4fiiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_cube_v4f16_i32, "V4hiiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_1d_v4f32_i32, "vV4fiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_1d_v4f16_i32, "vV4hiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_1darray_v4f32_i32, "vV4fiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_1darray_v4f16_i32, "vV4hiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_f32_i32, "vfiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_v4f32_i32, "vV4fiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_v4f16_i32, "vV4hiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_f32_i32, "vfiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_v4f32_i32, "vV4fiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_v4f16_i32, "vV4hiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_3d_v4f32_i32, "vV4fiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_3d_v4f16_i32, "vV4hiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_cube_v4f32_i32, "vV4fiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_cube_v4f16_i32, "vV4hiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1d_v4f32_i32, "vV4fiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1d_v4f16_i32, "vV4hiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1darray_v4f32_i32, "vV4fiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1darray_v4f16_i32, "vV4hiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_f32_i32, "vfiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_v4f32_i32, "vV4fiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_v4f16_i32, "vV4hiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_f32_i32, "vfiiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_v4f32_i32, "vV4fiiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_v4f16_i32, "vV4hiiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_3d_v4f32_i32, "vV4fiiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_3d_v4f16_i32, "vV4hiiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_cube_v4f32_i32, "vV4fiiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_cube_v4f16_i32, "vV4hiiiiiV8iii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_1d_v4f32_f32, "V4fifV8iV4ibii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_1d_v4f16_f32, "V4hifV8iV4ibii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_1darray_v4f32_f32, "V4fiffV8iV4ibii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_1darray_v4f16_f32, "V4hiffV8iV4ibii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_2d_f32_f32, "fiffV8iV4ibii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_2d_v4f32_f32, "V4fiffV8iV4ibii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_2d_v4f16_f32, "V4hiffV8iV4ibii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_2darray_f32_f32, "fifffV8iV4ibii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_2darray_v4f32_f32, "V4fifffV8iV4ibii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_2darray_v4f16_f32, "V4hifffV8iV4ibii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_3d_v4f32_f32, "V4fifffV8iV4ibii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_3d_v4f16_f32, "V4hifffV8iV4ibii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_cube_v4f32_f32, "V4fifffV8iV4ibii", "nc", "image-insts")
-TARGET_BUILTIN(__builtin_amdgcn_image_sample_cube_v4f16_f32, "V4hifffV8iV4ibii", "nc", "image-insts")
-
-TARGET_BUILTIN(__builtin_amdgcn_image_store_1d_v4f32_i32, "vV4fiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_1d_v4f16_i32, "vV4hiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_1darray_v4f32_i32, "vV4fiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_1darray_v4f16_i32, "vV4hiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_f32_i32, "vfiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_v4f32_i32, "vV4fiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_v4f16_i32, "vV4hiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_f32_i32, "vfiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_v4f32_i32, "vV4fiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_v4f16_i32, "vV4hiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_3d_v4f32_i32, "vV4fiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_3d_v4f16_i32, "vV4hiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_cube_v4f32_i32, "vV4fiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_cube_v4f16_i32, "vV4hiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1d_v4f32_i32, "vV4fiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1d_v4f16_i32, "vV4hiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1darray_v4f32_i32, "vV4fiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1darray_v4f16_i32, "vV4hiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_f32_i32, "vfiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_v4f32_i32, "vV4fiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_v4f16_i32, "vV4hiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_f32_i32, "vfiiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_v4f32_i32, "vV4fiiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_v4f16_i32, "vV4hiiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_3d_v4f32_i32, "vV4fiiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_3d_v4f16_i32, "vV4hiiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_cube_v4f32_i32, "vV4fiiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_cube_v4f16_i32, "vV4hiiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_1d_v4f32_i32, "V4fiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_1d_v4f16_i32, "V4hiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_1darray_v4f32_i32, "V4fiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_1darray_v4f16_i32, "V4hiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2d_f32_i32, "fiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2d_v4f32_i32, "V4fiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2d_v4f16_i32, "V4hiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2darray_f32_i32, "fiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2darray_v4f32_i32, "V4fiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2darray_v4f16_i32, "V4hiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_3d_v4f32_i32, "V4fiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_3d_v4f16_i32, "V4hiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_cube_v4f32_i32, "V4fiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_cube_v4f16_i32, "V4hiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1d_v4f32_i32, "V4fiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1d_v4f16_i32, "V4hiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1darray_v4f32_i32, "V4fiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1darray_v4f16_i32, "V4hiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2d_f32_i32, "fiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2d_v4f32_i32, "V4fiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2d_v4f16_i32, "V4hiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2darray_f32_i32, "fiiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2darray_v4f32_i32, "V4fiiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2darray_v4f16_i32, "V4hiiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_3d_v4f32_i32, "V4fiiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_3d_v4f16_i32, "V4hiiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_cube_v4f32_i32, "V4fiiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_cube_v4f16_i32, "V4hiiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_1d_v4f32_i32, "vV4fiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_1d_v4f16_i32, "vV4hiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_1darray_v4f32_i32, "vV4fiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_1darray_v4f16_i32, "vV4hiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_f32_i32, "vfiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_v4f32_i32, "vV4fiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_v4f16_i32, "vV4hiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_f32_i32, "vfiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_v4f32_i32, "vV4fiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_v4f16_i32, "vV4hiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_3d_v4f32_i32, "vV4fiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_3d_v4f16_i32, "vV4hiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_cube_v4f32_i32, "vV4fiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_cube_v4f16_i32, "vV4hiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1d_v4f32_i32, "vV4fiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1d_v4f16_i32, "vV4hiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1darray_v4f32_i32, "vV4fiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1darray_v4f16_i32, "vV4hiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_f32_i32, "vfiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_v4f32_i32, "vV4fiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_v4f16_i32, "vV4hiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_f32_i32, "vfiiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_v4f32_i32, "vV4fiiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_v4f16_i32, "vV4hiiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_3d_v4f32_i32, "vV4fiiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_3d_v4f16_i32, "vV4hiiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_cube_v4f32_i32, "vV4fiiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_cube_v4f16_i32, "vV4hiiiiiQtii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_1d_v4f32_f32, "V4fifQtV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_1d_v4f16_f32, "V4hifQtV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_1darray_v4f32_f32, "V4fiffQtV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_1darray_v4f16_f32, "V4hiffQtV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_2d_f32_f32, "fiffQtV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_2d_v4f32_f32, "V4fiffQtV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_2d_v4f16_f32, "V4hiffQtV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_2darray_f32_f32, "fifffQtV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_2darray_v4f32_f32, "V4fifffQtV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_2darray_v4f16_f32, "V4hifffQtV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_3d_v4f32_f32, "V4fifffQtV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_3d_v4f16_f32, "V4hifffQtV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_cube_v4f32_f32, "V4fifffQtV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_cube_v4f16_f32, "V4hifffQtV4ibii", "nc", "image-insts")
#undef BUILTIN
#undef TARGET_BUILTIN
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 47a975ca78df4..84119b7b4a768 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -184,13 +184,83 @@ static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
return Call;
}
+static bool IsImageSampleBuiltIn(unsigned BuiltinID) {
+ switch (BuiltinID) {
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static llvm::Value *LoadTextureDescPtorAsVec8I32(CodeGenFunction &CGF,
+ llvm::Value *RsrcPtr) {
+ auto &B = CGF.Builder;
+ auto *VecTy = llvm::FixedVectorType::get(B.getInt32Ty(), 8);
+
+ if (RsrcPtr->getType() == VecTy)
+ return RsrcPtr;
+
+ if (RsrcPtr->getType()->isIntegerTy(32)) {
+ unsigned AS = 8;
+ llvm::PointerType *VecPtrTy = llvm::PointerType::get(VecTy, AS);
+ llvm::Value *Ptr =
+ B.CreateIntToPtr(RsrcPtr, VecPtrTy, "tex.rsrc.from.int");
+ return B.CreateAlignedLoad(VecTy, Ptr, llvm::Align(32), "tex.rsrc.val");
+ }
+
+ if (RsrcPtr->getType()->isPointerTy()) {
+ unsigned AS = RsrcPtr->getType()->getPointerAddressSpace();
+ auto *VecPtrTy = llvm::PointerType::get(VecTy, AS);
+ llvm::Value *Typed = B.CreateBitCast(RsrcPtr, VecPtrTy, "tex.rsrc.typed");
+ return B.CreateAlignedLoad(VecTy, Typed, llvm::Align(32), "tex.rsrc.val");
+ }
+
+ const auto &DL = CGF.CGM.getDataLayout();
+ if (DL.getTypeSizeInBits(RsrcPtr->getType()) == 256)
+ return B.CreateBitCast(RsrcPtr, VecTy, "tex.rsrc.val");
+
+ RsrcPtr->getType()->print(llvm::errs());
+ llvm::report_fatal_error(": Unexpected texture resource argument form");
+}
+
+static unsigned GetTextureDescIndex(unsigned BuiltinID, const CallExpr *E) {
+ unsigned N = E->getNumArgs();
+ if (IsImageSampleBuiltIn(BuiltinID)) {
+ if (N < 5) return (unsigned)-1;
+ return N - 5;
+ }
+
+ if (N < 3) return (unsigned)-1;
+ return N - 3;
+}
+
llvm::CallInst *EmitAMDGCNImageOverloadedReturnType(clang::CodeGen::CodeGenFunction &CGF,
const clang::CallExpr *E,
unsigned IntrinsicID,
bool IsImageStore) {
clang::SmallVector<llvm::Value *, 10> Args;
- for (unsigned I = 0; I < E->getNumArgs(); ++I)
- Args.push_back(CGF.EmitScalarExpr(E->getArg(I)));
+ unsigned RsrcIndex = GetTextureDescIndex(E->getBuiltinCallee(), E);
+
+ for (unsigned I = 0; I < E->getNumArgs(); ++I){
+ llvm::Value *V = CGF.EmitScalarExpr(E->getArg(I));
+ if (I == RsrcIndex)
+ V = LoadTextureDescPtorAsVec8I32(CGF, V);
+ Args.push_back(V);
+ }
llvm::Type *RetTy = CGF.ConvertType(E->getType());
if (IsImageStore)
diff --git a/clang/test/CodeGen/builtins-image-load.c b/clang/test/CodeGen/builtins-image-load.c
index 67548a567723e..8442124416338 100644
--- a/clang/test/CodeGen/builtins-image-load.c
+++ b/clang/test/CodeGen/builtins-image-load.c
@@ -1,1162 +1,1203 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1100 %s -emit-llvm -o - | FileCheck %s
-typedef int int8 __attribute__((ext_vector_type(8)));
typedef int int4 __attribute__((ext_vector_type(4)));
typedef float float4 __attribute__((ext_vector_type(4)));
typedef _Float16 half;
typedef half half4 __attribute__((ext_vector_type(4)));
// CHECK-LABEL: define dso_local float @test_builtin_image_load_2d(
-// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 12, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 106, i32 103)
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 12, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 106, i32 103)
// CHECK-NEXT: ret float [[TMP3]]
//
-float test_builtin_image_load_2d(float f32, int i32, int8 vec8i32) {
+float test_builtin_image_load_2d(float f32, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_2d_f32_i32(12, i32, i32, vec8i32, 106, 103);
+ return __builtin_amdgcn_image_load_2d_f32_i32(12, i32, i32, tex, 106, 103);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_2d_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP3]]
//
-float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, int8 vec8i32) {
+float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, vec8i32, 120, 110);
+ return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2d_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP3]]
//
-half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, int8 vec8i32) {
+half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, vec8i32, 120, 110);
+ return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local float @test_builtin_image_load_2darray(
-// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.load.2darray.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.load.2darray.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret float [[TMP4]]
//
-float test_builtin_image_load_2darray(float f32, int i32, int8 vec8i32) {
+float test_builtin_image_load_2darray(float f32, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
+ return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_2darray_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP4]]
//
-float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
+float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_2darray_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
+ return __builtin_amdgcn_image_load_2darray_v4f32_i32(100, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2darray_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP4]]
//
-half4 test_builtin_image_load_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
+half4 test_builtin_image_load_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_2darray_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110);
+ return __builtin_amdgcn_image_load_2darray_v4f16_i32(100, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_1d_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], <8 x i32> [[TMP1]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP2]]
//
-float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, int8 vec8i32) {
+float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_1d_v4f32_i32(100, i32, vec8i32, 120, 110);
+ return __builtin_amdgcn_image_load_1d_v4f32_i32(100, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1d_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], <8 x i32> [[TMP1]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP2]]
//
-half4 test_builtin_image_load_1d_2(half4 v4f16, int i32, int8 vec8i32) {
+half4 test_builtin_image_load_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_1d_v4f16_i32(100, i32, vec8i32, 120, 110);
+ return __builtin_amdgcn_image_load_1d_v4f16_i32(100, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_1darray_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP3]]
//
-float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
+float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_1darray_v4f32_i32(100, i32, i32, vec8i32, 120, 110);
+ return __builtin_amdgcn_image_load_1darray_v4f32_i32(100, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1darray_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP3]]
//
-half4 test_builtin_image_load_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
+half4 test_builtin_image_load_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_1darray_v4f16_i32(100, i32, i32, vec8i32, 120, 110);
+ return __builtin_amdgcn_image_load_1darray_v4f16_i32(100, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_3d_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP4]]
//
-float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, int8 vec8i32) {
+float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_3d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
+ return __builtin_amdgcn_image_load_3d_v4f32_i32(100, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_3d_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP4]]
//
-half4 test_builtin_image_load_3d_2(half4 v4f16, int i32, int8 vec8i32) {
+half4 test_builtin_image_load_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_3d_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110);
+ return __builtin_amdgcn_image_load_3d_v4f16_i32(100, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_cube_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP4]]
//
-float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, int8 vec8i32) {
+float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_cube_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
+ return __builtin_amdgcn_image_load_cube_v4f32_i32(100, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_cube_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.cube.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.cube.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP4]]
//
-half4 test_builtin_image_load_cube_2(half4 v4f16, int i32, int8 vec8i32) {
+half4 test_builtin_image_load_cube_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_cube_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110);
+ return __builtin_amdgcn_image_load_cube_v4f16_i32(100, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_1d_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP3]]
//
-float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) {
+float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(100, i32, i32, vec8i32, 120, 110);
+ return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(100, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1d_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP3]]
//
-half4 test_builtin_image_load_mip_1d_2(half4 v4f16, int i32, int8 vec8i32) {
+half4 test_builtin_image_load_mip_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_mip_1d_v4f16_i32(100, i32, i32, vec8i32, 120, 110);
+ return __builtin_amdgcn_image_load_mip_1d_v4f16_i32(100, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_1darray_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP4]]
//
-float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
+float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
+ return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(100, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1darray_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP4]]
//
-half4 test_builtin_image_load_mip_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
+half4 test_builtin_image_load_mip_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_mip_1darray_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110);
+ return __builtin_amdgcn_image_load_mip_1darray_v4f16_i32(100, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local float @test_builtin_image_load_mip_2d(
-// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.load.mip.2d.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.load.mip.2d.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret float [[TMP4]]
//
-float test_builtin_image_load_mip_2d(float f32, int i32, int8 vec8i32) {
+float test_builtin_image_load_mip_2d(float f32, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_mip_2d_f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
+ return __builtin_amdgcn_image_load_mip_2d_f32_i32(100, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2d_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP4]]
//
-float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) {
+float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
+ return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(100, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2d_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP4]]
//
-half4 test_builtin_image_load_mip_2d_2(half4 v4f16, int i32, int8 vec8i32) {
+half4 test_builtin_image_load_mip_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_mip_2d_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110);
+ return __builtin_amdgcn_image_load_mip_2d_v4f16_i32(100, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local float @test_builtin_image_load_mip_2darray(
-// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.image.load.mip.2darray.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.image.load.mip.2darray.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret float [[TMP5]]
//
-float test_builtin_image_load_mip_2darray(float f32, int i32, int8 vec8i32) {
+float test_builtin_image_load_mip_2darray(float f32, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_mip_2darray_f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
+ return __builtin_amdgcn_image_load_mip_2darray_f32_i32(100, i32, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2darray_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP5]]
//
-float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
+float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
+ return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(100, i32, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2darray_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP5]]
//
-half4 test_builtin_image_load_mip_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
+half4 test_builtin_image_load_mip_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_mip_2darray_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
+ return __builtin_amdgcn_image_load_mip_2darray_v4f16_i32(100, i32, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_3d_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP5]]
//
-float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) {
+float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
+ return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(100, i32, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_3d_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.3d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.3d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP5]]
//
-half4 test_builtin_image_load_mip_3d_2(half4 v4f16, int i32, int8 vec8i32) {
+half4 test_builtin_image_load_mip_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_mip_3d_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
+ return __builtin_amdgcn_image_load_mip_3d_v4f16_i32(100, i32, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_cube_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP5]]
//
-float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) {
+float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
+ return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(100, i32, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_cube_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.cube.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.cube.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP5]]
//
-half4 test_builtin_image_load_mip_cube_2(half4 v4f16, int i32, int8 vec8i32) {
+half4 test_builtin_image_load_mip_cube_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_mip_cube_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
+ return __builtin_amdgcn_image_load_mip_cube_v4f16_i32(100, i32, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_1d_v4f32_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32
// CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], <8 x i32> [[TMP1]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP3]]
//
-float4 test_builtin_amdgcn_image_sample_1d_v4f32_f32(float4 v4f32, int i32, float f32, int8 vec8i32, int4 vec4i32) {
- return __builtin_amdgcn_image_sample_1d_v4f32_f32(100, f32, vec8i32, vec4i32, 0, 120, 110);
+float4 test_builtin_amdgcn_image_sample_1d_v4f32_f32(float4 v4f32, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_1d_v4f32_f32(100, f32, tex, vec4i32, 0, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_1d_v4f16_f32(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32
// CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.1d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], <8 x i32> [[TMP1]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.1d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP3]]
//
-half4 test_builtin_amdgcn_image_sample_1d_v4f16_f32(half4 v4f16, int i32, float f32, int8 vec8i32, int4 vec4i32) {
- return __builtin_amdgcn_image_sample_1d_v4f16_f32(100, f32, vec8i32, vec4i32, 0, 120, 110);
+half4 test_builtin_amdgcn_image_sample_1d_v4f16_f32(half4 v4f16, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_1d_v4f16_f32(100, f32, tex, vec4i32, 0, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_1darray_v4f32_f32(
-// CHECK-SAME: i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP4]]
//
-float4 test_builtin_amdgcn_image_sample_1darray_v4f32_f32(int i32, float f32, int8 vec8i32, int4 vec4i32) {
- return __builtin_amdgcn_image_sample_1darray_v4f32_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+float4 test_builtin_amdgcn_image_sample_1darray_v4f32_f32(int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_1darray_v4f32_f32(100, f32, f32, tex, vec4i32, 0, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_1darray_v4f16_f32(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.1darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.1darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP4]]
//
-half4 test_builtin_amdgcn_image_sample_1darray_v4f16_f32(half4 v4f16, int i32, float f32, int8 vec8i32, int4 vec4i32) {
- return __builtin_amdgcn_image_sample_1darray_v4f16_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+half4 test_builtin_amdgcn_image_sample_1darray_v4f16_f32(half4 v4f16, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_1darray_v4f16_f32(100, f32, f32, tex, vec4i32, 0, 120, 110);
}
// CHECK-LABEL: define dso_local float @test_builtin_amdgcn_image_sample_2d_f32_f32(
-// CHECK-SAME: i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.sample.2d.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.sample.2d.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
// CHECK-NEXT: ret float [[TMP4]]
//
-float test_builtin_amdgcn_image_sample_2d_f32_f32(int i32, float f32, int8 vec8i32, int4 vec4i32) {
- return __builtin_amdgcn_image_sample_2d_f32_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+float test_builtin_amdgcn_image_sample_2d_f32_f32(int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_2d_f32_f32(100, f32, f32, tex, vec4i32, 0, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_2d_v4f32_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP4]]
//
-float4 test_builtin_amdgcn_image_sample_2d_v4f32_f32(float4 v4f32, int i32, float f32, int8 vec8i32, int4 vec4i32) {
- return __builtin_amdgcn_image_sample_2d_v4f32_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+float4 test_builtin_amdgcn_image_sample_2d_v4f32_f32(float4 v4f32, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_2d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_2d_v4f16_f32(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.2d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.2d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP4]]
//
-half4 test_builtin_amdgcn_image_sample_2d_v4f16_f32(half4 v4f16, int i32, float f32, int8 vec8i32, int4 vec4i32) {
- return __builtin_amdgcn_image_sample_2d_v4f16_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+half4 test_builtin_amdgcn_image_sample_2d_v4f16_f32(half4 v4f16, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_2d_v4f16_f32(100, f32, f32, tex, vec4i32, 0, 120, 110);
}
// CHECK-LABEL: define dso_local float @test_builtin_amdgcn_image_sample_2darray_f32_f32(
-// CHECK-SAME: i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.image.sample.2darray.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.image.sample.2darray.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
// CHECK-NEXT: ret float [[TMP5]]
//
-float test_builtin_amdgcn_image_sample_2darray_f32_f32(int i32, float f32, int8 vec8i32, int4 vec4i32) {
- return __builtin_amdgcn_image_sample_2darray_f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+float test_builtin_amdgcn_image_sample_2darray_f32_f32(int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_2darray_f32_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_2darray_v4f32_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP5]]
//
-float4 test_builtin_amdgcn_image_sample_2darray_v4f32_f32(float4 v4f32, int i32, float f32, int8 vec8i32, int4 vec4i32) {
- return __builtin_amdgcn_image_sample_2darray_v4f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+float4 test_builtin_amdgcn_image_sample_2darray_v4f32_f32(float4 v4f32, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_2darray_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_2darray_v4f16_f32(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.2darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.2darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP5]]
//
-half4 test_builtin_amdgcn_image_sample_2darray_v4f16_f32(half4 v4f16, int i32, float f32, int8 vec8i32, int4 vec4i32) {
- return __builtin_amdgcn_image_sample_2darray_v4f16_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+half4 test_builtin_amdgcn_image_sample_2darray_v4f16_f32(half4 v4f16, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_2darray_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_3d_v4f32_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP5]]
//
-float4 test_builtin_amdgcn_image_sample_3d_v4f32_f32(float4 v4f32, int i32, float f32, int8 vec8i32, int4 vec4i32) {
- return __builtin_amdgcn_image_sample_3d_v4f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+float4 test_builtin_amdgcn_image_sample_3d_v4f32_f32(float4 v4f32, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_3d_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_3d_v4f16_f32(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.3d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.3d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP5]]
//
-half4 test_builtin_amdgcn_image_sample_3d_v4f16_f32(half4 v4f16, int i32, float f32, int8 vec8i32, int4 vec4i32) {
- return __builtin_amdgcn_image_sample_3d_v4f16_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+half4 test_builtin_amdgcn_image_sample_3d_v4f16_f32(half4 v4f16, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_3d_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_cube_v4f32_f32(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP5]]
//
-float4 test_builtin_amdgcn_image_sample_cube_v4f32_f32(float4 v4f32, int i32, float f32, int8 vec8i32, int4 vec4i32) {
- return __builtin_amdgcn_image_sample_cube_v4f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+float4 test_builtin_amdgcn_image_sample_cube_v4f32_f32(float4 v4f32, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_cube_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_cube_v4f16_f32(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.cube.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.cube.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP5]]
//
-half4 test_builtin_amdgcn_image_sample_cube_v4f16_f32(half4 v4f16, int i32, float f32, int8 vec8i32, int4 vec4i32) {
- return __builtin_amdgcn_image_sample_cube_v4f16_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+half4 test_builtin_amdgcn_image_sample_cube_v4f16_f32(half4 v4f16, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_cube_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110);
}
diff --git a/clang/test/CodeGen/builtins-image-store.c b/clang/test/CodeGen/builtins-image-store.c
index cd2b09e074c59..5309a16df7033 100644
--- a/clang/test/CodeGen/builtins-image-store.c
+++ b/clang/test/CodeGen/builtins-image-store.c
@@ -1,703 +1,730 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 %s -emit-llvm -o - | FileCheck %s
-typedef int int8 __attribute__((ext_vector_type(8)));
typedef float float4 __attribute__((ext_vector_type(4)));
typedef _Float16 half;
typedef half half4 __attribute__((ext_vector_type(4)));
// CHECK-LABEL: define dso_local void @test_builtin_image_store_2d(
-// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.2d.f32.i32.v8i32(float [[TMP0]], i32 12, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 106, i32 103)
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.2d.f32.i32.v8i32(float [[TMP0]], i32 12, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 106, i32 103)
// CHECK-NEXT: ret void
//
-void test_builtin_image_store_2d(float f32, int i32, int8 vec8i32) {
+void test_builtin_image_store_2d(float f32, int i32, __amdgpu_texture_t tex) {
- __builtin_amdgcn_image_store_2d_f32_i32(f32, 12, i32, i32, vec8i32, 106, 103);
+ __builtin_amdgcn_image_store_2d_f32_i32(f32, 12, i32, i32, tex, 106, 103);
}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_2d_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.2d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.2d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret void
//
-void test_builtin_image_store_2d_1(float4 v4f32, int i32, int8 vec8i32) {
+void test_builtin_image_store_2d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- __builtin_amdgcn_image_store_2d_v4f32_i32(v4f32, 100, i32, i32, vec8i32, 120, 110);
+ __builtin_amdgcn_image_store_2d_v4f32_i32(v4f32, 100, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_2d_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.2d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.2d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret void
//
-void test_builtin_image_store_2d_2(half4 v4f16, int i32, int8 vec8i32) {
+void test_builtin_image_store_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- __builtin_amdgcn_image_store_2d_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, 110);
+ __builtin_amdgcn_image_store_2d_v4f16_i32(v4f16, 100, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_2darray(
-// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.2darray.f32.i32.v8i32(float [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.2darray.f32.i32.v8i32(float [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret void
//
-void test_builtin_image_store_2darray(float f32, int i32, int8 vec8i32) {
+void test_builtin_image_store_2darray(float f32, int i32, __amdgpu_texture_t tex) {
- __builtin_amdgcn_image_store_2darray_f32_i32(f32, 100, i32, i32, i32, vec8i32, 120, 110);
+ __builtin_amdgcn_image_store_2darray_f32_i32(f32, 100, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_2darray_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.2darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.2darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret void
//
-void test_builtin_image_store_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
+void test_builtin_image_store_2darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- __builtin_amdgcn_image_store_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110);
+ __builtin_amdgcn_image_store_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_2darray_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.2darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.2darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret void
//
-void test_builtin_image_store_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
+void test_builtin_image_store_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- __builtin_amdgcn_image_store_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110);
+ __builtin_amdgcn_image_store_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_1d_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret void
//
-void test_builtin_image_store_1d_1(float4 v4f32, int i32, int8 vec8i32) {
+void test_builtin_image_store_1d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- __builtin_amdgcn_image_store_1d_v4f32_i32(v4f32, 100, i32, vec8i32, 120, 110);
+ __builtin_amdgcn_image_store_1d_v4f32_i32(v4f32, 100, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_1d_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret void
//
-void test_builtin_image_store_1d_2(half4 v4f16, int i32, int8 vec8i32) {
+void test_builtin_image_store_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- __builtin_amdgcn_image_store_1d_v4f16_i32(v4f16, 100, i32, vec8i32, 120, 110);
+ __builtin_amdgcn_image_store_1d_v4f16_i32(v4f16, 100, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_1darray_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.1darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.1darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret void
//
-void test_builtin_image_store_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
+void test_builtin_image_store_1darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- __builtin_amdgcn_image_store_1darray_v4f32_i32(v4f32, 100, i32, i32, vec8i32, 120, 110);
+ __builtin_amdgcn_image_store_1darray_v4f32_i32(v4f32, 100, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_1darray_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.1darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.1darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret void
//
-void test_builtin_image_store_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
+void test_builtin_image_store_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- __builtin_amdgcn_image_store_1darray_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, 110);
+ __builtin_amdgcn_image_store_1darray_v4f16_i32(v4f16, 100, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_3d_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.3d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.3d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret void
//
-void test_builtin_image_store_3d_1(float4 v4f32, int i32, int8 vec8i32) {
+void test_builtin_image_store_3d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- __builtin_amdgcn_image_store_3d_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110);
+ __builtin_amdgcn_image_store_3d_v4f32_i32(v4f32, 100, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_3d_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.3d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.3d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret void
//
-void test_builtin_image_store_3d_2(half4 v4f16, int i32, int8 vec8i32) {
+void test_builtin_image_store_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- __builtin_amdgcn_image_store_3d_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110);
+ __builtin_amdgcn_image_store_3d_v4f16_i32(v4f16, 100, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_cube_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.cube.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.cube.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret void
//
-void test_builtin_image_store_cube_1(float4 v4f32, int i32, int8 vec8i32) {
+void test_builtin_image_store_cube_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- __builtin_amdgcn_image_store_cube_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110);
+ __builtin_amdgcn_image_store_cube_v4f32_i32(v4f32, 100, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_cube_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.cube.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.cube.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret void
//
-void test_builtin_image_store_cube_2(half4 v4f16, int i32, int8 vec8i32) {
+void test_builtin_image_store_cube_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- __builtin_amdgcn_image_store_cube_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110);
+ __builtin_amdgcn_image_store_cube_v4f16_i32(v4f16, 100, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_1d_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret void
//
-void test_builtin_image_store_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) {
+void test_builtin_image_store_mip_1d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- __builtin_amdgcn_image_store_mip_1d_v4f32_i32(v4f32, 100, i32, i32, vec8i32, 120, 110);
+ __builtin_amdgcn_image_store_mip_1d_v4f32_i32(v4f32, 100, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_1d_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.1d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.1d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret void
//
-void test_builtin_image_store_mip_1d_2(half4 v4f16, int i32, int8 vec8i32) {
+void test_builtin_image_store_mip_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- __builtin_amdgcn_image_store_mip_1d_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, 110);
+ __builtin_amdgcn_image_store_mip_1d_v4f16_i32(v4f16, 100, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_1darray_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret void
//
-void test_builtin_image_store_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
+void test_builtin_image_store_mip_1darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- __builtin_amdgcn_image_store_mip_1darray_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110);
+ __builtin_amdgcn_image_store_mip_1darray_v4f32_i32(v4f32, 100, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_1darray_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.1darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.1darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret void
//
-void test_builtin_image_store_mip_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
+void test_builtin_image_store_mip_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- __builtin_amdgcn_image_store_mip_1darray_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110);
+ __builtin_amdgcn_image_store_mip_1darray_v4f16_i32(v4f16, 100, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2d(
-// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2d.f32.i32.v8i32(float [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2d.f32.i32.v8i32(float [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret void
//
-void test_builtin_image_store_mip_2d(float f32, int i32, int8 vec8i32) {
+void test_builtin_image_store_mip_2d(float f32, int i32, __amdgpu_texture_t tex) {
- __builtin_amdgcn_image_store_mip_2d_f32_i32(f32, 100, i32, i32, i32, vec8i32, 120, 110);
+ __builtin_amdgcn_image_store_mip_2d_f32_i32(f32, 100, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2d_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret void
//
-void test_builtin_image_store_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) {
+void test_builtin_image_store_mip_2d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- __builtin_amdgcn_image_store_mip_2d_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110);
+ __builtin_amdgcn_image_store_mip_2d_v4f32_i32(v4f32, 100, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2d_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret void
//
-void test_builtin_image_store_mip_2d_2(half4 v4f16, int i32, int8 vec8i32) {
+void test_builtin_image_store_mip_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- __builtin_amdgcn_image_store_mip_2d_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110);
+ __builtin_amdgcn_image_store_mip_2d_v4f16_i32(v4f16, 100, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2darray(
-// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2darray.f32.i32.v8i32(float [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP5]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2darray.f32.i32.v8i32(float [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret void
//
-void test_builtin_image_store_mip_2darray(float f32, int i32, int8 vec8i32) {
+void test_builtin_image_store_mip_2darray(float f32, int i32, __amdgpu_texture_t tex) {
- __builtin_amdgcn_image_store_mip_2darray_f32_i32(f32, 100, i32, i32, i32, i32, vec8i32, 120, 110);
+ __builtin_amdgcn_image_store_mip_2darray_f32_i32(f32, 100, i32, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2darray_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP5]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret void
//
-void test_builtin_image_store_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
+void test_builtin_image_store_mip_2darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- __builtin_amdgcn_image_store_mip_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, i32, vec8i32, 120, 110);
+ __builtin_amdgcn_image_store_mip_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2darray_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP5]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret void
//
-void test_builtin_image_store_mip_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
+void test_builtin_image_store_mip_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- __builtin_amdgcn_image_store_mip_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, i32, vec8i32, 120, 110);
+ __builtin_amdgcn_image_store_mip_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_3d_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP5]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret void
//
-void test_builtin_image_store_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) {
+void test_builtin_image_store_mip_3d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- __builtin_amdgcn_image_store_mip_3d_v4f32_i32(v4f32, 100, i32, i32, i32, i32, vec8i32, 120, 110);
+ __builtin_amdgcn_image_store_mip_3d_v4f32_i32(v4f32, 100, i32, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_3d_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.3d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP5]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.3d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret void
//
-void test_builtin_image_store_mip_3d_2(half4 v4f16, int i32, int8 vec8i32) {
+void test_builtin_image_store_mip_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- __builtin_amdgcn_image_store_mip_3d_v4f16_i32(v4f16, 100, i32, i32, i32, i32, vec8i32, 120, 110);
+ __builtin_amdgcn_image_store_mip_3d_v4f16_i32(v4f16, 100, i32, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_cube_1(
-// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP5]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret void
//
-void test_builtin_image_store_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) {
+void test_builtin_image_store_mip_cube_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- __builtin_amdgcn_image_store_mip_cube_v4f32_i32(v4f32, 100, i32, i32, i32, i32, vec8i32, 120, 110);
+ __builtin_amdgcn_image_store_mip_cube_v4f32_i32(v4f32, 100, i32, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_cube_2(
-// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr
// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.cube.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP5]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.cube.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret void
//
-void test_builtin_image_store_mip_cube_2(half4 v4f16, int i32, int8 vec8i32) {
+void test_builtin_image_store_mip_cube_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- __builtin_amdgcn_image_store_mip_cube_v4f16_i32(v4f16, 100, i32, i32, i32, i32, vec8i32, 120, 110);
+ __builtin_amdgcn_image_store_mip_cube_v4f16_i32(v4f16, 100, i32, i32, i32, i32, tex, 120, 110);
}
diff --git a/clang/test/SemaOpenCL/builtins-image-load-param-gfx1100.cl b/clang/test/SemaOpenCL/builtins-image-load-param-gfx1100.cl
new file mode 100644
index 0000000000000..8f609dcbd34f2
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-image-load-param-gfx1100.cl
@@ -0,0 +1,194 @@
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1100 -S -verify=expected -o - %s
+// REQUIRES: amdgpu-registered-target
+
+typedef int int4 __attribute__((ext_vector_type(4)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef half half4 __attribute__((ext_vector_type(4)));
+
+float test_builtin_image_load_2d(float f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_2d_f32_i32(i32, i32, i32, tex, 106, 103); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_f32_i32' must be a constant integer}}
+}
+float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_v4f16_i32' must be a constant integer}}
+}
+
+
+float test_builtin_image_load_2darray(float f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_f32_i32' must be a constant integer}}
+}
+float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_2darray_v4f32_i32(100, i32, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_2darray_v4f16_i32(100, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_1d_v4f32_i32(i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_1d_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_1d_v4f16_i32(100, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_1d_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_1darray_v4f32_i32(100, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_1darray_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_1darray_v4f16_i32(100, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_1darray_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_3d_v4f32_i32(100, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_3d_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_3d_v4f16_i32(i32, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_3d_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_cube_v4f32_i32(i32, i32, i32, i32, tex, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_cube_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_cube_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_cube_v4f16_i32(i32, i32, i32, i32, tex, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_cube_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1d_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_mip_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_mip_1d_v4f16_i32(100, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1d_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(i32, i32, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1darray_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_mip_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_mip_1darray_v4f16_i32(100, i32, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1darray_v4f16_i32' must be a constant integer}}
+}
+
+float test_builtin_image_load_mip_2d(float f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_mip_2d_f32_i32(i32, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_f32_i32' must be a constant integer}}
+}
+float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(100, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_mip_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_mip_2d_v4f16_i32(i32, i32, i32, i32, tex, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_v4f16_i32' must be a constant integer}}
+}
+
+float test_builtin_image_load_mip_2darray(float f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_mip_2darray_f32_i32(i32, i32, i32, i32, i32, tex, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_f32_i32' must be a constant integer}}
+}
+float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(100, i32, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_mip_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_mip_2darray_v4f16_i32(100, i32, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(i32, i32, i32, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_3d_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_mip_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_mip_3d_v4f16_i32(i32, i32, i32, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_3d_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(i32, i32, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_cube_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_mip_cube_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_mip_cube_v4f16_i32(100, i32, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_cube_v4f16_i32' must be a constant integer}}
+}
+
+float test_builtin_image_sample_2d(float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_2d_f32_f32(i32, f32, f32, tex, vec4i32, 0, 106, 103); //expected-error{{argument to '__builtin_amdgcn_image_sample_2d_f32_f32' must be a constant integer}}
+}
+float4 test_builtin_image_sample_2d_1(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_2d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_2d_v4f32_f32' must be a constant integer}}
+}
+half4 test_builtin_image_sample_2d_2(half4 v4f16, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_2d_v4f16_f32(100, f32, f32, tex, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_2d_v4f16_f32' must be a constant integer}}
+}
+
+float test_builtin_image_sample_2darray(float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_2darray_f32_f32(100, f32, f32, f32, tex, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_2darray_f32_f32' must be a constant integer}}
+}
+float4 test_builtin_image_sample_2darray_1(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_2darray_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_2darray_v4f32_f32' must be a constant integer}}
+}
+half4 test_builtin_image_sample_2darray_2(half4 v4f16, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_2darray_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_2darray_v4f16_f32' must be a constant integer}}
+}
+
+float4 test_builtin_image_sample_1d_1(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_1d_v4f32_f32(i32, f32, tex, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_1d_v4f32_f32' must be a constant integer}}
+}
+half4 test_builtin_image_sample_1d_2(half4 v4f16, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_1d_v4f16_f32(100, f32, tex, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_1d_v4f16_f32' must be a constant integer}}
+}
+
+float4 test_builtin_image_sample_1darray_1(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_1darray_v4f32_f32(100, f32, f32, tex, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_1darray_v4f32_f32' must be a constant integer}}
+}
+half4 test_builtin_image_sample_1darray_2(half4 v4f16, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_1darray_v4f16_f32(100, f32, f32, tex, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_1darray_v4f16_f32' must be a constant integer}}
+}
+
+float4 test_builtin_image_sample_3d_1(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_3d_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_3d_v4f32_f32' must be a constant integer}}
+}
+half4 test_builtin_image_sample_3d_2(half4 v4f16, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_3d_v4f16_f32(i32, f32, f32, f32, tex, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_3d_v4f16_f32' must be a constant integer}}
+}
+
+float4 test_builtin_image_sample_cube_1(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_cube_v4f32_f32(i32, f32, f32, f32, tex, vec4i32, 0, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_cube_v4f32_f32' must be a constant integer}}
+}
+half4 test_builtin_image_sample_cube_2(half4 v4f16, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_cube_v4f16_f32(i32, f32, f32, f32, tex, vec4i32, 0, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_cube_v4f16_f32' must be a constant integer}}
+}
+
diff --git a/clang/test/SemaOpenCL/builtins-image-load-param-gfx942.cl b/clang/test/SemaOpenCL/builtins-image-load-param-gfx942.cl
new file mode 100644
index 0000000000000..b8780024f1076
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-image-load-param-gfx942.cl
@@ -0,0 +1,219 @@
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx942 -verify -S -o - %s
+// REQUIRES: amdgpu-registered-target
+
+typedef int int4 __attribute__((ext_vector_type(4)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef half half4 __attribute__((ext_vector_type(4)));
+
+float test_builtin_image_load_2d(float f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_2d_f32_i32(i32, i32, i32, tex, 106, 103); //expected-error{{'test_builtin_image_load_2d' needs target feature image-insts}}
+}
+float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_load_2d_1' needs target feature image-insts}}
+}
+half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_load_2d_2' needs target feature image-insts}}
+}
+
+
+float test_builtin_image_load_2darray(float f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_load_2darray' needs target feature image-insts}}
+}
+float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_2darray_v4f32_i32(100, i32, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_load_2darray_1' needs target feature image-insts}}
+}
+half4 test_builtin_image_load_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_2darray_v4f16_i32(100, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_load_2darray_2' needs target feature image-insts}}
+}
+
+float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_1d_v4f32_i32(i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_load_1d_1' needs target feature image-insts}}
+}
+half4 test_builtin_image_load_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_1d_v4f16_i32(100, i32, tex, 120, i32); //expected-error{{'test_builtin_image_load_1d_2' needs target feature image-insts}}
+}
+
+float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_1darray_v4f32_i32(100, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_load_1darray_1' needs target feature image-insts}}
+}
+half4 test_builtin_image_load_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_1darray_v4f16_i32(100, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_load_1darray_2' needs target feature image-insts}}
+}
+
+float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_3d_v4f32_i32(100, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_load_3d_1' needs target feature image-insts}}
+}
+half4 test_builtin_image_load_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_3d_v4f16_i32(i32, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_load_3d_2' needs target feature image-insts}}
+}
+
+float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_cube_v4f32_i32(i32, i32, i32, i32, tex, 120, 110); //expected-error{{'test_builtin_image_load_cube_1' needs target feature image-insts}}
+}
+half4 test_builtin_image_load_cube_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_cube_v4f16_i32(i32, i32, i32, i32, tex, 120, 110); //expected-error{{'test_builtin_image_load_cube_2' needs target feature image-insts}}
+}
+
+float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_load_mip_1d_1' needs target feature image-insts}}
+}
+half4 test_builtin_image_load_mip_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_mip_1d_v4f16_i32(100, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_load_mip_1d_2' needs target feature image-insts}}
+}
+
+float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(i32, i32, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_load_mip_1darray_1' needs target feature image-insts}}
+}
+half4 test_builtin_image_load_mip_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_mip_1darray_v4f16_i32(100, i32, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_load_mip_1darray_2' needs target feature image-insts}}
+}
+
+float test_builtin_image_load_mip_2d(float f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_mip_2d_f32_i32(i32, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_load_mip_2d' needs target feature image-insts}}
+}
+float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(100, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_load_mip_2d_1' needs target feature image-insts}}
+}
+half4 test_builtin_image_load_mip_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_mip_2d_v4f16_i32(i32, i32, i32, i32, tex, 120, 110); //expected-error{{'test_builtin_image_load_mip_2d_2' needs target feature image-insts}}
+}
+
+float test_builtin_image_load_mip_2darray(float f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_mip_2darray_f32_i32(i32, i32, i32, i32, i32, tex, 120, 110); //expected-error{{'test_builtin_image_load_mip_2darray' needs target feature image-insts}}
+}
+float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(100, i32, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_load_mip_2darray_1' needs target feature image-insts}}
+}
+half4 test_builtin_image_load_mip_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_mip_2darray_v4f16_i32(100, i32, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_load_mip_2darray_2' needs target feature image-insts}}
+}
+
+float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(i32, i32, i32, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_load_mip_3d_1' needs target feature image-insts}}
+}
+half4 test_builtin_image_load_mip_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_mip_3d_v4f16_i32(i32, i32, i32, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_load_mip_3d_2' needs target feature image-insts}}
+}
+
+float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(i32, i32, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_load_mip_cube_1' needs target feature image-insts}}
+}
+half4 test_builtin_image_load_mip_cube_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_mip_cube_v4f16_i32(100, i32, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_load_mip_cube_2' needs target feature image-insts}}
+}
+
+float test_builtin_image_load_2d_gfx(float f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_2d_f32_i32(12, i32, i32, tex, 106, 103); //expected-error{{'test_builtin_image_load_2d_gfx' needs target feature image-insts}}
+}
+float4 test_builtin_image_load_2d_gfx_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, tex, 120, 110); //expected-error{{'test_builtin_image_load_2d_gfx_1' needs target feature image-insts}}
+}
+half4 test_builtin_image_load_2d_gfx_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, tex, 120, 110); //expected-error{{'test_builtin_image_load_2d_gfx_2' needs target feature image-insts}}
+}
+
+float test_builtin_image_sample_2d(float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_2d_f32_f32(i32, f32, f32, tex, vec4i32, 0, 106, 103); //expected-error{{'test_builtin_image_sample_2d' needs target feature image-insts}}
+}
+float4 test_builtin_image_sample_2d_1(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_2d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, i32, 110); //expected-error{{'test_builtin_image_sample_2d_1' needs target feature image-insts}}
+}
+half4 test_builtin_image_sample_2d_2(half4 v4f16, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_2d_v4f16_f32(100, f32, f32, tex, vec4i32, 0, 120, i32); //expected-error{{'test_builtin_image_sample_2d_2' needs target feature image-insts}}
+}
+
+float test_builtin_image_sample_2darray(float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_2darray_f32_f32(100, f32, f32, f32, tex, vec4i32, 0, i32, 110); //expected-error{{'test_builtin_image_sample_2darray' needs target feature image-insts}}
+}
+float4 test_builtin_image_sample_2darray_1(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_2darray_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, i32, 110); //expected-error{{'test_builtin_image_sample_2darray_1' needs target feature image-insts}}
+}
+half4 test_builtin_image_sample_2darray_2(half4 v4f16, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_2darray_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, i32); //expected-error{{'test_builtin_image_sample_2darray_2' needs target feature image-insts}}
+}
+
+float4 test_builtin_image_sample_1d_1(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_1d_v4f32_f32(i32, f32, tex, vec4i32, 0, 120, i32); //expected-error{{'test_builtin_image_sample_1d_1' needs target feature image-insts}}
+}
+half4 test_builtin_image_sample_1d_2(half4 v4f16, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_1d_v4f16_f32(100, f32, tex, vec4i32, 0, 120, i32); //expected-error{{'test_builtin_image_sample_1d_2' needs target feature image-insts}}
+}
+
+float4 test_builtin_image_sample_1darray_1(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_1darray_v4f32_f32(100, f32, f32, tex, vec4i32, 0, i32, 110); //expected-error{{'test_builtin_image_sample_1darray_1' needs target feature image-insts}}
+}
+half4 test_builtin_image_sample_1darray_2(half4 v4f16, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_1darray_v4f16_f32(100, f32, f32, tex, vec4i32, 0, i32, 110); //expected-error{{'test_builtin_image_sample_1darray_2' needs target feature image-insts}}
+}
+
+float4 test_builtin_image_sample_3d_1(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_3d_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, i32); //expected-error{{'test_builtin_image_sample_3d_1' needs target feature image-insts}}
+}
+half4 test_builtin_image_sample_3d_2(half4 v4f16, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_3d_v4f16_f32(i32, f32, f32, f32, tex, vec4i32, 0, 120, i32); //expected-error{{'test_builtin_image_sample_3d_2' needs target feature image-insts}}
+}
+
+float4 test_builtin_image_sample_cube_1(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_cube_v4f32_f32(i32, f32, f32, f32, tex, vec4i32, 0, 120, 110); //expected-error{{'test_builtin_image_sample_cube_1' needs target feature image-insts}}
+}
+half4 test_builtin_image_sample_cube_2(half4 v4f16, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_cube_v4f16_f32(i32, f32, f32, f32, tex, vec4i32, 0, 120, 110); //expected-error{{'test_builtin_image_sample_cube_2' needs target feature image-insts}}
+}
+
+float test_builtin_image_sample_2d_gfx(float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_2d_f32_f32(100, f32, f32, tex, vec4i32, 0, 120, 110); //expected-error{{'test_builtin_image_sample_2d_gfx' needs target feature image-insts}}
+}
+float4 test_builtin_image_sample_2d_gfx_1(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_2d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, 120, 110); //expected-error{{'test_builtin_image_sample_2d_gfx_1' needs target feature image-insts}}
+}
+half4 test_builtin_image_sample_2d_gfx_2(half4 v4f16, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_2d_v4f16_f32(100, f32, f32, tex, vec4i32, 0, 120, 110); //expected-error{{'test_builtin_image_sample_2d_gfx_2' needs target feature image-insts}}
+}
diff --git a/clang/test/SemaOpenCL/builtins-image-load-param.cl b/clang/test/SemaOpenCL/builtins-image-load-param.cl
deleted file mode 100644
index 0da4831e847f7..0000000000000
--- a/clang/test/SemaOpenCL/builtins-image-load-param.cl
+++ /dev/null
@@ -1,222 +0,0 @@
-// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1100 -S -verify=expected -o - %s
-// RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -S -verify=expected -o - %s
-// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx942 -S -verify=GFX94 -o - %s
-// REQUIRES: amdgpu-registered-target
-
-typedef int int8 __attribute__((ext_vector_type(8)));
-typedef int int4 __attribute__((ext_vector_type(4)));
-typedef float float4 __attribute__((ext_vector_type(4)));
-typedef half half4 __attribute__((ext_vector_type(4)));
-
-float test_builtin_image_load_2d(float f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_2d_f32_i32(i32, i32, i32, vec8i32, 106, 103); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_f32_i32' must be a constant integer}}
-}
-float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_v4f32_i32' must be a constant integer}}
-}
-half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_v4f16_i32' must be a constant integer}}
-}
-
-
-float test_builtin_image_load_2darray(float f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_f32_i32' must be a constant integer}}
-}
-float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_2darray_v4f32_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_v4f32_i32' must be a constant integer}}
-}
-half4 test_builtin_image_load_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_2darray_v4f16_i32(100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_v4f16_i32' must be a constant integer}}
-}
-
-float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_1d_v4f32_i32(i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_1d_v4f32_i32' must be a constant integer}}
-}
-half4 test_builtin_image_load_1d_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_1d_v4f16_i32(100, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_1d_v4f16_i32' must be a constant integer}}
-}
-
-float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_1darray_v4f32_i32(100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_1darray_v4f32_i32' must be a constant integer}}
-}
-half4 test_builtin_image_load_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_1darray_v4f16_i32(100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_1darray_v4f16_i32' must be a constant integer}}
-}
-
-float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_3d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_3d_v4f32_i32' must be a constant integer}}
-}
-half4 test_builtin_image_load_3d_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_3d_v4f16_i32(i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_3d_v4f16_i32' must be a constant integer}}
-}
-
-float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_cube_v4f32_i32(i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_cube_v4f32_i32' must be a constant integer}}
-}
-half4 test_builtin_image_load_cube_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_cube_v4f16_i32(i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_cube_v4f16_i32' must be a constant integer}}
-}
-
-float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1d_v4f32_i32' must be a constant integer}}
-}
-half4 test_builtin_image_load_mip_1d_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_1d_v4f16_i32(100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1d_v4f16_i32' must be a constant integer}}
-}
-
-float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1darray_v4f32_i32' must be a constant integer}}
-}
-half4 test_builtin_image_load_mip_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_1darray_v4f16_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1darray_v4f16_i32' must be a constant integer}}
-}
-
-float test_builtin_image_load_mip_2d(float f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_2d_f32_i32(i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_f32_i32' must be a constant integer}}
-}
-float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_v4f32_i32' must be a constant integer}}
-}
-half4 test_builtin_image_load_mip_2d_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_2d_v4f16_i32(i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_v4f16_i32' must be a constant integer}}
-}
-
-float test_builtin_image_load_mip_2darray(float f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_2darray_f32_i32(i32, i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_f32_i32' must be a constant integer}}
-}
-float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_v4f32_i32' must be a constant integer}}
-}
-half4 test_builtin_image_load_mip_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_2darray_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_v4f16_i32' must be a constant integer}}
-}
-
-float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(i32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_3d_v4f32_i32' must be a constant integer}}
-}
-half4 test_builtin_image_load_mip_3d_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_3d_v4f16_i32(i32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_3d_v4f16_i32' must be a constant integer}}
-}
-
-float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(i32, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_cube_v4f32_i32' must be a constant integer}}
-}
-half4 test_builtin_image_load_mip_cube_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_mip_cube_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_cube_v4f16_i32' must be a constant integer}}
-}
-
-float test_builtin_image_load_2d_gfx(float f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_2d_f32_i32(12, i32, i32, vec8i32, 106, 103); //GFX94-error{{'__builtin_amdgcn_image_load_2d_f32_i32' needs target feature image-insts}}
-}
-float4 test_builtin_image_load_2d_gfx_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, vec8i32, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_load_2d_v4f32_i32' needs target feature image-insts}}
-}
-half4 test_builtin_image_load_2d_gfx_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, vec8i32, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_load_2d_v4f16_i32' needs target feature image-insts}}
-}
-
-float test_builtin_image_sample_2d(float f32, int i32, int8 vec8i32, int4 vec4i32) {
-
- return __builtin_amdgcn_image_sample_2d_f32_f32(i32, f32, f32, vec8i32, vec4i32, 0, 106, 103); //expected-error{{argument to '__builtin_amdgcn_image_sample_2d_f32_f32' must be a constant integer}}
-}
-float4 test_builtin_image_sample_2d_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) {
-
- return __builtin_amdgcn_image_sample_2d_v4f32_f32(100, f32, f32, vec8i32, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_2d_v4f32_f32' must be a constant integer}}
-}
-half4 test_builtin_image_sample_2d_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) {
-
- return __builtin_amdgcn_image_sample_2d_v4f16_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_2d_v4f16_f32' must be a constant integer}}
-}
-
-float test_builtin_image_sample_2darray(float f32, int i32, int8 vec8i32, int4 vec4i32) {
-
- return __builtin_amdgcn_image_sample_2darray_f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_2darray_f32_f32' must be a constant integer}}
-}
-float4 test_builtin_image_sample_2darray_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) {
-
- return __builtin_amdgcn_image_sample_2darray_v4f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_2darray_v4f32_f32' must be a constant integer}}
-}
-half4 test_builtin_image_sample_2darray_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) {
-
- return __builtin_amdgcn_image_sample_2darray_v4f16_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_2darray_v4f16_f32' must be a constant integer}}
-}
-
-float4 test_builtin_image_sample_1d_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) {
-
- return __builtin_amdgcn_image_sample_1d_v4f32_f32(i32, f32, vec8i32, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_1d_v4f32_f32' must be a constant integer}}
-}
-half4 test_builtin_image_sample_1d_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) {
-
- return __builtin_amdgcn_image_sample_1d_v4f16_f32(100, f32, vec8i32, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_1d_v4f16_f32' must be a constant integer}}
-}
-
-float4 test_builtin_image_sample_1darray_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) {
-
- return __builtin_amdgcn_image_sample_1darray_v4f32_f32(100, f32, f32, vec8i32, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_1darray_v4f32_f32' must be a constant integer}}
-}
-half4 test_builtin_image_sample_1darray_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) {
-
- return __builtin_amdgcn_image_sample_1darray_v4f16_f32(100, f32, f32, vec8i32, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_1darray_v4f16_f32' must be a constant integer}}
-}
-
-float4 test_builtin_image_sample_3d_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) {
-
- return __builtin_amdgcn_image_sample_3d_v4f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_3d_v4f32_f32' must be a constant integer}}
-}
-half4 test_builtin_image_sample_3d_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) {
-
- return __builtin_amdgcn_image_sample_3d_v4f16_f32(i32, f32, f32, f32, vec8i32, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_3d_v4f16_f32' must be a constant integer}}
-}
-
-float4 test_builtin_image_sample_cube_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) {
-
- return __builtin_amdgcn_image_sample_cube_v4f32_f32(i32, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_cube_v4f32_f32' must be a constant integer}}
-}
-half4 test_builtin_image_sample_cube_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) {
-
- return __builtin_amdgcn_image_sample_cube_v4f16_f32(i32, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_cube_v4f16_f32' must be a constant integer}}
-}
-
-float test_builtin_image_sample_2d_gfx(float f32, int i32, int8 vec8i32, int4 vec4i32) {
-
- return __builtin_amdgcn_image_sample_2d_f32_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_sample_2d_f32_f32' needs target feature image-insts}}
-}
-float4 test_builtin_image_sample_2d_gfx_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) {
-
- return __builtin_amdgcn_image_sample_2d_v4f32_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_sample_2d_v4f32_f32' needs target feature image-insts}}
-}
-half4 test_builtin_image_sample_2d_gfx_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) {
-
- return __builtin_amdgcn_image_sample_2d_v4f16_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_sample_2d_v4f16_f32' needs target feature image-insts}}
-}
diff --git a/clang/test/SemaOpenCL/builtins-image-store-param-gfx1100.cl b/clang/test/SemaOpenCL/builtins-image-store-param-gfx1100.cl
new file mode 100644
index 0000000000000..4f6347e1c5286
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-image-store-param-gfx1100.cl
@@ -0,0 +1,129 @@
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1100 -S -verify=expected -o - %s
+// REQUIRES: amdgpu-registered-target
+
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef half half4 __attribute__((ext_vector_type(4)));
+
+void test_builtin_image_store_2d(float f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_2d_f32_i32(f32, i32, i32, i32, tex, 106, 103); //expected-error{{argument to '__builtin_amdgcn_image_store_2d_f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_2d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_2d_v4f32_i32(v4f32, 100, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_2d_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_2d_v4f16_i32(v4f16, 100, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_2d_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_2darray(float f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_2darray_f32_i32(f32, 100, i32, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_2darray_f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_2darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_2darray_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_2darray_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_1d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_1d_v4f32_i32(v4f32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_1d_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_1d_v4f16_i32(v4f16, 100, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_1d_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_1darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_1darray_v4f32_i32(v4f32, 100, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_1darray_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_1darray_v4f16_i32(v4f16, 100, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_1darray_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_3d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_3d_v4f32_i32(v4f32, 100, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_3d_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_3d_v4f16_i32(v4f16, i32, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_3d_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_cube_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_cube_v4f32_i32(v4f32, i32, i32, i32, i32, tex, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_cube_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_cube_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_cube_v4f16_i32(v4f16, i32, i32, i32, i32, tex, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_cube_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_mip_1d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_mip_1d_v4f32_i32(v4f32, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_1d_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_mip_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_mip_1d_v4f16_i32(v4f16, 100, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_1d_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_mip_1darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_mip_1darray_v4f32_i32(v4f32, i32, i32, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_1darray_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_mip_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_mip_1darray_v4f16_i32(v4f16, 100, i32, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_1darray_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_mip_2d(float f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_mip_2d_f32_i32(f32, i32, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2d_f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_mip_2d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_mip_2d_v4f32_i32(v4f32, 100, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2d_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_mip_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_mip_2d_v4f16_i32(v4f16, i32, i32, i32, i32, tex, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2d_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_mip_2darray(float f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_mip_2darray_f32_i32(f32, i32, i32, i32, i32, i32, tex, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2darray_f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_mip_2darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_mip_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2darray_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_mip_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_mip_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2darray_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_mip_3d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_mip_3d_v4f32_i32(v4f32, i32, i32, i32, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_3d_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_mip_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_mip_3d_v4f16_i32(v4f16, i32, i32, i32, i32, i32, tex, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_3d_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_mip_cube_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_mip_cube_v4f32_i32(v4f32, i32, i32, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_cube_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_mip_cube_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_mip_cube_v4f16_i32(v4f16, 100, i32, i32, i32, i32, tex, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_cube_v4f16_i32' must be a constant integer}}
+}
diff --git a/clang/test/SemaOpenCL/builtins-image-store-param-gfx942.cl b/clang/test/SemaOpenCL/builtins-image-store-param-gfx942.cl
new file mode 100644
index 0000000000000..d0085e5403b5f
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-image-store-param-gfx942.cl
@@ -0,0 +1,129 @@
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx942 -S -verify -o - %s
+// REQUIRES: amdgpu-registered-target
+
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef half half4 __attribute__((ext_vector_type(4)));
+
+void test_builtin_image_store_2d(float f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_2d_f32_i32(f32, i32, i32, i32, tex, 106, 103); //expected-error{{'test_builtin_image_store_2d' needs target feature image-insts}}
+}
+void test_builtin_image_store_2d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_2d_v4f32_i32(v4f32, 100, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_store_2d_1' needs target feature image-insts}}
+}
+void test_builtin_image_store_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_2d_v4f16_i32(v4f16, 100, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_store_2d_2' needs target feature image-insts}}
+}
+
+void test_builtin_image_store_2darray(float f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_2darray_f32_i32(f32, 100, i32, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_store_2darray' needs target feature image-insts}}
+}
+void test_builtin_image_store_2darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_store_2darray_1' needs target feature image-insts}}
+}
+void test_builtin_image_store_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_store_2darray_2' needs target feature image-insts}}
+}
+
+void test_builtin_image_store_1d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_1d_v4f32_i32(v4f32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_store_1d_1' needs target feature image-insts}}
+}
+void test_builtin_image_store_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_1d_v4f16_i32(v4f16, 100, i32, tex, 120, i32); //expected-error{{'test_builtin_image_store_1d_2' needs target feature image-insts}}
+}
+
+void test_builtin_image_store_1darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_1darray_v4f32_i32(v4f32, 100, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_store_1darray_1' needs target feature image-insts}}
+}
+void test_builtin_image_store_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_1darray_v4f16_i32(v4f16, 100, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_store_1darray_2' needs target feature image-insts}}
+}
+
+void test_builtin_image_store_3d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_3d_v4f32_i32(v4f32, 100, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_store_3d_1' needs target feature image-insts}}
+}
+void test_builtin_image_store_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_3d_v4f16_i32(v4f16, i32, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_store_3d_2' needs target feature image-insts}}
+}
+
+void test_builtin_image_store_cube_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_cube_v4f32_i32(v4f32, i32, i32, i32, i32, tex, 120, 110); //expected-error{{'test_builtin_image_store_cube_1' needs target feature image-insts}}
+}
+void test_builtin_image_store_cube_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_cube_v4f16_i32(v4f16, i32, i32, i32, i32, tex, 120, 110); //expected-error{{'test_builtin_image_store_cube_2' needs target feature image-insts}}
+}
+
+void test_builtin_image_store_mip_1d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_mip_1d_v4f32_i32(v4f32, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_store_mip_1d_1' needs target feature image-insts}}
+}
+void test_builtin_image_store_mip_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_mip_1d_v4f16_i32(v4f16, 100, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_store_mip_1d_2' needs target feature image-insts}}
+}
+
+void test_builtin_image_store_mip_1darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_mip_1darray_v4f32_i32(v4f32, i32, i32, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_store_mip_1darray_1' needs target feature image-insts}}
+}
+void test_builtin_image_store_mip_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_mip_1darray_v4f16_i32(v4f16, 100, i32, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_store_mip_1darray_2' needs target feature image-insts}}
+}
+
+void test_builtin_image_store_mip_2d(float f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_mip_2d_f32_i32(f32, i32, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_store_mip_2d' needs target feature image-insts}}
+}
+void test_builtin_image_store_mip_2d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_mip_2d_v4f32_i32(v4f32, 100, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_store_mip_2d_1' needs target feature image-insts}}
+}
+void test_builtin_image_store_mip_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_mip_2d_v4f16_i32(v4f16, i32, i32, i32, i32, tex, 120, 110); //expected-error{{'test_builtin_image_store_mip_2d_2' needs target feature image-insts}}
+}
+
+void test_builtin_image_store_mip_2darray(float f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_mip_2darray_f32_i32(f32, i32, i32, i32, i32, i32, tex, 120, 110); //expected-error{{'test_builtin_image_store_mip_2darray' needs target feature image-insts}}
+}
+void test_builtin_image_store_mip_2darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_mip_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_store_mip_2darray_1' needs target feature image-insts}}
+}
+void test_builtin_image_store_mip_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_mip_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_store_mip_2darray_2' needs target feature image-insts}}
+}
+
+void test_builtin_image_store_mip_3d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_mip_3d_v4f32_i32(v4f32, i32, i32, i32, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_store_mip_3d_1' needs target feature image-insts}}
+}
+void test_builtin_image_store_mip_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_mip_3d_v4f16_i32(v4f16, i32, i32, i32, i32, i32, tex, i32, 110); //expected-error{{'test_builtin_image_store_mip_3d_2' needs target feature image-insts}}
+}
+
+void test_builtin_image_store_mip_cube_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_mip_cube_v4f32_i32(v4f32, i32, i32, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_store_mip_cube_1' needs target feature image-insts}}
+}
+void test_builtin_image_store_mip_cube_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
+
+ return __builtin_amdgcn_image_store_mip_cube_v4f16_i32(v4f16, 100, i32, i32, i32, i32, tex, 120, i32); //expected-error{{'test_builtin_image_store_mip_cube_2' needs target feature image-insts}}
+}
diff --git a/clang/test/SemaOpenCL/builtins-image-store-param.cl b/clang/test/SemaOpenCL/builtins-image-store-param.cl
deleted file mode 100644
index f84df77171098..0000000000000
--- a/clang/test/SemaOpenCL/builtins-image-store-param.cl
+++ /dev/null
@@ -1,145 +0,0 @@
-// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -S -verify=expected -o - %s
-// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx942 -S -verify=GFX94 -o - %s
-// REQUIRES: amdgpu-registered-target
-
-typedef int int8 __attribute__((ext_vector_type(8)));
-typedef float float4 __attribute__((ext_vector_type(4)));
-typedef half half4 __attribute__((ext_vector_type(4)));
-
-
-void test_builtin_image_store_2d(float f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_store_2d_f32_i32(f32, i32, i32, i32, vec8i32, 106, 103); //expected-error{{argument to '__builtin_amdgcn_image_store_2d_f32_i32' must be a constant integer}}
-}
-void test_builtin_image_store_2d_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_store_2d_v4f32_i32(v4f32, 100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_2d_v4f32_i32' must be a constant integer}}
-}
-void test_builtin_image_store_2d_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_store_2d_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_2d_v4f16_i32' must be a constant integer}}
-}
-
-void test_builtin_image_store_2darray(float f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_store_2darray_f32_i32(f32, 100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_2darray_f32_i32' must be a constant integer}}
-}
-void test_builtin_image_store_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_store_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_2darray_v4f32_i32' must be a constant integer}}
-}
-void test_builtin_image_store_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_store_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_2darray_v4f16_i32' must be a constant integer}}
-}
-
-void test_builtin_image_store_1d_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_store_1d_v4f32_i32(v4f32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_1d_v4f32_i32' must be a constant integer}}
-}
-void test_builtin_image_store_1d_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_store_1d_v4f16_i32(v4f16, 100, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_1d_v4f16_i32' must be a constant integer}}
-}
-
-void test_builtin_image_store_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_store_1darray_v4f32_i32(v4f32, 100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_1darray_v4f32_i32' must be a constant integer}}
-}
-void test_builtin_image_store_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_store_1darray_v4f16_i32(v4f16, 100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_1darray_v4f16_i32' must be a constant integer}}
-}
-
-void test_builtin_image_store_3d_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_store_3d_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_3d_v4f32_i32' must be a constant integer}}
-}
-void test_builtin_image_store_3d_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_store_3d_v4f16_i32(v4f16, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_3d_v4f16_i32' must be a constant integer}}
-}
-
-void test_builtin_image_store_cube_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_store_cube_v4f32_i32(v4f32, i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_cube_v4f32_i32' must be a constant integer}}
-}
-void test_builtin_image_store_cube_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_store_cube_v4f16_i32(v4f16, i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_cube_v4f16_i32' must be a constant integer}}
-}
-
-void test_builtin_image_store_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_store_mip_1d_v4f32_i32(v4f32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_1d_v4f32_i32' must be a constant integer}}
-}
-void test_builtin_image_store_mip_1d_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_store_mip_1d_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_1d_v4f16_i32' must be a constant integer}}
-}
-
-void test_builtin_image_store_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_store_mip_1darray_v4f32_i32(v4f32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_1darray_v4f32_i32' must be a constant integer}}
-}
-void test_builtin_image_store_mip_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_store_mip_1darray_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_1darray_v4f16_i32' must be a constant integer}}
-}
-
-void test_builtin_image_store_mip_2d(float f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_store_mip_2d_f32_i32(f32, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2d_f32_i32' must be a constant integer}}
-}
-void test_builtin_image_store_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_store_mip_2d_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2d_v4f32_i32' must be a constant integer}}
-}
-void test_builtin_image_store_mip_2d_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_store_mip_2d_v4f16_i32(v4f16, i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2d_v4f16_i32' must be a constant integer}}
-}
-
-void test_builtin_image_store_mip_2darray(float f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_store_mip_2darray_f32_i32(f32, i32, i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2darray_f32_i32' must be a constant integer}}
-}
-void test_builtin_image_store_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_store_mip_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2darray_v4f32_i32' must be a constant integer}}
-}
-void test_builtin_image_store_mip_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_store_mip_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2darray_v4f16_i32' must be a constant integer}}
-}
-
-void test_builtin_image_store_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_store_mip_3d_v4f32_i32(v4f32, i32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_3d_v4f32_i32' must be a constant integer}}
-}
-void test_builtin_image_store_mip_3d_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_store_mip_3d_v4f16_i32(v4f16, i32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_3d_v4f16_i32' must be a constant integer}}
-}
-
-void test_builtin_image_store_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_store_mip_cube_v4f32_i32(v4f32, i32, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_cube_v4f32_i32' must be a constant integer}}
-}
-void test_builtin_image_store_mip_cube_2(half4 v4f16, int i32, int8 vec8i32) {
-
- return __builtin_amdgcn_image_store_mip_cube_v4f16_i32(v4f16, 100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_cube_v4f16_i32' must be a constant integer}}
-}
-
-void test_builtin_image_store_2d_gfx(float f32, int i32, int8 vec8i32) {
-
- __builtin_amdgcn_image_store_2d_f32_i32(f32, 12, i32, i32, vec8i32, 106, 103); //GFX94-error{{'__builtin_amdgcn_image_store_2d_f32_i32' needs target feature image-insts}}
-}
-void test_builtin_image_store_2d_gfx_1(float4 v4f32, int i32, int8 vec8i32) {
-
- __builtin_amdgcn_image_store_2d_v4f32_i32(v4f32, 100, i32, i32, vec8i32, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_store_2d_v4f32_i32' needs target feature image-insts}}
- }
- void test_builtin_image_store_2d_gfx_2(half4 v4f16, int i32, int8 vec8i32) {
-
- __builtin_amdgcn_image_store_2d_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_store_2d_v4f16_i32' needs target feature image-insts}}
- }
>From bd7427c96ebadddac822b73faa37e856d017eef1 Mon Sep 17 00:00:00 2001
From: ranapratap55 <RanaPratapReddy.Nimmakayala at amd.com>
Date: Tue, 7 Oct 2025 12:29:31 +0530
Subject: [PATCH 7/7] update clang format
---
clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 83 +++++++++++----------
clang/lib/Sema/SemaAMDGPU.cpp | 28 +++----
2 files changed, 57 insertions(+), 54 deletions(-)
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 84119b7b4a768..c065200d646df 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -217,8 +217,7 @@ static llvm::Value *LoadTextureDescPtorAsVec8I32(CodeGenFunction &CGF,
if (RsrcPtr->getType()->isIntegerTy(32)) {
unsigned AS = 8;
llvm::PointerType *VecPtrTy = llvm::PointerType::get(VecTy, AS);
- llvm::Value *Ptr =
- B.CreateIntToPtr(RsrcPtr, VecPtrTy, "tex.rsrc.from.int");
+ llvm::Value *Ptr = B.CreateIntToPtr(RsrcPtr, VecPtrTy, "tex.rsrc.from.int");
return B.CreateAlignedLoad(VecTy, Ptr, llvm::Align(32), "tex.rsrc.val");
}
@@ -240,22 +239,24 @@ static llvm::Value *LoadTextureDescPtorAsVec8I32(CodeGenFunction &CGF,
static unsigned GetTextureDescIndex(unsigned BuiltinID, const CallExpr *E) {
unsigned N = E->getNumArgs();
if (IsImageSampleBuiltIn(BuiltinID)) {
- if (N < 5) return (unsigned)-1;
+ if (N < 5)
+ return (unsigned)-1;
return N - 5;
}
-
- if (N < 3) return (unsigned)-1;
+
+ if (N < 3)
+ return (unsigned)-1;
return N - 3;
}
-llvm::CallInst *EmitAMDGCNImageOverloadedReturnType(clang::CodeGen::CodeGenFunction &CGF,
- const clang::CallExpr *E,
- unsigned IntrinsicID,
- bool IsImageStore) {
+llvm::CallInst *
+EmitAMDGCNImageOverloadedReturnType(clang::CodeGen::CodeGenFunction &CGF,
+ const clang::CallExpr *E,
+ unsigned IntrinsicID, bool IsImageStore) {
clang::SmallVector<llvm::Value *, 10> Args;
unsigned RsrcIndex = GetTextureDescIndex(E->getBuiltinCallee(), E);
- for (unsigned I = 0; I < E->getNumArgs(); ++I){
+ for (unsigned I = 0; I < E->getNumArgs(); ++I) {
llvm::Value *V = CGF.EmitScalarExpr(E->getArg(I));
if (I == RsrcIndex)
V = LoadTextureDescPtorAsVec8I32(CGF, V);
@@ -1028,133 +1029,133 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
return EmitAMDGCNImageOverloadedReturnType(
- *this, E, Intrinsic::amdgcn_image_load_1d, false);
+ *this, E, Intrinsic::amdgcn_image_load_1d, false);
case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
return EmitAMDGCNImageOverloadedReturnType(
- *this, E, Intrinsic::amdgcn_image_load_1darray, false);
+ *this, E, Intrinsic::amdgcn_image_load_1darray, false);
case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
return EmitAMDGCNImageOverloadedReturnType(
- *this, E, Intrinsic::amdgcn_image_load_2d, false);
+ *this, E, Intrinsic::amdgcn_image_load_2d, false);
case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
return EmitAMDGCNImageOverloadedReturnType(
- *this, E, Intrinsic::amdgcn_image_load_2darray, false);
+ *this, E, Intrinsic::amdgcn_image_load_2darray, false);
case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
return EmitAMDGCNImageOverloadedReturnType(
- *this, E, Intrinsic::amdgcn_image_load_3d, false);
+ *this, E, Intrinsic::amdgcn_image_load_3d, false);
case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
return EmitAMDGCNImageOverloadedReturnType(
- *this, E, Intrinsic::amdgcn_image_load_cube, false);
+ *this, E, Intrinsic::amdgcn_image_load_cube, false);
case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
return EmitAMDGCNImageOverloadedReturnType(
- *this, E, Intrinsic::amdgcn_image_load_mip_1d, false);
+ *this, E, Intrinsic::amdgcn_image_load_mip_1d, false);
case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
return EmitAMDGCNImageOverloadedReturnType(
- *this, E, Intrinsic::amdgcn_image_load_mip_1darray, false);
+ *this, E, Intrinsic::amdgcn_image_load_mip_1darray, false);
case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
return EmitAMDGCNImageOverloadedReturnType(
- *this, E, Intrinsic::amdgcn_image_load_mip_2d, false);
+ *this, E, Intrinsic::amdgcn_image_load_mip_2d, false);
case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
return EmitAMDGCNImageOverloadedReturnType(
- *this, E, Intrinsic::amdgcn_image_load_mip_2darray, false);
+ *this, E, Intrinsic::amdgcn_image_load_mip_2darray, false);
case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
return EmitAMDGCNImageOverloadedReturnType(
- *this, E, Intrinsic::amdgcn_image_load_mip_3d, false);
+ *this, E, Intrinsic::amdgcn_image_load_mip_3d, false);
case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
return EmitAMDGCNImageOverloadedReturnType(
- *this, E, Intrinsic::amdgcn_image_load_mip_cube, false);
+ *this, E, Intrinsic::amdgcn_image_load_mip_cube, false);
case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32:
return EmitAMDGCNImageOverloadedReturnType(
- *this, E, Intrinsic::amdgcn_image_store_1d, true);
+ *this, E, Intrinsic::amdgcn_image_store_1d, true);
case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:
return EmitAMDGCNImageOverloadedReturnType(
- *this, E, Intrinsic::amdgcn_image_store_1darray, true);
+ *this, E, Intrinsic::amdgcn_image_store_1darray, true);
case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32:
return EmitAMDGCNImageOverloadedReturnType(
- *this, E, Intrinsic::amdgcn_image_store_2d, true);
+ *this, E, Intrinsic::amdgcn_image_store_2d, true);
case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:
return EmitAMDGCNImageOverloadedReturnType(
- *this, E, Intrinsic::amdgcn_image_store_2darray, true);
+ *this, E, Intrinsic::amdgcn_image_store_2darray, true);
case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
return EmitAMDGCNImageOverloadedReturnType(
- *this, E, Intrinsic::amdgcn_image_store_3d, true);
+ *this, E, Intrinsic::amdgcn_image_store_3d, true);
case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
return EmitAMDGCNImageOverloadedReturnType(
- *this, E, Intrinsic::amdgcn_image_store_cube, true);
+ *this, E, Intrinsic::amdgcn_image_store_cube, true);
case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32:
return EmitAMDGCNImageOverloadedReturnType(
- *this, E, Intrinsic::amdgcn_image_store_mip_1d, true);
+ *this, E, Intrinsic::amdgcn_image_store_mip_1d, true);
case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
return EmitAMDGCNImageOverloadedReturnType(
- *this, E, Intrinsic::amdgcn_image_store_mip_1darray, true);
+ *this, E, Intrinsic::amdgcn_image_store_mip_1darray, true);
case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:
return EmitAMDGCNImageOverloadedReturnType(
- *this, E, Intrinsic::amdgcn_image_store_mip_2d, true);
+ *this, E, Intrinsic::amdgcn_image_store_mip_2d, true);
case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:
return EmitAMDGCNImageOverloadedReturnType(
- *this, E, Intrinsic::amdgcn_image_store_mip_2darray, true);
+ *this, E, Intrinsic::amdgcn_image_store_mip_2darray, true);
case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
return EmitAMDGCNImageOverloadedReturnType(
- *this, E, Intrinsic::amdgcn_image_store_mip_3d, true);
+ *this, E, Intrinsic::amdgcn_image_store_mip_3d, true);
case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32:
return EmitAMDGCNImageOverloadedReturnType(
- *this, E, Intrinsic::amdgcn_image_store_mip_cube, true);
+ *this, E, Intrinsic::amdgcn_image_store_mip_cube, true);
case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32:
case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32:
return EmitAMDGCNImageOverloadedReturnType(
- *this, E, Intrinsic::amdgcn_image_sample_1d, false);
+ *this, E, Intrinsic::amdgcn_image_sample_1d, false);
case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32:
case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32:
return EmitAMDGCNImageOverloadedReturnType(
- *this, E, Intrinsic::amdgcn_image_sample_1darray, false);
+ *this, E, Intrinsic::amdgcn_image_sample_1darray, false);
case AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32:
case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32:
case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32:
return EmitAMDGCNImageOverloadedReturnType(
- *this, E, Intrinsic::amdgcn_image_sample_2d, false);
+ *this, E, Intrinsic::amdgcn_image_sample_2d, false);
case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32:
case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32:
case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32:
return EmitAMDGCNImageOverloadedReturnType(
- *this, E, Intrinsic::amdgcn_image_sample_2darray, false);
+ *this, E, Intrinsic::amdgcn_image_sample_2darray, false);
case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32:
case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:
return EmitAMDGCNImageOverloadedReturnType(
- *this, E, Intrinsic::amdgcn_image_sample_3d, false);
+ *this, E, Intrinsic::amdgcn_image_sample_3d, false);
case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32:
case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:
return EmitAMDGCNImageOverloadedReturnType(
- *this, E, Intrinsic::amdgcn_image_sample_cube, false);
+ *this, E, Intrinsic::amdgcn_image_sample_cube, false);
case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8);
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index d08778f2d1f79..9da426917e05e 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -139,7 +139,7 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32:
case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32:
case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32:
@@ -154,11 +154,12 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:
case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32:
case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32: {
- StringRef FeatureList(getASTContext().BuiltinInfo.getRequiredFeatures(BuiltinID));
- if (!Builtin::evaluateRequiredTargetFeatures(
- FeatureList, CallerFeatureMap)){
+ StringRef FeatureList(
+ getASTContext().BuiltinInfo.getRequiredFeatures(BuiltinID));
+ if (!Builtin::evaluateRequiredTargetFeatures(FeatureList,
+ CallerFeatureMap)) {
Diag(TheCall->getBeginLoc(), diag::err_builtin_needs_feature)
- << FD->getDeclName() << FeatureList;
+ << FD->getDeclName() << FeatureList;
return false;
}
@@ -166,8 +167,8 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
llvm::APSInt Result;
return ((SemaRef.BuiltinConstantArg(TheCall, 0, Result)) ||
- (SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) ||
- (SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result)));
+ (SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) ||
+ (SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result)));
}
case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
@@ -197,11 +198,12 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32: {
- StringRef FeatureList(getASTContext().BuiltinInfo.getRequiredFeatures(BuiltinID));
- if (!Builtin::evaluateRequiredTargetFeatures(
- FeatureList, CallerFeatureMap)){
+ StringRef FeatureList(
+ getASTContext().BuiltinInfo.getRequiredFeatures(BuiltinID));
+ if (!Builtin::evaluateRequiredTargetFeatures(FeatureList,
+ CallerFeatureMap)) {
Diag(TheCall->getBeginLoc(), diag::err_builtin_needs_feature)
- << FD->getDeclName() << FeatureList;
+ << FD->getDeclName() << FeatureList;
return false;
}
@@ -209,8 +211,8 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
llvm::APSInt Result;
return ((SemaRef.BuiltinConstantArg(TheCall, 1, Result)) ||
- (SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) ||
- (SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result)));
+ (SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) ||
+ (SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result)));
}
default:
return false;
More information about the cfe-commits
mailing list