[clang] [WIP][AMDGPU] Support for type inferring image load/store builtins for AMDGPU (PR #140210)
Rana Pratap Reddy via cfe-commits
cfe-commits at lists.llvm.org
Mon Sep 8 22:07:55 PDT 2025
https://github.com/ranapratap55 updated https://github.com/llvm/llvm-project/pull/140210
From d1571dd53f157c9d4180c51e709d9bed0ba00136 Mon Sep 17 00:00:00 2001
From: ranapratap55 <RanaPratapReddy.Nimmakayala at amd.com>
Date: Fri, 16 May 2025 12:50:09 +0530
Subject: [PATCH 1/7] [WIP][AMDGPU] Support for type inferring image load/store
builtins for AMDGPU
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 5 +++
clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 24 ++++++++++++++
.../test/CodeGen/builtins-image-load-2d-f32.c | 31 +++++++++++++++++++
3 files changed, 60 insertions(+)
create mode 100644 clang/test/CodeGen/builtins-image-load-2d-f32.c
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 39fef9e4601f8..67045809fa726 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -635,5 +635,10 @@ TARGET_BUILTIN(__builtin_amdgcn_bitop3_b16, "ssssIUi", "nc", "bitop3-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf16_f32, "V2yV2yfUiIb", "nc", "f32-to-f16bf16-cvt-sr-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_f16_f32, "V2hV2hfUiIb", "nc", "f32-to-f16bf16-cvt-sr-insts")
+//===----------------------------------------------------------------------===//
+// Image builtins
+//===----------------------------------------------------------------------===//
+BUILTIN(__builtin_amdgcn_image_load_2d_f32_i32, "fiiV8i", "n")
+
#undef BUILTIN
#undef TARGET_BUILTIN
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index ad012d98635ff..15f5cd89beaa9 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -683,6 +683,30 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
return Builder.CreateInsertElement(I0, A, 1);
}
+ case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32: {
+ llvm::Type *RetTy = llvm::Type::getFloatTy(Builder.getContext());
+ llvm::Type *IntTy = llvm::IntegerType::get(Builder.getContext(), 32u);
+
+ llvm::Value *imm0 = llvm::ConstantInt::get(IntTy, 1);
+ llvm::Value *arg0 = EmitScalarExpr(E->getArg(0));
+ llvm::Value *arg1 = EmitScalarExpr(E->getArg(1));
+ llvm::Value *arg2 = EmitScalarExpr(E->getArg(2));
+ llvm::Value *imm1 = llvm::ConstantInt::get(IntTy, 0);
+ llvm::Value *imm2 = llvm::ConstantInt::get(IntTy, 0);
+
+ SmallVector<Value *, 6> ArgTys;
+ ArgTys.push_back(imm0);
+ ArgTys.push_back(arg0);
+ ArgTys.push_back(arg1);
+ ArgTys.push_back(arg2);
+ ArgTys.push_back(imm1);
+ ArgTys.push_back(imm2);
+
+ llvm::CallInst *Call =
+ Builder.CreateIntrinsic(RetTy, Intrinsic::amdgcn_image_load_2d, ArgTys);
+
+ return Call;
+ }
case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8);
diff --git a/clang/test/CodeGen/builtins-image-load-2d-f32.c b/clang/test/CodeGen/builtins-image-load-2d-f32.c
new file mode 100644
index 0000000000000..78dab461c1f38
--- /dev/null
+++ b/clang/test/CodeGen/builtins-image-load-2d-f32.c
@@ -0,0 +1,31 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown %s -emit-llvm -o - | FileCheck %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
+
+typedef int v8i __attribute__((ext_vector_type(8)));
+
+// CHECK-LABEL: define dso_local float @test_builtin_image_load_2d(
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VECI32:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VECI32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VECI32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VECI32_ADDR]] to ptr
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VECI32]], ptr [[VECI32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VECI32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 1, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 0, i32 0)
+// CHECK-NEXT: ret float [[TMP3]]
+//
+float test_builtin_image_load_2d(float f32, int i32, v8i veci32) {
+
+ return __builtin_amdgcn_image_load_2d_f32_i32(i32, i32, veci32);
+}
From 7168d19ca478e6b3991011deae04584c6c11439a Mon Sep 17 00:00:00 2001
From: ranapratap55 <RanaPratapReddy.Nimmakayala at amd.com>
Date: Wed, 18 Jun 2025 10:55:22 +0530
Subject: [PATCH 2/7] [AMDGPU] Adds builtins for image load and sema checking
for image load
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 33 +-
clang/include/clang/Sema/SemaAMDGPU.h | 2 +
clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 216 +++++-
clang/lib/Sema/SemaAMDGPU.cpp | 42 +
.../test/CodeGen/builtins-image-load-2d-f32.c | 715 +++++++++++++++++-
.../builtins-image-load-2d-f32-param.cl | 132 ++++
6 files changed, 1105 insertions(+), 35 deletions(-)
create mode 100644 clang/test/SemaOpenCL/builtins-image-load-2d-f32-param.cl
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 67045809fa726..1ae461676b034 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -636,9 +636,36 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf16_f32, "V2yV2yfUiIb", "nc", "f32-to-f1
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_f16_f32, "V2hV2hfUiIb", "nc", "f32-to-f16bf16-cvt-sr-insts")
//===----------------------------------------------------------------------===//
-// Image builtins
-//===----------------------------------------------------------------------===//
-BUILTIN(__builtin_amdgcn_image_load_2d_f32_i32, "fiiV8i", "n")
+// Image load builtins
+//===----------------------------------------------------------------------===//
+TARGET_BUILTIN(__builtin_amdgcn_image_load_1d_v4f32_i32, "V4fiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_1d_v4f16_i32, "V4hiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_1darray_v4f32_i32, "V4fiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_1darray_v4f16_i32, "V4hiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2d_f32_i32, "fiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2d_v4f32_i32, "V4fiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2d_v4f16_i32, "V4hiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2darray_f32_i32, "fiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2darray_v4f32_i32, "V4fiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2darray_v4f16_i32, "V4hiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_3d_v4f32_i32, "V4fiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_3d_v4f16_i32, "V4hiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_cube_v4f32_i32, "V4fiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_cube_v4f16_i32, "V4hiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1d_v4f32_i32, "V4fiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1d_v4f16_i32, "V4hiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1darray_v4f32_i32, "V4fiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1darray_v4f16_i32, "V4hiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2d_f32_i32, "fiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2d_v4f32_i32, "V4fiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2d_v4f16_i32, "V4hiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2darray_f32_i32, "fiiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2darray_v4f32_i32, "V4fiiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2darray_v4f16_i32, "V4hiiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_3d_v4f32_i32, "V4fiiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_3d_v4f16_i32, "V4hiiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_cube_v4f32_i32, "V4fiiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_cube_v4f16_i32, "V4hiiiiiV8iii", "nc", "")
#undef BUILTIN
#undef TARGET_BUILTIN
diff --git a/clang/include/clang/Sema/SemaAMDGPU.h b/clang/include/clang/Sema/SemaAMDGPU.h
index d62c9bb65fadb..673f2719fe227 100644
--- a/clang/include/clang/Sema/SemaAMDGPU.h
+++ b/clang/include/clang/Sema/SemaAMDGPU.h
@@ -29,6 +29,8 @@ class SemaAMDGPU : public SemaBase {
bool checkMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs,
unsigned NumDataArgs);
+ bool checkImageImmArgFunctionCall(CallExpr *TheCall, unsigned ArgCount);
+
/// Create an AMDGPUWavesPerEUAttr attribute.
AMDGPUFlatWorkGroupSizeAttr *
CreateAMDGPUFlatWorkGroupSizeAttr(const AttributeCommonInfo &CI, Expr *Min,
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 15f5cd89beaa9..ef1cebf39d7d8 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -683,27 +683,203 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
return Builder.CreateInsertElement(I0, A, 1);
}
- case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32: {
- llvm::Type *RetTy = llvm::Type::getFloatTy(Builder.getContext());
- llvm::Type *IntTy = llvm::IntegerType::get(Builder.getContext(), 32u);
-
- llvm::Value *imm0 = llvm::ConstantInt::get(IntTy, 1);
- llvm::Value *arg0 = EmitScalarExpr(E->getArg(0));
- llvm::Value *arg1 = EmitScalarExpr(E->getArg(1));
- llvm::Value *arg2 = EmitScalarExpr(E->getArg(2));
- llvm::Value *imm1 = llvm::ConstantInt::get(IntTy, 0);
- llvm::Value *imm2 = llvm::ConstantInt::get(IntTy, 0);
-
- SmallVector<Value *, 6> ArgTys;
- ArgTys.push_back(imm0);
- ArgTys.push_back(arg0);
- ArgTys.push_back(arg1);
- ArgTys.push_back(arg2);
- ArgTys.push_back(imm1);
- ArgTys.push_back(imm2);
+ case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: {
+ llvm::Type *RetTy = nullptr;
+ switch (BuiltinID) {
+ case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
+ RetTy = llvm::Type::getFloatTy(Builder.getContext());
+ break;
+ case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
+ RetTy =
+ FixedVectorType::get(llvm::Type::getFloatTy(Builder.getContext()), 4);
+ break;
+ case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
+ RetTy =
+ FixedVectorType::get(llvm::Type::getHalfTy(Builder.getContext()), 4);
+ break;
+ }
- llvm::CallInst *Call =
- Builder.CreateIntrinsic(RetTy, Intrinsic::amdgcn_image_load_2d, ArgTys);
+ llvm::Value *Dmask = EmitScalarExpr(E->getArg(0));
+ llvm::Value *S = EmitScalarExpr(E->getArg(1));
+ llvm::Value *T = EmitScalarExpr(E->getArg(2));
+ llvm::Value *Slice;
+ llvm::Value *Mip;
+ llvm::Value *Rsrc;
+ llvm::Value *Tfe;
+ llvm::Value *Cpol;
+
+ SmallVector<Value *, 10> ArgTys;
+
+ Intrinsic::ID IID;
+ llvm::CallInst *Call;
+
+ switch (BuiltinID) {
+ case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32: {
+ Rsrc = EmitScalarExpr(E->getArg(2));
+ Tfe = EmitScalarExpr(E->getArg(3));
+ Cpol = EmitScalarExpr(E->getArg(4));
+
+ ArgTys = {Dmask, S, Rsrc, Tfe, Cpol};
+ IID = Intrinsic::amdgcn_image_load_1d;
+ Call = Builder.CreateIntrinsic(RetTy, IID, ArgTys);
+ break;
+ }
+ case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32: {
+ Slice = EmitScalarExpr(E->getArg(2));
+ Rsrc = EmitScalarExpr(E->getArg(3));
+ Tfe = EmitScalarExpr(E->getArg(4));
+ Cpol = EmitScalarExpr(E->getArg(5));
+
+ ArgTys = {Dmask, S, Slice, Rsrc, Tfe, Cpol};
+ IID = Intrinsic::amdgcn_image_load_1darray;
+ switch (BuiltinID) {
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
+ IID = Intrinsic::amdgcn_image_load_mip_1d;
+ break;
+ }
+ Call = Builder.CreateIntrinsic(RetTy, IID, ArgTys);
+ break;
+ }
+ case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32: {
+ Rsrc = EmitScalarExpr(E->getArg(3));
+ Tfe = EmitScalarExpr(E->getArg(4));
+ Cpol = EmitScalarExpr(E->getArg(5));
+
+ ArgTys = {Dmask, S, T, Rsrc, Tfe, Cpol};
+ IID = Intrinsic::amdgcn_image_load_2d;
+ Call = Builder.CreateIntrinsic(RetTy, IID, ArgTys);
+ break;
+ }
+ case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32: {
+ Slice = EmitScalarExpr(E->getArg(3));
+ Rsrc = EmitScalarExpr(E->getArg(4));
+ Tfe = EmitScalarExpr(E->getArg(5));
+ Cpol = EmitScalarExpr(E->getArg(6));
+
+ ArgTys = {Dmask, S, T, Slice, Rsrc, Tfe, Cpol};
+ IID = Intrinsic::amdgcn_image_load_2darray;
+
+ switch (BuiltinID) {
+ case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
+ IID = Intrinsic::amdgcn_image_load_3d;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
+ IID = Intrinsic::amdgcn_image_load_cube;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
+ IID = Intrinsic::amdgcn_image_load_mip_1darray;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
+ IID = Intrinsic::amdgcn_image_load_mip_2d;
+ break;
+ }
+ Call = Builder.CreateIntrinsic(RetTy, IID, ArgTys);
+ break;
+ }
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: {
+ Slice = EmitScalarExpr(E->getArg(3));
+ Mip = EmitScalarExpr(E->getArg(4));
+ Rsrc = EmitScalarExpr(E->getArg(5));
+ Tfe = EmitScalarExpr(E->getArg(6));
+ Cpol = EmitScalarExpr(E->getArg(7));
+
+ ArgTys = {Dmask, S, T, Slice, Mip, Rsrc, Tfe, Cpol};
+ IID = Intrinsic::amdgcn_image_load_mip_2darray;
+ switch (BuiltinID) {
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
+ IID = Intrinsic::amdgcn_image_load_mip_3d;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
+ IID = Intrinsic::amdgcn_image_load_mip_cube;
+ break;
+ }
+ Call = Builder.CreateIntrinsic(RetTy, IID, ArgTys);
+ break;
+ }
+ }
return Call;
}
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index a6366aceec2a6..530d0e7553604 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -83,6 +83,38 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_update_dpp: {
return checkMovDPPFunctionCall(TheCall, 6, 2);
}
+ case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: {
+ unsigned ArgCount = TheCall->getNumArgs() - 1;
+
+ return checkImageImmArgFunctionCall(TheCall, ArgCount);
+ }
default:
return false;
}
@@ -128,6 +160,16 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
return false;
}
+bool SemaAMDGPU::checkImageImmArgFunctionCall(CallExpr *TheCall,
+ unsigned ArgCount) {
+ llvm::APSInt Result;
+ if (!(SemaRef.BuiltinConstantArg(TheCall, 0, Result)) &&
+ !(SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) &&
+ !(SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result)))
+ return false;
+ return true;
+}
+
bool SemaAMDGPU::checkMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs,
unsigned NumDataArgs) {
assert(NumDataArgs <= 2);
diff --git a/clang/test/CodeGen/builtins-image-load-2d-f32.c b/clang/test/CodeGen/builtins-image-load-2d-f32.c
index 78dab461c1f38..aee97af37aaf0 100644
--- a/clang/test/CodeGen/builtins-image-load-2d-f32.c
+++ b/clang/test/CodeGen/builtins-image-load-2d-f32.c
@@ -1,31 +1,722 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa %s -emit-llvm -o - | FileCheck %s
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-
-typedef int v8i __attribute__((ext_vector_type(8)));
+typedef int int8 __attribute__((ext_vector_type(8)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef _Float16 half;
+typedef half half4 __attribute__((ext_vector_type(4)));
// CHECK-LABEL: define dso_local float @test_builtin_image_load_2d(
-// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VECI32:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[VECI32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
-// CHECK-NEXT: [[VECI32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VECI32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: store <8 x i32> [[VECI32]], ptr [[VECI32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VECI32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 1, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 0, i32 0)
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 12, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 106, i32 103)
// CHECK-NEXT: ret float [[TMP3]]
//
-float test_builtin_image_load_2d(float f32, int i32, v8i veci32) {
+float test_builtin_image_load_2d(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2d_f32_i32(12, i32, i32, vec8i32, 106, 103);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_2d_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP3]]
+//
+float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2d_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP3]]
+//
+half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local float @test_builtin_image_load_2darray(
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.load.2darray.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret float [[TMP4]]
+//
+float test_builtin_image_load_2darray(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_2darray_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP4]]
+//
+float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2darray_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2darray_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP4]]
+//
+half4 test_builtin_image_load_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2darray_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_1d_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], <8 x i32> [[TMP2]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP3]]
+//
+float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_1d_v4f32_i32(100, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1d_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], <8 x i32> [[TMP2]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP3]]
+//
+half4 test_builtin_image_load_1d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_1d_v4f16_i32(100, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_1darray_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP4]]
+//
+float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_1darray_v4f32_i32(100, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1darray_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP4]]
+//
+half4 test_builtin_image_load_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_1darray_v4f16_i32(100, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_3d_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP4]]
+//
+float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_3d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_3d_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP4]]
+//
+half4 test_builtin_image_load_3d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_3d_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_cube_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP4]]
+//
+float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_cube_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_cube_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.cube.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP4]]
+//
+half4 test_builtin_image_load_cube_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_cube_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_1d_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP4]]
+//
+float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(100, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1d_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP4]]
+//
+half4 test_builtin_image_load_mip_1d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_1d_v4f16_i32(100, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_1darray_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP4]]
+//
+float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1darray_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP4]]
+//
+half4 test_builtin_image_load_mip_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_1darray_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local float @test_builtin_image_load_mip_2d(
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.load.mip.2d.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret float [[TMP4]]
+//
+float test_builtin_image_load_mip_2d(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2d_f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2d_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP4]]
+//
+float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2d_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP4]]
+//
+half4 test_builtin_image_load_mip_2d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2d_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local float @test_builtin_image_load_mip_2darray(
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.image.load.mip.2darray.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret float [[TMP5]]
+//
+float test_builtin_image_load_mip_2darray(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2darray_f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2darray_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
+//
+float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2darray_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP5]]
+//
+half4 test_builtin_image_load_mip_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2darray_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_3d_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
+//
+float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_3d_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.3d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP5]]
+//
+half4 test_builtin_image_load_mip_3d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_3d_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_cube_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
+//
+float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_cube_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.cube.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP5]]
+//
+half4 test_builtin_image_load_mip_cube_2(half4 v4f16, int i32, int8 vec8i32) {
- return __builtin_amdgcn_image_load_2d_f32_i32(i32, i32, veci32);
+ return __builtin_amdgcn_image_load_mip_cube_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
}
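A note for readers skimming the autogenerated checks above: the mip variants take one extra coordinate, the mip level, immediately before the resource descriptor; apart from that the call shape matches the non-mip loads, and the trailing two i32 operands (120 and 110 in these tests) are forwarded unchanged as the tfe and cpol immediates. A minimal sketch contrasting the two forms, with illustrative values only:

  typedef int int8 __attribute__((ext_vector_type(8)));
  typedef float float4 __attribute__((ext_vector_type(4)));

  float4 load_2d(int x, int y, int8 rsrc) {
    // dmask, s, t, rsrc, tfe, cpol
    return __builtin_amdgcn_image_load_2d_v4f32_i32(100, x, y, rsrc, 120, 110);
  }

  float4 load_2d_mip(int x, int y, int mip, int8 rsrc) {
    // same shape, plus the mip level before the resource descriptor
    return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(100, x, y, mip, rsrc, 120, 110);
  }
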
diff --git a/clang/test/SemaOpenCL/builtins-image-load-2d-f32-param.cl b/clang/test/SemaOpenCL/builtins-image-load-2d-f32-param.cl
new file mode 100644
index 0000000000000..7b5aab4011da9
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-image-load-2d-f32-param.cl
@@ -0,0 +1,132 @@
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -S -verify=expected -o - %s
+// REQUIRES: amdgpu-registered-target
+
+typedef int int8 __attribute__((ext_vector_type(8)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+//typedef _Float16 half;
+typedef half half4 __attribute__((ext_vector_type(4)));
+
+
+float test_builtin_image_load_2d(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2d_f32_i32(i32, i32, i32, vec8i32, 106, 103); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_f32_i32' must be a constant integer}}
+}
+float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_v4f16_i32' must be a constant integer}}
+}
+
+float test_builtin_image_load_2darray(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_f32_i32' must be a constant integer}}
+}
+float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2darray_v4f32_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2darray_v4f16_i32(100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_1d_v4f32_i32(i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_1d_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_1d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_1d_v4f16_i32(100, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_1d_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_1darray_v4f32_i32(100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_1darray_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_1darray_v4f16_i32(100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_1darray_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_3d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_3d_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_3d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_3d_v4f16_i32(i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_3d_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_cube_v4f32_i32(i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_cube_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_cube_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_cube_v4f16_i32(i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_cube_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1d_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_mip_1d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_1d_v4f16_i32(100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1d_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1darray_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_mip_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_1darray_v4f16_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1darray_v4f16_i32' must be a constant integer}}
+}
+
+float test_builtin_image_load_mip_2d(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2d_f32_i32(i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_f32_i32' must be a constant integer}}
+}
+float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_mip_2d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2d_v4f16_i32(i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_v4f16_i32' must be a constant integer}}
+}
+
+float test_builtin_image_load_mip_2darray(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2darray_f32_i32(i32, i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_f32_i32' must be a constant integer}}
+}
+float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_mip_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2darray_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(i32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_3d_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_mip_3d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_3d_v4f16_i32(i32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_3d_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(i32, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_cube_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_mip_cube_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_cube_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_cube_v4f16_i32' must be a constant integer}}
+}
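
Every case in this test passes a runtime value where the builtin expects an immediate; the diagnostic fires on the dmask (first) argument and on the trailing tfe/cpol pair. For contrast, a call that keeps those three as integer literals should be accepted without diagnostics; a sketch with illustrative values:

  typedef int int8 __attribute__((ext_vector_type(8)));
  typedef float float4 __attribute__((ext_vector_type(4)));

  float4 load_1d_ok(int x, int8 rsrc) {
    // 100 (dmask), 120 (tfe) and 110 (cpol) are constants, so Sema is satisfied
    return __builtin_amdgcn_image_load_1d_v4f32_i32(100, x, rsrc, 120, 110);
  }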
From ab18d68c43d06516ca163d0594d94c3e47826ee6 Mon Sep 17 00:00:00 2001
From: ranapratap55 <RanaPratapReddy.Nimmakayala at amd.com>
Date: Thu, 26 Jun 2025 14:36:33 +0530
Subject: [PATCH 3/7] [AMDGPU] Add builtins and Sema checking for image store
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 31 +-
clang/include/clang/Sema/SemaAMDGPU.h | 2 -
clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 177 ++++-
clang/lib/Sema/SemaAMDGPU.cpp | 60 +-
...ge-load-2d-f32.c => builtins-image-load.c} | 0
clang/test/CodeGen/builtins-image-store.c | 695 ++++++++++++++++++
...-param.cl => builtins-image-load-param.cl} | 0
.../SemaOpenCL/builtins-image-store-param.cl | 132 ++++
8 files changed, 1073 insertions(+), 24 deletions(-)
rename clang/test/CodeGen/{builtins-image-load-2d-f32.c => builtins-image-load.c} (100%)
create mode 100644 clang/test/CodeGen/builtins-image-store.c
rename clang/test/SemaOpenCL/{builtins-image-load-2d-f32-param.cl => builtins-image-load-param.cl} (100%)
create mode 100644 clang/test/SemaOpenCL/builtins-image-store-param.cl
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 1ae461676b034..02cf766f0c899 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -636,7 +636,7 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf16_f32, "V2yV2yfUiIb", "nc", "f32-to-f1
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_f16_f32, "V2hV2hfUiIb", "nc", "f32-to-f16bf16-cvt-sr-insts")
//===----------------------------------------------------------------------===//
-// Image load builtins
+// Image load/store builtins
//===----------------------------------------------------------------------===//
TARGET_BUILTIN(__builtin_amdgcn_image_load_1d_v4f32_i32, "V4fiiV8iii", "nc", "")
TARGET_BUILTIN(__builtin_amdgcn_image_load_1d_v4f16_i32, "V4hiiV8iii", "nc", "")
@@ -667,5 +667,34 @@ TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_3d_v4f16_i32, "V4hiiiiiV8iii", "n
TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_cube_v4f32_i32, "V4fiiiiiV8iii", "nc", "")
TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_cube_v4f16_i32, "V4hiiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_1d_v4f32_i32, "vV4fiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_1d_v4f16_i32, "vV4hiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_1darray_v4f32_i32, "vV4fiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_1darray_v4f16_i32, "vV4hiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_f32_i32, "vfiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_v4f32_i32, "vV4fiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_v4f16_i32, "vV4hiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_f32_i32, "vfiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_v4f32_i32, "vV4fiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_v4f16_i32, "vV4hiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_3d_v4f32_i32, "vV4fiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_3d_v4f16_i32, "vV4hiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_cube_v4f32_i32, "vV4fiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_cube_v4f16_i32, "vV4hiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1d_v4f32_i32, "vV4fiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1d_v4f16_i32, "vV4hiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1darray_v4f32_i32, "vV4fiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1darray_v4f16_i32, "vV4hiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_f32_i32, "vfiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_v4f32_i32, "vV4fiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_v4f16_i32, "vV4hiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_f32_i32, "vfiiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_v4f32_i32, "vV4fiiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_v4f16_i32, "vV4hiiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_3d_v4f32_i32, "vV4fiiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_3d_v4f16_i32, "vV4hiiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_cube_v4f32_i32, "vV4fiiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_cube_v4f16_i32, "vV4hiiiiiV8iii", "nc", "")
+
#undef BUILTIN
#undef TARGET_BUILTIN
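
For readers decoding the prototype strings above: 'v' is a void result, 'V4f' and 'V4h' are the <4 x float>/<4 x half> vdata being stored, 'f' is a scalar float, the run of 'i' arguments covers dmask, the coordinates and (where present) slice and mip, and the trailing 'V8iii' is the <8 x i32> resource descriptor followed by the tfe and cpol immediates. A minimal usage sketch under those assumptions, with illustrative literal values:

  typedef int int8 __attribute__((ext_vector_type(8)));
  typedef float float4 __attribute__((ext_vector_type(4)));

  // vV4fiiiV8iii: store one <4 x float> texel at (x, y); dmask 0xf writes all
  // four components, tfe and cpol stay 0.
  void store_texel_2d(float4 data, int x, int y, int8 rsrc) {
    __builtin_amdgcn_image_store_2d_v4f32_i32(data, 0xf, x, y, rsrc, 0, 0);
  }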
diff --git a/clang/include/clang/Sema/SemaAMDGPU.h b/clang/include/clang/Sema/SemaAMDGPU.h
index 673f2719fe227..d62c9bb65fadb 100644
--- a/clang/include/clang/Sema/SemaAMDGPU.h
+++ b/clang/include/clang/Sema/SemaAMDGPU.h
@@ -29,8 +29,6 @@ class SemaAMDGPU : public SemaBase {
bool checkMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs,
unsigned NumDataArgs);
- bool checkImageImmArgFunctionCall(CallExpr *TheCall, unsigned ArgCount);
-
/// Create an AMDGPUWavesPerEUAttr attribute.
AMDGPUFlatWorkGroupSizeAttr *
CreateAMDGPUFlatWorkGroupSizeAttr(const AttributeCommonInfo &CI, Expr *Min,
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index ef1cebf39d7d8..97a1078d7bb77 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -754,11 +754,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
llvm::Value *Dmask = EmitScalarExpr(E->getArg(0));
llvm::Value *S = EmitScalarExpr(E->getArg(1));
llvm::Value *T = EmitScalarExpr(E->getArg(2));
- llvm::Value *Slice;
- llvm::Value *Mip;
- llvm::Value *Rsrc;
- llvm::Value *Tfe;
- llvm::Value *Cpol;
+ llvm::Value *Slice = nullptr;
+ llvm::Value *Mip = nullptr;
+ llvm::Value *Rsrc = nullptr;
+ llvm::Value *Tfe = nullptr;
+ llvm::Value *Cpol = nullptr;
SmallVector<Value *, 10> ArgTys;
@@ -788,11 +788,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
ArgTys = {Dmask, S, Slice, Rsrc, Tfe, Cpol};
IID = Intrinsic::amdgcn_image_load_1darray;
- switch (BuiltinID) {
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
+ if (BuiltinID == AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32 ||
+ BuiltinID == AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32) {
IID = Intrinsic::amdgcn_image_load_mip_1d;
- break;
}
Call = Builder.CreateIntrinsic(RetTy, IID, ArgTys);
break;
@@ -883,6 +881,167 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
return Call;
}
+ case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32: {
+ llvm::Type *RetTy = VoidTy;
+ llvm::Value *Vdata = EmitScalarExpr(E->getArg(0));
+ llvm::Value *Dmask = EmitScalarExpr(E->getArg(1));
+ llvm::Value *S = EmitScalarExpr(E->getArg(2));
+ llvm::Value *T = EmitScalarExpr(E->getArg(3));
+ llvm::Value *Slice = nullptr;
+ llvm::Value *Mip = nullptr;
+ llvm::Value *Rsrc = nullptr;
+ llvm::Value *Tfe = nullptr;
+ llvm::Value *Cpol = nullptr;
+
+ SmallVector<Value *, 10> ArgTys;
+
+ Intrinsic::ID IID;
+ llvm::CallInst *Call;
+
+ switch (BuiltinID) {
+ case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32: {
+ Rsrc = EmitScalarExpr(E->getArg(3));
+ Tfe = EmitScalarExpr(E->getArg(4));
+ Cpol = EmitScalarExpr(E->getArg(5));
+
+ ArgTys = {Vdata, Dmask, S, Rsrc, Tfe, Cpol};
+ IID = Intrinsic::amdgcn_image_store_1d;
+ Call = Builder.CreateIntrinsic(RetTy, IID, ArgTys);
+ break;
+ }
+ case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32: {
+ Slice = EmitScalarExpr(E->getArg(3));
+ Rsrc = EmitScalarExpr(E->getArg(4));
+ Tfe = EmitScalarExpr(E->getArg(5));
+ Cpol = EmitScalarExpr(E->getArg(6));
+
+ ArgTys = {Vdata, Dmask, S, Slice, Rsrc, Tfe, Cpol};
+ IID = Intrinsic::amdgcn_image_store_1darray;
+ if (BuiltinID ==
+ AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32 ||
+ BuiltinID ==
+ AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32) {
+ IID = Intrinsic::amdgcn_image_store_mip_1d;
+ }
+ Call = Builder.CreateIntrinsic(RetTy, IID, ArgTys);
+ break;
+ }
+ case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32: {
+ Rsrc = EmitScalarExpr(E->getArg(4));
+ Tfe = EmitScalarExpr(E->getArg(5));
+ Cpol = EmitScalarExpr(E->getArg(6));
+
+ ArgTys = {Vdata, Dmask, S, T, Rsrc, Tfe, Cpol};
+ IID = Intrinsic::amdgcn_image_store_2d;
+ Call = Builder.CreateIntrinsic(RetTy, IID, ArgTys);
+ break;
+ }
+ case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32: {
+ Slice = EmitScalarExpr(E->getArg(4));
+ Rsrc = EmitScalarExpr(E->getArg(5));
+ Tfe = EmitScalarExpr(E->getArg(6));
+ Cpol = EmitScalarExpr(E->getArg(7));
+
+ ArgTys = {Vdata, Dmask, S, T, Slice, Rsrc, Tfe, Cpol};
+ IID = Intrinsic::amdgcn_image_store_2darray;
+
+ switch (BuiltinID) {
+ case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
+ IID = Intrinsic::amdgcn_image_store_3d;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
+ IID = Intrinsic::amdgcn_image_store_cube;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
+ IID = Intrinsic::amdgcn_image_store_mip_1darray;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:
+ IID = Intrinsic::amdgcn_image_store_mip_2d;
+ break;
+ }
+ Call = Builder.CreateIntrinsic(RetTy, IID, ArgTys);
+ break;
+ }
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32: {
+ Slice = EmitScalarExpr(E->getArg(4));
+ Mip = EmitScalarExpr(E->getArg(5));
+ Rsrc = EmitScalarExpr(E->getArg(6));
+ Tfe = EmitScalarExpr(E->getArg(7));
+ Cpol = EmitScalarExpr(E->getArg(8));
+
+ ArgTys = {Vdata, Dmask, S, T, Slice, Mip, Rsrc, Tfe, Cpol};
+ IID = Intrinsic::amdgcn_image_store_mip_2darray;
+ switch (BuiltinID) {
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
+ IID = Intrinsic::amdgcn_image_store_mip_3d;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32:
+ IID = Intrinsic::amdgcn_image_store_mip_cube;
+ break;
+ }
+ Call = Builder.CreateIntrinsic(RetTy, IID, ArgTys);
+ break;
+ }
+ }
+ return Call;
+ }
case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8);
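
The store lowering mirrors the load path above: vdata and dmask come first, then the coordinates, then slice and/or mip where the dimension needs them, and finally the resource descriptor plus the tfe and cpol immediates, all forwarded to the matching llvm.amdgcn.image.store.* intrinsic, which returns void. A sketch of the mapping for the 2darray case, with illustrative values (the intrinsic's trailing operands being its texfailctrl and cachepolicy):

  typedef int int8 __attribute__((ext_vector_type(8)));
  typedef float float4 __attribute__((ext_vector_type(4)));

  // Expected to lower to llvm.amdgcn.image.store.2darray.v4f32.i32.v8i32 with
  // operands (vdata, dmask, s, t, slice, rsrc, texfailctrl, cachepolicy).
  void store_layer(float4 data, int x, int y, int layer, int8 rsrc) {
    __builtin_amdgcn_image_store_2darray_v4f32_i32(data, 0xf, x, y, layer,
                                                   rsrc, 0, 0);
  }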
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index 530d0e7553604..2381f73a870d6 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -112,8 +112,54 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: {
unsigned ArgCount = TheCall->getNumArgs() - 1;
-
- return checkImageImmArgFunctionCall(TheCall, ArgCount);
+ llvm::APSInt Result;
+ bool isImmArg =
+ (!(SemaRef.BuiltinConstantArg(TheCall, 0, Result)) &&
+ !(SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) &&
+ !(SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result)))
+ ? false
+ : true;
+
+ return isImmArg;
+ }
+ case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32: {
+ unsigned ArgCount = TheCall->getNumArgs() - 1;
+ llvm::APSInt Result;
+ bool isImmArg =
+ (!(SemaRef.BuiltinConstantArg(TheCall, 1, Result)) &&
+ !(SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) &&
+ !(SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result)))
+ ? false
+ : true;
+
+ return isImmArg;
}
default:
return false;
@@ -160,16 +206,6 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
return false;
}
-bool SemaAMDGPU::checkImageImmArgFunctionCall(CallExpr *TheCall,
- unsigned ArgCount) {
- llvm::APSInt Result;
- if (!(SemaRef.BuiltinConstantArg(TheCall, 0, Result)) &&
- !(SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) &&
- !(SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result)))
- return false;
- return true;
-}
-
bool SemaAMDGPU::checkMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs,
unsigned NumDataArgs) {
assert(NumDataArgs <= 2);
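
The net effect of the Sema changes: for the load builtins the first argument (dmask) and the last two (tfe, cpol) must be integer constant expressions, and for the store builtins the same constraint applies starting at index 1, since vdata occupies slot 0. A sketch of what should be accepted and rejected under that rule, assuming the store tests use the same "must be a constant integer" wording as the load tests:

  typedef int int8 __attribute__((ext_vector_type(8)));
  typedef _Float16 half;
  typedef half half4 __attribute__((ext_vector_type(4)));

  void store_ok(half4 data, int x, int y, int8 rsrc) {
    // dmask, tfe and cpol are literals: accepted
    __builtin_amdgcn_image_store_2d_v4f16_i32(data, 0xf, x, y, rsrc, 0, 0);
  }

  void store_bad(half4 data, int dmask, int x, int y, int8 rsrc) {
    // non-constant dmask: expected to draw the constant-integer diagnostic
    __builtin_amdgcn_image_store_2d_v4f16_i32(data, dmask, x, y, rsrc, 0, 0);
  }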
diff --git a/clang/test/CodeGen/builtins-image-load-2d-f32.c b/clang/test/CodeGen/builtins-image-load.c
similarity index 100%
rename from clang/test/CodeGen/builtins-image-load-2d-f32.c
rename to clang/test/CodeGen/builtins-image-load.c
diff --git a/clang/test/CodeGen/builtins-image-store.c b/clang/test/CodeGen/builtins-image-store.c
new file mode 100644
index 0000000000000..63ab7f3cf518a
--- /dev/null
+++ b/clang/test/CodeGen/builtins-image-store.c
@@ -0,0 +1,695 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa %s -emit-llvm -o - | FileCheck %s
+
+typedef int int8 __attribute__((ext_vector_type(8)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef _Float16 half;
+typedef half half4 __attribute__((ext_vector_type(4)));
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_2d(
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.2d.f32.i32.v8i32(float [[TMP0]], i32 12, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 106, i32 103)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_2d(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_2d_f32_i32(f32, 12, i32, i32, vec8i32, 106, 103);
+}
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_2d_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.2d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_2d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_2d_v4f32_i32(v4f32, 100, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_2d_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.2d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_2d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_2d_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_2darray(
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.2darray.f32.i32.v8i32(float [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_2darray(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_2darray_f32_i32(f32, 100, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_2darray_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.2darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_2darray_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.2darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110);
+}
+
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_1d_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_1d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_1d_v4f32_i32(v4f32, 100, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_1d_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_1d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_1d_v4f16_i32(v4f16, 100, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_1darray_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.1darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_1darray_v4f32_i32(v4f32, 100, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_1darray_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.1darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_1darray_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_3d_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.3d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_3d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_3d_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_3d_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.3d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_3d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_3d_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_cube_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.cube.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_cube_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_cube_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_cube_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.cube.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_cube_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_cube_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_1d_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_1d_v4f32_i32(v4f32, 100, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_1d_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.1d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_mip_1d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_1d_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_1darray_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_1darray_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_1darray_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.1darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_mip_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_1darray_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2d(
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2d.f32.i32.v8i32(float [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_mip_2d(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_2d_f32_i32(f32, 100, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2d_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_2d_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2d_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_mip_2d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_2d_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2darray(
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2darray.f32.i32.v8i32(float [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_mip_2darray(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_2darray_f32_i32(f32, 100, i32, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2darray_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2darray_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.2darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_mip_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_3d_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_3d_v4f32_i32(v4f32, 100, i32, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_3d_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.3d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_mip_3d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_3d_v4f16_i32(v4f16, 100, i32, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_cube_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_cube_v4f32_i32(v4f32, 100, i32, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_cube_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.cube.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32 [[TMP4]], <8 x i32> [[TMP5]], i32 120, i32 110)
+// CHECK-NEXT: ret void
+//
+void test_builtin_image_store_mip_cube_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_cube_v4f16_i32(v4f16, 100, i32, i32, i32, i32, vec8i32, 120, 110);
+}
diff --git a/clang/test/SemaOpenCL/builtins-image-load-2d-f32-param.cl b/clang/test/SemaOpenCL/builtins-image-load-param.cl
similarity index 100%
rename from clang/test/SemaOpenCL/builtins-image-load-2d-f32-param.cl
rename to clang/test/SemaOpenCL/builtins-image-load-param.cl
diff --git a/clang/test/SemaOpenCL/builtins-image-store-param.cl b/clang/test/SemaOpenCL/builtins-image-store-param.cl
new file mode 100644
index 0000000000000..c8fcc59bf2b66
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-image-store-param.cl
@@ -0,0 +1,132 @@
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -S -verify=expected -o - %s
+// REQUIRES: amdgpu-registered-target
+
+typedef int int8 __attribute__((ext_vector_type(8)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+//typedef _Float16 half;
+typedef half half4 __attribute__((ext_vector_type(4)));
+
+
+void test_builtin_image_store_2d(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_2d_f32_i32(f32, i32, i32, i32, vec8i32, 106, 103); //expected-error{{argument to '__builtin_amdgcn_image_store_2d_f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_2d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_2d_v4f32_i32(v4f32, 100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_2d_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_2d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_2d_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_2d_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_2darray(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_2darray_f32_i32(f32, 100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_2darray_f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_2darray_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_2darray_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_1d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_1d_v4f32_i32(v4f32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_1d_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_1d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_1d_v4f16_i32(v4f16, 100, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_1d_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_1darray_v4f32_i32(v4f32, 100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_1darray_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_1darray_v4f16_i32(v4f16, 100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_1darray_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_3d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_3d_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_3d_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_3d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_3d_v4f16_i32(v4f16, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_3d_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_cube_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_cube_v4f32_i32(v4f32, i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_cube_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_cube_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_cube_v4f16_i32(v4f16, i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_cube_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_1d_v4f32_i32(v4f32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_1d_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_mip_1d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_1d_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_1d_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_1darray_v4f32_i32(v4f32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_1darray_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_mip_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_1darray_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_1darray_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_mip_2d(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_2d_f32_i32(f32, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2d_f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_2d_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2d_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_mip_2d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_2d_v4f16_i32(v4f16, i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2d_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_mip_2darray(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_2darray_f32_i32(f32, i32, i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2darray_f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2darray_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_mip_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_2darray_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_3d_v4f32_i32(v4f32, i32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_3d_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_mip_3d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_3d_v4f16_i32(v4f16, i32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_3d_v4f16_i32' must be a constant integer}}
+}
+
+void test_builtin_image_store_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_cube_v4f32_i32(v4f32, i32, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_cube_v4f32_i32' must be a constant integer}}
+}
+void test_builtin_image_store_mip_cube_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_store_mip_cube_v4f16_i32(v4f16, 100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_cube_v4f16_i32' must be a constant integer}}
+}
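
Note (editorial, not part of the patch): for contrast with the negative cases above, the sketch below shows a call form these checks should accept. The requirement that the dmask operand and the two trailing control operands be integer constant expressions is inferred from the diagnostics tested in this file, and the operand names used in the comments are assumptions, not names the builtin itself defines.

    typedef int int8 __attribute__((ext_vector_type(8)));
    typedef float float4 __attribute__((ext_vector_type(4)));

    // Hedged sketch of a call that should produce no diagnostic: only the
    // coordinates and the resource descriptor are runtime values; the dmask
    // and the two trailing control operands are integer constant expressions.
    void store_texel_2d(float4 data, int x, int y, int8 rsrc) {
      __builtin_amdgcn_image_store_2d_v4f32_i32(data, /*dmask*/ 0xf, x, y, rsrc,
                                                /*texfailctrl*/ 0,
                                                /*cachepolicy*/ 0);
    }
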
From 39403aeeb789e3ca91f6c6c6d027d642da198daf Mon Sep 17 00:00:00 2001
From: ranapratap55 <RanaPratapReddy.Nimmakayala at amd.com>
Date: Tue, 12 Aug 2025 13:19:37 +0530
Subject: [PATCH 4/7] [AMDGPU] Adds EmitAMDGCNImageOverloadedReturnType for
amdgcn_image_load/store and adds 'image-insts' feature
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 113 +++---
clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 383 ++++--------------
clang/lib/Sema/SemaAMDGPU.cpp | 33 +-
clang/test/CodeGen/builtins-image-load.c | 55 +--
clang/test/CodeGen/builtins-image-store.c | 156 +++----
.../SemaOpenCL/builtins-image-load-param.cl | 5 +-
.../SemaOpenCL/builtins-image-store-param.cl | 2 +-
7 files changed, 266 insertions(+), 481 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 02cf766f0c899..33acb614d73c0 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -638,63 +638,62 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_f16_f32, "V2hV2hfUiIb", "nc", "f32-to-f16
//===----------------------------------------------------------------------===//
// Image load/store builtins
//===----------------------------------------------------------------------===//
-TARGET_BUILTIN(__builtin_amdgcn_image_load_1d_v4f32_i32, "V4fiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_1d_v4f16_i32, "V4hiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_1darray_v4f32_i32, "V4fiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_1darray_v4f16_i32, "V4hiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_2d_f32_i32, "fiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_2d_v4f32_i32, "V4fiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_2d_v4f16_i32, "V4hiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_2darray_f32_i32, "fiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_2darray_v4f32_i32, "V4fiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_2darray_v4f16_i32, "V4hiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_3d_v4f32_i32, "V4fiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_3d_v4f16_i32, "V4hiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_cube_v4f32_i32, "V4fiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_cube_v4f16_i32, "V4hiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1d_v4f32_i32, "V4fiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1d_v4f16_i32, "V4hiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1darray_v4f32_i32, "V4fiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1darray_v4f16_i32, "V4hiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2d_f32_i32, "fiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2d_v4f32_i32, "V4fiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2d_v4f16_i32, "V4hiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2darray_f32_i32, "fiiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2darray_v4f32_i32, "V4fiiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2darray_v4f16_i32, "V4hiiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_3d_v4f32_i32, "V4fiiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_3d_v4f16_i32, "V4hiiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_cube_v4f32_i32, "V4fiiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_cube_v4f16_i32, "V4hiiiiiV8iii", "nc", "")
-
-TARGET_BUILTIN(__builtin_amdgcn_image_store_1d_v4f32_i32, "vV4fiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_1d_v4f16_i32, "vV4hiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_1darray_v4f32_i32, "vV4fiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_1darray_v4f16_i32, "vV4hiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_f32_i32, "vfiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_v4f32_i32, "vV4fiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_v4f16_i32, "vV4hiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_f32_i32, "vfiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_v4f32_i32, "vV4fiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_v4f16_i32, "vV4hiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_3d_v4f32_i32, "vV4fiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_3d_v4f16_i32, "vV4hiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_cube_v4f32_i32, "vV4fiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_cube_v4f16_i32, "vV4hiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1d_v4f32_i32, "vV4fiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1d_v4f16_i32, "vV4hiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1darray_v4f32_i32, "vV4fiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1darray_v4f16_i32, "vV4hiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_f32_i32, "vfiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_v4f32_i32, "vV4fiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_v4f16_i32, "vV4hiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_f32_i32, "vfiiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_v4f32_i32, "vV4fiiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_v4f16_i32, "vV4hiiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_3d_v4f32_i32, "vV4fiiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_3d_v4f16_i32, "vV4hiiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_cube_v4f32_i32, "vV4fiiiiiV8iii", "nc", "")
-TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_cube_v4f16_i32, "vV4hiiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_1d_v4f32_i32, "V4fiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_1d_v4f16_i32, "V4hiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_1darray_v4f32_i32, "V4fiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_1darray_v4f16_i32, "V4hiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2d_f32_i32, "fiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2d_v4f32_i32, "V4fiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2d_v4f16_i32, "V4hiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2darray_f32_i32, "fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2darray_v4f32_i32, "V4fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2darray_v4f16_i32, "V4hiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_3d_v4f32_i32, "V4fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_3d_v4f16_i32, "V4hiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_cube_v4f32_i32, "V4fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_cube_v4f16_i32, "V4hiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1d_v4f32_i32, "V4fiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1d_v4f16_i32, "V4hiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1darray_v4f32_i32, "V4fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1darray_v4f16_i32, "V4hiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2d_f32_i32, "fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2d_v4f32_i32, "V4fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2d_v4f16_i32, "V4hiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2darray_f32_i32, "fiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2darray_v4f32_i32, "V4fiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2darray_v4f16_i32, "V4hiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_3d_v4f32_i32, "V4fiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_3d_v4f16_i32, "V4hiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_cube_v4f32_i32, "V4fiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_cube_v4f16_i32, "V4hiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_1d_v4f32_i32, "vV4fiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_1d_v4f16_i32, "vV4hiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_1darray_v4f32_i32, "vV4fiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_1darray_v4f16_i32, "vV4hiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_f32_i32, "vfiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_v4f32_i32, "vV4fiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2d_v4f16_i32, "vV4hiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_f32_i32, "vfiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_v4f32_i32, "vV4fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_2darray_v4f16_i32, "vV4hiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_3d_v4f32_i32, "vV4fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_3d_v4f16_i32, "vV4hiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_cube_v4f32_i32, "vV4fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_cube_v4f16_i32, "vV4hiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1d_v4f32_i32, "vV4fiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1d_v4f16_i32, "vV4hiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1darray_v4f32_i32, "vV4fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_1darray_v4f16_i32, "vV4hiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_f32_i32, "vfiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_v4f32_i32, "vV4fiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2d_v4f16_i32, "vV4hiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_f32_i32, "vfiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_v4f32_i32, "vV4fiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_2darray_v4f16_i32, "vV4hiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_3d_v4f32_i32, "vV4fiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_3d_v4f16_i32, "vV4hiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_cube_v4f32_i32, "vV4fiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_cube_v4f16_i32, "vV4hiiiiiV8iii", "nc", "image-insts")
#undef BUILTIN
#undef TARGET_BUILTIN
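For reference, a minimal usage sketch of the new builtins, modelled on the tests further down in this patch. The helper names and the dmask/tfe/cpol values are illustrative only; dmask selects the enabled channels, and per the Sema checks below dmask, tfe and cpol must be integer constant expressions:

    typedef int int8 __attribute__((ext_vector_type(8)));
    typedef float float4 __attribute__((ext_vector_type(4)));

    float4 load_texel_2d(int s, int t, int8 rsrc) {
      return __builtin_amdgcn_image_load_2d_v4f32_i32(/*dmask=*/15, s, t, rsrc,
                                                      /*tfe=*/0, /*cpol=*/0);
    }

    void store_texel_2d(float4 data, int s, int t, int8 rsrc) {
      __builtin_amdgcn_image_store_2d_v4f32_i32(data, /*dmask=*/15, s, t, rsrc,
                                                /*tfe=*/0, /*cpol=*/0);
    }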
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 97a1078d7bb77..ef635379b9900 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -10,9 +10,12 @@
//
//===----------------------------------------------------------------------===//
+#include "CodeGenFunction.h"
#include "CGBuiltin.h"
#include "clang/Basic/TargetBuiltins.h"
+#include "clang/Frontend/FrontendDiagnostic.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
@@ -160,6 +163,27 @@ static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
return Call;
}
+// Emit an overloaded AMDGPU image load/store intrinsic: the result type is
+// taken from the builtin call (void for stores) and all N builtin arguments
+// are forwarded unchanged.
+template <unsigned N>
+static llvm::CallInst *
+EmitAMDGCNImageOverloadedReturnType(clang::CodeGen::CodeGenFunction &CGF,
+                                    const clang::CallExpr *E,
+                                    unsigned IntrinsicID, bool IsImageStore) {
+  static_assert(N > 0, "expected a non-empty argument list");
+
+  assert(E->getNumArgs() == N &&
+         "Argument count mismatch with builtin definition");
+
+  llvm::SmallVector<llvm::Value *, N> Args;
+ for (unsigned I = 0; I < N; ++I)
+ Args.push_back(CGF.EmitScalarExpr(E->getArg(I)));
+
+ llvm::Type *RetTy = CGF.ConvertType(E->getType());
+ if (IsImageStore)
+ RetTy = CGF.VoidTy;
+ llvm::CallInst *Call = CGF.Builder.CreateIntrinsic(RetTy, IntrinsicID, Args);
+ return Call;
+}
+
// Emit an intrinsic that has 1 float or double operand, and 1 integer.
static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
const CallExpr *E,
@@ -684,364 +708,109 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
return Builder.CreateInsertElement(I0, A, 1);
}
case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType<5>(
+ *this, E, Intrinsic::amdgcn_image_load_1d, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType<6>(
+ *this, E, Intrinsic::amdgcn_image_load_1darray, false);
case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType<6>(
+ *this, E, Intrinsic::amdgcn_image_load_2d, false);
case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType<7>(
+ *this, E, Intrinsic::amdgcn_image_load_2darray, false);
case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType<7>(
+ *this, E, Intrinsic::amdgcn_image_load_3d, false);
case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType<7>(
+ *this, E, Intrinsic::amdgcn_image_load_cube, false);
case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType<6>(
+ *this, E, Intrinsic::amdgcn_image_load_mip_1d, false);
case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType<7>(
+ *this, E, Intrinsic::amdgcn_image_load_mip_1darray, false);
case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType<7>(
+ *this, E, Intrinsic::amdgcn_image_load_mip_2d, false);
case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType<8>(
+ *this, E, Intrinsic::amdgcn_image_load_mip_2darray, false);
case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType<8>(
+ *this, E, Intrinsic::amdgcn_image_load_mip_3d, false);
case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: {
- llvm::Type *RetTy = nullptr;
- switch (BuiltinID) {
- case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
- RetTy = llvm::Type::getFloatTy(Builder.getContext());
- break;
- case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
- RetTy =
- FixedVectorType::get(llvm::Type::getFloatTy(Builder.getContext()), 4);
- break;
- case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
- RetTy =
- FixedVectorType::get(llvm::Type::getHalfTy(Builder.getContext()), 4);
- break;
- }
-
- llvm::Value *Dmask = EmitScalarExpr(E->getArg(0));
- llvm::Value *S = EmitScalarExpr(E->getArg(1));
- llvm::Value *T = EmitScalarExpr(E->getArg(2));
- llvm::Value *Slice = nullptr;
- llvm::Value *Mip = nullptr;
- llvm::Value *Rsrc = nullptr;
- llvm::Value *Tfe = nullptr;
- llvm::Value *Cpol = nullptr;
-
- SmallVector<Value *, 10> ArgTys;
-
- Intrinsic::ID IID;
- llvm::CallInst *Call;
-
- switch (BuiltinID) {
- case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32: {
- Rsrc = EmitScalarExpr(E->getArg(2));
- Tfe = EmitScalarExpr(E->getArg(3));
- Cpol = EmitScalarExpr(E->getArg(4));
-
- ArgTys = {Dmask, S, Rsrc, Tfe, Cpol};
- IID = Intrinsic::amdgcn_image_load_1d;
- Call = Builder.CreateIntrinsic(RetTy, IID, ArgTys);
- break;
- }
- case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32: {
- Slice = EmitScalarExpr(E->getArg(2));
- Rsrc = EmitScalarExpr(E->getArg(3));
- Tfe = EmitScalarExpr(E->getArg(4));
- Cpol = EmitScalarExpr(E->getArg(5));
-
- ArgTys = {Dmask, S, Slice, Rsrc, Tfe, Cpol};
- IID = Intrinsic::amdgcn_image_load_1darray;
- if (BuiltinID == AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32 ||
- BuiltinID == AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32) {
- IID = Intrinsic::amdgcn_image_load_mip_1d;
- }
- Call = Builder.CreateIntrinsic(RetTy, IID, ArgTys);
- break;
- }
- case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32: {
- Rsrc = EmitScalarExpr(E->getArg(3));
- Tfe = EmitScalarExpr(E->getArg(4));
- Cpol = EmitScalarExpr(E->getArg(5));
-
- ArgTys = {Dmask, S, T, Rsrc, Tfe, Cpol};
- IID = Intrinsic::amdgcn_image_load_2d;
- Call = Builder.CreateIntrinsic(RetTy, IID, ArgTys);
- break;
- }
- case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32: {
- Slice = EmitScalarExpr(E->getArg(3));
- Rsrc = EmitScalarExpr(E->getArg(4));
- Tfe = EmitScalarExpr(E->getArg(5));
- Cpol = EmitScalarExpr(E->getArg(6));
-
- ArgTys = {Dmask, S, T, Slice, Rsrc, Tfe, Cpol};
- IID = Intrinsic::amdgcn_image_load_2darray;
-
- switch (BuiltinID) {
- case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
- IID = Intrinsic::amdgcn_image_load_3d;
- break;
- case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
- IID = Intrinsic::amdgcn_image_load_cube;
- break;
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
- IID = Intrinsic::amdgcn_image_load_mip_1darray;
- break;
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
- IID = Intrinsic::amdgcn_image_load_mip_2d;
- break;
- }
- Call = Builder.CreateIntrinsic(RetTy, IID, ArgTys);
- break;
- }
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: {
- Slice = EmitScalarExpr(E->getArg(3));
- Mip = EmitScalarExpr(E->getArg(4));
- Rsrc = EmitScalarExpr(E->getArg(5));
- Tfe = EmitScalarExpr(E->getArg(6));
- Cpol = EmitScalarExpr(E->getArg(7));
-
- ArgTys = {Dmask, S, T, Slice, Mip, Rsrc, Tfe, Cpol};
- IID = Intrinsic::amdgcn_image_load_mip_2darray;
- switch (BuiltinID) {
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
- IID = Intrinsic::amdgcn_image_load_mip_3d;
- break;
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
- IID = Intrinsic::amdgcn_image_load_mip_cube;
- break;
- }
- Call = Builder.CreateIntrinsic(RetTy, IID, ArgTys);
- break;
- }
- }
-
- return Call;
- }
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType<8>(
+ *this, E, Intrinsic::amdgcn_image_load_mip_cube, false);
case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType<6>(
+ *this, E, Intrinsic::amdgcn_image_store_1d, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType<7>(
+ *this, E, Intrinsic::amdgcn_image_store_1darray, true);
case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType<7>(
+ *this, E, Intrinsic::amdgcn_image_store_2d, true);
case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType<8>(
+ *this, E, Intrinsic::amdgcn_image_store_2darray, true);
case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType<8>(
+ *this, E, Intrinsic::amdgcn_image_store_3d, true);
case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType<8>(
+ *this, E, Intrinsic::amdgcn_image_store_cube, true);
case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType<7>(
+ *this, E, Intrinsic::amdgcn_image_store_mip_1d, true);
case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType<8>(
+ *this, E, Intrinsic::amdgcn_image_store_mip_1darray, true);
case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType<8>(
+ *this, E, Intrinsic::amdgcn_image_store_mip_2d, true);
case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType<9>(
+ *this, E, Intrinsic::amdgcn_image_store_mip_2darray, true);
case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType<9>(
+ *this, E, Intrinsic::amdgcn_image_store_mip_3d, true);
case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32: {
- llvm::Type *RetTy = VoidTy;
- llvm::Value *Vdata = EmitScalarExpr(E->getArg(0));
- llvm::Value *Dmask = EmitScalarExpr(E->getArg(1));
- llvm::Value *S = EmitScalarExpr(E->getArg(2));
- llvm::Value *T = EmitScalarExpr(E->getArg(3));
- llvm::Value *Slice = nullptr;
- llvm::Value *Mip = nullptr;
- llvm::Value *Rsrc = nullptr;
- llvm::Value *Tfe = nullptr;
- llvm::Value *Cpol = nullptr;
-
- SmallVector<Value *, 10> ArgTys;
-
- Intrinsic::ID IID;
- llvm::CallInst *Call;
-
- switch (BuiltinID) {
- case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32: {
- Rsrc = EmitScalarExpr(E->getArg(3));
- Tfe = EmitScalarExpr(E->getArg(4));
- Cpol = EmitScalarExpr(E->getArg(5));
-
- ArgTys = {Vdata, Dmask, S, Rsrc, Tfe, Cpol};
- IID = Intrinsic::amdgcn_image_store_1d;
- Call = Builder.CreateIntrinsic(RetTy, IID, ArgTys);
- break;
- }
- case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32: {
- Slice = EmitScalarExpr(E->getArg(3));
- Rsrc = EmitScalarExpr(E->getArg(4));
- Tfe = EmitScalarExpr(E->getArg(5));
- Cpol = EmitScalarExpr(E->getArg(6));
-
- ArgTys = {Vdata, Dmask, S, Slice, Rsrc, Tfe, Cpol};
- IID = Intrinsic::amdgcn_image_store_1darray;
- if (BuiltinID ==
- AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32 ||
- BuiltinID ==
- AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32) {
- IID = Intrinsic::amdgcn_image_store_mip_1d;
- }
- Call = Builder.CreateIntrinsic(RetTy, IID, ArgTys);
- break;
- }
- case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32: {
- Rsrc = EmitScalarExpr(E->getArg(4));
- Tfe = EmitScalarExpr(E->getArg(5));
- Cpol = EmitScalarExpr(E->getArg(6));
-
- ArgTys = {Vdata, Dmask, S, T, Rsrc, Tfe, Cpol};
- IID = Intrinsic::amdgcn_image_store_2d;
- Call = Builder.CreateIntrinsic(RetTy, IID, ArgTys);
- break;
- }
- case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32: {
- Slice = EmitScalarExpr(E->getArg(4));
- Rsrc = EmitScalarExpr(E->getArg(5));
- Tfe = EmitScalarExpr(E->getArg(6));
- Cpol = EmitScalarExpr(E->getArg(7));
-
- ArgTys = {Vdata, Dmask, S, T, Slice, Rsrc, Tfe, Cpol};
- IID = Intrinsic::amdgcn_image_store_2darray;
-
- switch (BuiltinID) {
- case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
- IID = Intrinsic::amdgcn_image_store_3d;
- break;
- case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
- IID = Intrinsic::amdgcn_image_store_cube;
- break;
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
- IID = Intrinsic::amdgcn_image_store_mip_1darray;
- break;
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:
- IID = Intrinsic::amdgcn_image_store_mip_2d;
- break;
- }
- Call = Builder.CreateIntrinsic(RetTy, IID, ArgTys);
- break;
- }
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32: {
- Slice = EmitScalarExpr(E->getArg(4));
- Mip = EmitScalarExpr(E->getArg(5));
- Rsrc = EmitScalarExpr(E->getArg(6));
- Tfe = EmitScalarExpr(E->getArg(7));
- Cpol = EmitScalarExpr(E->getArg(8));
-
- ArgTys = {Vdata, Dmask, S, T, Slice, Mip, Rsrc, Tfe, Cpol};
- IID = Intrinsic::amdgcn_image_store_mip_2darray;
- switch (BuiltinID) {
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
- IID = Intrinsic::amdgcn_image_store_mip_3d;
- break;
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32:
- IID = Intrinsic::amdgcn_image_store_mip_cube;
- break;
- }
- Call = Builder.CreateIntrinsic(RetTy, IID, ArgTys);
- break;
- }
- }
- return Call;
- }
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32:
+ return EmitAMDGCNImageOverloadedReturnType<9>(
+ *this, E, Intrinsic::amdgcn_image_store_mip_cube, true);
case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8);
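To make the new lowering concrete, here is a sketch of what the shared helper emits for the 1d load builtin, reusing the typedefs from the sketch above. The IR in the comment matches the autogenerated checks in the tests below, with %s and %rsrc standing in for the coordinate and the resource descriptor:

    // Lowers to:
    //   call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(
    //       i32 100, i32 %s, <8 x i32> %rsrc, i32 120, i32 110)
    float4 load_texel_1d(int s, int8 rsrc) {
      return __builtin_amdgcn_image_load_1d_v4f32_i32(100, s, rsrc, 120, 110);
    }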
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index 2381f73a870d6..559408ccb300a 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -13,6 +13,7 @@
#include "clang/Sema/SemaAMDGPU.h"
#include "clang/Basic/DiagnosticSema.h"
#include "clang/Basic/TargetBuiltins.h"
+#include "clang/Frontend/FrontendDiagnostic.h"
#include "clang/Sema/Ownership.h"
#include "clang/Sema/Sema.h"
#include "llvm/Support/AtomicOrdering.h"
@@ -111,16 +112,20 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: {
+    bool HasImageInsts = Builtin::evaluateRequiredTargetFeatures(
+        "image-insts", CallerFeatureMap);
+    if (!HasImageInsts) {
+ Diag(TheCall->getBeginLoc(), diag::err_builtin_needs_feature)
+ << FD->getDeclName() << "image-insts";
+      return true;
+ }
+
unsigned ArgCount = TheCall->getNumArgs() - 1;
llvm::APSInt Result;
- bool isImmArg =
- (!(SemaRef.BuiltinConstantArg(TheCall, 0, Result)) &&
- !(SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) &&
- !(SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result)))
- ? false
- : true;
-
- return isImmArg;
+
+    return SemaRef.BuiltinConstantArg(TheCall, 0, Result) ||
+           SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result) ||
+           SemaRef.BuiltinConstantArg(TheCall, ArgCount - 1, Result);
}
case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
@@ -152,14 +157,10 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32: {
unsigned ArgCount = TheCall->getNumArgs() - 1;
llvm::APSInt Result;
- bool isImmArg =
- (!(SemaRef.BuiltinConstantArg(TheCall, 1, Result)) &&
- !(SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) &&
- !(SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result)))
- ? false
- : true;
-
- return isImmArg;
+
+    return SemaRef.BuiltinConstantArg(TheCall, 1, Result) ||
+           SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result) ||
+           SemaRef.BuiltinConstantArg(TheCall, ArgCount - 1, Result);
}
default:
return false;
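With these checks, the dmask operand (argument 0 for loads, argument 1 for stores) and the trailing tfe/cpol operands must be integer constant expressions. A hypothetical call that passes a runtime dmask, reusing the typedefs from the sketches above, would therefore be diagnosed via BuiltinConstantArg:

    float4 bad_load_1d(int dmask, int s, int8 rsrc) {
      // dmask is a runtime value, not an integer constant expression.
      return __builtin_amdgcn_image_load_1d_v4f32_i32(dmask, s, rsrc, 0, 0);
    }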
diff --git a/clang/test/CodeGen/builtins-image-load.c b/clang/test/CodeGen/builtins-image-load.c
index aee97af37aaf0..5b5b3669ad083 100644
--- a/clang/test/CodeGen/builtins-image-load.c
+++ b/clang/test/CodeGen/builtins-image-load.c
@@ -1,5 +1,5 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1100 %s -emit-llvm -o - | FileCheck %s
typedef int int8 __attribute__((ext_vector_type(8)));
typedef float float4 __attribute__((ext_vector_type(4)));
@@ -55,6 +55,7 @@ float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, vec8i32, 120, 110);
}
+
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2d_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -105,6 +106,7 @@ float test_builtin_image_load_2darray(float f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
}
+
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_2darray_1(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -130,6 +132,7 @@ float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_2darray_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
}
+
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2darray_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -172,14 +175,14 @@ half4 test_builtin_image_load_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], <8 x i32> [[TMP2]], i32 120, i32 110)
-// CHECK-NEXT: ret <4 x float> [[TMP3]]
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], <8 x i32> [[TMP1]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP2]]
//
float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_1d_v4f32_i32(100, i32, vec8i32, 120, 110);
}
+
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1d_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -196,9 +199,8 @@ float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, int8 vec8i32) {
// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], <8 x i32> [[TMP2]], i32 120, i32 110)
-// CHECK-NEXT: ret <4 x half> [[TMP3]]
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], <8 x i32> [[TMP1]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP2]]
//
half4 test_builtin_image_load_1d_2(half4 v4f16, int i32, int8 vec8i32) {
@@ -221,15 +223,15 @@ half4 test_builtin_image_load_1d_2(half4 v4f16, int i32, int8 vec8i32) {
// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
-// CHECK-NEXT: ret <4 x float> [[TMP4]]
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP3]]
//
float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_1darray_v4f32_i32(100, i32, i32, vec8i32, 120, 110);
}
+
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1darray_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -246,10 +248,9 @@ float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
-// CHECK-NEXT: ret <4 x half> [[TMP4]]
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP3]]
//
half4 test_builtin_image_load_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
@@ -281,6 +282,7 @@ float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_3d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
}
+
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_3d_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -332,6 +334,7 @@ float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_cube_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
}
+
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_cube_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -374,15 +377,15 @@ half4 test_builtin_image_load_cube_2(half4 v4f16, int i32, int8 vec8i32) {
// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
-// CHECK-NEXT: ret <4 x float> [[TMP4]]
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP3]]
//
float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(100, i32, i32, vec8i32, 120, 110);
}
+
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1d_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -399,10 +402,9 @@ float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) {
// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
-// CHECK-NEXT: ret <4 x half> [[TMP4]]
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP3]]
//
half4 test_builtin_image_load_mip_1d_2(half4 v4f16, int i32, int8 vec8i32) {
@@ -434,6 +436,7 @@ float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32
return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
}
+
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1darray_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -485,6 +488,7 @@ float test_builtin_image_load_mip_2d(float f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_mip_2d_f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
}
+
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2d_1(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -510,6 +514,7 @@ float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
}
+
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2d_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -562,6 +567,7 @@ float test_builtin_image_load_mip_2darray(float f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_mip_2darray_f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
}
+
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2darray_1(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -588,6 +594,7 @@ float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32
return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
}
+
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2darray_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -641,6 +648,7 @@ float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
}
+
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_3d_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -694,6 +702,7 @@ float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
}
+
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_cube_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
diff --git a/clang/test/CodeGen/builtins-image-store.c b/clang/test/CodeGen/builtins-image-store.c
index 63ab7f3cf518a..cd2b09e074c59 100644
--- a/clang/test/CodeGen/builtins-image-store.c
+++ b/clang/test/CodeGen/builtins-image-store.c
@@ -1,5 +1,5 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 %s -emit-llvm -o - | FileCheck %s
typedef int int8 __attribute__((ext_vector_type(8)));
typedef float float4 __attribute__((ext_vector_type(4)));
@@ -27,8 +27,8 @@ typedef half half4 __attribute__((ext_vector_type(4)));
//
void test_builtin_image_store_2d(float f32, int i32, int8 vec8i32) {
- return __builtin_amdgcn_image_store_2d_f32_i32(f32, 12, i32, i32, vec8i32, 106, 103);
-}
+  __builtin_amdgcn_image_store_2d_f32_i32(f32, 12, i32, i32, vec8i32, 106, 103);
+}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_2d_1(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
@@ -51,8 +51,9 @@ void test_builtin_image_store_2d(float f32, int i32, int8 vec8i32) {
//
void test_builtin_image_store_2d_1(float4 v4f32, int i32, int8 vec8i32) {
- return __builtin_amdgcn_image_store_2d_v4f32_i32(v4f32, 100, i32, i32, vec8i32, 120, 110);
-}
+  __builtin_amdgcn_image_store_2d_v4f32_i32(v4f32, 100, i32, i32, vec8i32, 120, 110);
+}
+
// CHECK-LABEL: define dso_local void @test_builtin_image_store_2d_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -74,8 +75,8 @@ void test_builtin_image_store_2d_1(float4 v4f32, int i32, int8 vec8i32) {
//
void test_builtin_image_store_2d_2(half4 v4f16, int i32, int8 vec8i32) {
- return __builtin_amdgcn_image_store_2d_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, 110);
-}
+  __builtin_amdgcn_image_store_2d_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, 110);
+}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_2darray(
// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
@@ -99,8 +100,9 @@ void test_builtin_image_store_2d_2(half4 v4f16, int i32, int8 vec8i32) {
//
void test_builtin_image_store_2darray(float f32, int i32, int8 vec8i32) {
- return __builtin_amdgcn_image_store_2darray_f32_i32(f32, 100, i32, i32, i32, vec8i32, 120, 110);
-}
+  __builtin_amdgcn_image_store_2darray_f32_i32(f32, 100, i32, i32, i32, vec8i32, 120, 110);
+}
+
// CHECK-LABEL: define dso_local void @test_builtin_image_store_2darray_1(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -123,8 +125,9 @@ void test_builtin_image_store_2darray(float f32, int i32, int8 vec8i32) {
//
void test_builtin_image_store_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
- return __builtin_amdgcn_image_store_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110);
-}
+  __builtin_amdgcn_image_store_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110);
+}
+
// CHECK-LABEL: define dso_local void @test_builtin_image_store_2darray_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -147,9 +150,8 @@ void test_builtin_image_store_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
//
void test_builtin_image_store_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
- return __builtin_amdgcn_image_store_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110);
-}
-
+  __builtin_amdgcn_image_store_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110);
+}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_1d_1(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
@@ -166,14 +168,14 @@ void test_builtin_image_store_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
// CHECK-NEXT: ret void
//
void test_builtin_image_store_1d_1(float4 v4f32, int i32, int8 vec8i32) {
- __builtin_amdgcn_image_store_1d_v4f32_i32(v4f32, 100, i32, vec8i32, 120, 110);
-}
+  __builtin_amdgcn_image_store_1d_v4f32_i32(v4f32, 100, i32, vec8i32, 120, 110);
+}
+
// CHECK-LABEL: define dso_local void @test_builtin_image_store_1d_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -189,14 +191,13 @@ void test_builtin_image_store_1d_1(float4 v4f32, int i32, int8 vec8i32) {
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110)
// CHECK-NEXT: ret void
//
void test_builtin_image_store_1d_2(half4 v4f16, int i32, int8 vec8i32) {
- return __builtin_amdgcn_image_store_1d_v4f16_i32(v4f16, 100, i32, vec8i32, 120, 110);
-}
+  __builtin_amdgcn_image_store_1d_v4f16_i32(v4f16, 100, i32, vec8i32, 120, 110);
+}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_1darray_1(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
@@ -213,15 +214,15 @@ void test_builtin_image_store_1d_2(half4 v4f16, int i32, int8 vec8i32) {
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.1darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.1darray.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
// CHECK-NEXT: ret void
//
void test_builtin_image_store_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
- return __builtin_amdgcn_image_store_1darray_v4f32_i32(v4f32, 100, i32, i32, vec8i32, 120, 110);
-}
+  __builtin_amdgcn_image_store_1darray_v4f32_i32(v4f32, 100, i32, i32, vec8i32, 120, 110);
+}
+
// CHECK-LABEL: define dso_local void @test_builtin_image_store_1darray_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -237,15 +238,14 @@ void test_builtin_image_store_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.1darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.1darray.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
// CHECK-NEXT: ret void
//
void test_builtin_image_store_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
- return __builtin_amdgcn_image_store_1darray_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, 110);
-}
+  __builtin_amdgcn_image_store_1darray_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, 110);
+}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_3d_1(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
@@ -269,8 +269,9 @@ void test_builtin_image_store_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
//
void test_builtin_image_store_3d_1(float4 v4f32, int i32, int8 vec8i32) {
- return __builtin_amdgcn_image_store_3d_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110);
-}
+  __builtin_amdgcn_image_store_3d_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110);
+}
+
// CHECK-LABEL: define dso_local void @test_builtin_image_store_3d_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -293,8 +294,8 @@ void test_builtin_image_store_3d_1(float4 v4f32, int i32, int8 vec8i32) {
//
void test_builtin_image_store_3d_2(half4 v4f16, int i32, int8 vec8i32) {
- return __builtin_amdgcn_image_store_3d_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110);
-}
+  __builtin_amdgcn_image_store_3d_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110);
+}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_cube_1(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
@@ -318,8 +319,9 @@ void test_builtin_image_store_3d_2(half4 v4f16, int i32, int8 vec8i32) {
//
void test_builtin_image_store_cube_1(float4 v4f32, int i32, int8 vec8i32) {
- return __builtin_amdgcn_image_store_cube_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110);
-}
+  __builtin_amdgcn_image_store_cube_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110);
+}
+
// CHECK-LABEL: define dso_local void @test_builtin_image_store_cube_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -342,8 +344,8 @@ void test_builtin_image_store_cube_1(float4 v4f32, int i32, int8 vec8i32) {
//
void test_builtin_image_store_cube_2(half4 v4f16, int i32, int8 vec8i32) {
- return __builtin_amdgcn_image_store_cube_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110);
-}
+  __builtin_amdgcn_image_store_cube_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110);
+}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_1d_1(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
@@ -360,15 +362,15 @@ void test_builtin_image_store_cube_2(half4 v4f16, int i32, int8 vec8i32) {
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V4F32_ADDR_ASCAST]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32.v8i32(<4 x float> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
// CHECK-NEXT: ret void
//
void test_builtin_image_store_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) {
- return __builtin_amdgcn_image_store_mip_1d_v4f32_i32(v4f32, 100, i32, i32, vec8i32, 120, 110);
-}
+  __builtin_amdgcn_image_store_mip_1d_v4f32_i32(v4f32, 100, i32, i32, vec8i32, 120, 110);
+}
+
// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_1d_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -384,15 +386,14 @@ void test_builtin_image_store_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) {
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[V4F16_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
-// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.1d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: call void @llvm.amdgcn.image.store.mip.1d.v4f16.i32.v8i32(<4 x half> [[TMP0]], i32 100, i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
// CHECK-NEXT: ret void
//
void test_builtin_image_store_mip_1d_2(half4 v4f16, int i32, int8 vec8i32) {
- return __builtin_amdgcn_image_store_mip_1d_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, 110);
-}
+  __builtin_amdgcn_image_store_mip_1d_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, 110);
+}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_1darray_1(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
@@ -416,8 +417,9 @@ void test_builtin_image_store_mip_1d_2(half4 v4f16, int i32, int8 vec8i32) {
//
void test_builtin_image_store_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
- return __builtin_amdgcn_image_store_mip_1darray_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110);
-}
+  __builtin_amdgcn_image_store_mip_1darray_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110);
+}
+
// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_1darray_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -440,8 +442,8 @@ void test_builtin_image_store_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32)
//
void test_builtin_image_store_mip_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
- return __builtin_amdgcn_image_store_mip_1darray_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110);
-}
+  __builtin_amdgcn_image_store_mip_1darray_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110);
+}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2d(
// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
@@ -465,8 +467,9 @@ void test_builtin_image_store_mip_1darray_2(half4 v4f16, int i32, int8 vec8i32)
//
void test_builtin_image_store_mip_2d(float f32, int i32, int8 vec8i32) {
- return __builtin_amdgcn_image_store_mip_2d_f32_i32(f32, 100, i32, i32, i32, vec8i32, 120, 110);
-}
+  __builtin_amdgcn_image_store_mip_2d_f32_i32(f32, 100, i32, i32, i32, vec8i32, 120, 110);
+}
+
// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2d_1(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -489,8 +492,9 @@ void test_builtin_image_store_mip_2d(float f32, int i32, int8 vec8i32) {
//
void test_builtin_image_store_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) {
- return __builtin_amdgcn_image_store_mip_2d_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110);
-}
+  __builtin_amdgcn_image_store_mip_2d_v4f32_i32(v4f32, 100, i32, i32, i32, vec8i32, 120, 110);
+}
+
// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2d_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -513,8 +517,8 @@ void test_builtin_image_store_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) {
//
void test_builtin_image_store_mip_2d_2(half4 v4f16, int i32, int8 vec8i32) {
- return __builtin_amdgcn_image_store_mip_2d_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110);
-}
+  __builtin_amdgcn_image_store_mip_2d_v4f16_i32(v4f16, 100, i32, i32, i32, vec8i32, 120, 110);
+}
// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2darray(
// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
@@ -539,8 +543,9 @@ void test_builtin_image_store_mip_2d_2(half4 v4f16, int i32, int8 vec8i32) {
//
void test_builtin_image_store_mip_2darray(float f32, int i32, int8 vec8i32) {
- return __builtin_amdgcn_image_store_mip_2darray_f32_i32(f32, 100, i32, i32, i32, i32, vec8i32, 120, 110);
-}
+  __builtin_amdgcn_image_store_mip_2darray_f32_i32(f32, 100, i32, i32, i32, i32, vec8i32, 120, 110);
+}
+
// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2darray_1(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -564,8 +569,9 @@ void test_builtin_image_store_mip_2darray(float f32, int i32, int8 vec8i32) {
//
void test_builtin_image_store_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
- return __builtin_amdgcn_image_store_mip_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, i32, vec8i32, 120, 110);
-}
+ __builtin_amdgcn_image_store_mip_2darray_v4f32_i32(v4f32, 100, i32, i32, i32, i32, vec8i32, 120, 110);
+ }
+
// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_2darray_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -589,8 +595,8 @@ void test_builtin_image_store_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32)
//
void test_builtin_image_store_mip_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
- return __builtin_amdgcn_image_store_mip_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, i32, vec8i32, 120, 110);
-}
+ __builtin_amdgcn_image_store_mip_2darray_v4f16_i32(v4f16, 100, i32, i32, i32, i32, vec8i32, 120, 110);
+ }
// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_3d_1(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
@@ -615,8 +621,9 @@ void test_builtin_image_store_mip_2darray_2(half4 v4f16, int i32, int8 vec8i32)
//
void test_builtin_image_store_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) {
- return __builtin_amdgcn_image_store_mip_3d_v4f32_i32(v4f32, 100, i32, i32, i32, i32, vec8i32, 120, 110);
-}
+ __builtin_amdgcn_image_store_mip_3d_v4f32_i32(v4f32, 100, i32, i32, i32, i32, vec8i32, 120, 110);
+ }
+
// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_3d_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -640,8 +647,8 @@ void test_builtin_image_store_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) {
//
void test_builtin_image_store_mip_3d_2(half4 v4f16, int i32, int8 vec8i32) {
- return __builtin_amdgcn_image_store_mip_3d_v4f16_i32(v4f16, 100, i32, i32, i32, i32, vec8i32, 120, 110);
-}
+ __builtin_amdgcn_image_store_mip_3d_v4f16_i32(v4f16, 100, i32, i32, i32, i32, vec8i32, 120, 110);
+ }
// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_cube_1(
// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
@@ -666,8 +673,9 @@ void test_builtin_image_store_mip_3d_2(half4 v4f16, int i32, int8 vec8i32) {
//
void test_builtin_image_store_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) {
- return __builtin_amdgcn_image_store_mip_cube_v4f32_i32(v4f32, 100, i32, i32, i32, i32, vec8i32, 120, 110);
-}
+ __builtin_amdgcn_image_store_mip_cube_v4f32_i32(v4f32, 100, i32, i32, i32, i32, vec8i32, 120, 110);
+ }
+
// CHECK-LABEL: define dso_local void @test_builtin_image_store_mip_cube_2(
// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -691,5 +699,5 @@ void test_builtin_image_store_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) {
//
void test_builtin_image_store_mip_cube_2(half4 v4f16, int i32, int8 vec8i32) {
- return __builtin_amdgcn_image_store_mip_cube_v4f16_i32(v4f16, 100, i32, i32, i32, i32, vec8i32, 120, 110);
-}
+ __builtin_amdgcn_image_store_mip_cube_v4f16_i32(v4f16, 100, i32, i32, i32, i32, vec8i32, 120, 110);
+ }
diff --git a/clang/test/SemaOpenCL/builtins-image-load-param.cl b/clang/test/SemaOpenCL/builtins-image-load-param.cl
index 7b5aab4011da9..fb61316cb6a77 100644
--- a/clang/test/SemaOpenCL/builtins-image-load-param.cl
+++ b/clang/test/SemaOpenCL/builtins-image-load-param.cl
@@ -1,12 +1,11 @@
-// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -S -verify=expected -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1100 -S -verify=expected -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -S -verify=expected -o - %s
// REQUIRES: amdgpu-registered-target
typedef int int8 __attribute__((ext_vector_type(8)));
typedef float float4 __attribute__((ext_vector_type(4)));
-//typedef _Float16 half;
typedef half half4 __attribute__((ext_vector_type(4)));
-
float test_builtin_image_load_2d(float f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_2d_f32_i32(i32, i32, i32, vec8i32, 106, 103); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_f32_i32' must be a constant integer}}
diff --git a/clang/test/SemaOpenCL/builtins-image-store-param.cl b/clang/test/SemaOpenCL/builtins-image-store-param.cl
index c8fcc59bf2b66..70a7149b25be3 100644
--- a/clang/test/SemaOpenCL/builtins-image-store-param.cl
+++ b/clang/test/SemaOpenCL/builtins-image-store-param.cl
@@ -1,9 +1,9 @@
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -S -verify=expected -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx701 -S -verify=GFX7 -o - %s
// REQUIRES: amdgpu-registered-target
typedef int int8 __attribute__((ext_vector_type(8)));
typedef float float4 __attribute__((ext_vector_type(4)));
-//typedef _Float16 half;
typedef half half4 __attribute__((ext_vector_type(4)));
>From f512daef1d335d44ad84b5e4c471dd61ebd33321 Mon Sep 17 00:00:00 2001
From: ranapratap55 <RanaPratapReddy.Nimmakayala at amd.com>
Date: Wed, 20 Aug 2025 12:36:02 +0530
Subject: [PATCH 5/7] [AMDGPU] Adds target failure test cases and minor
modifications
---
clang/lib/Sema/SemaAMDGPU.cpp | 16 +++++++++++++---
.../test/SemaOpenCL/builtins-image-load-param.cl | 14 ++++++++++++++
.../SemaOpenCL/builtins-image-store-param.cl | 15 ++++++++++++++-
3 files changed, 41 insertions(+), 4 deletions(-)
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index 559408ccb300a..e3bf0dc089cb7 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -112,11 +112,12 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: {
+ StringRef FeatureList(getASTContext().BuiltinInfo.getRequiredFeatures(BuiltinID));
bool HasImageInsts = Builtin::evaluateRequiredTargetFeatures("image-insts", CallerFeatureMap);
-
- if(!HasImageInsts){
+ if (!Builtin::evaluateRequiredTargetFeatures(
+ FeatureList, CallerFeatureMap) && !HasImageInsts){
Diag(TheCall->getBeginLoc(), diag::err_builtin_needs_feature)
- << FD->getDeclName() << "image-insts";
+ << FD->getDeclName() << FeatureList;
return false;
}
@@ -155,6 +156,15 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32: {
+ StringRef FeatureList(getASTContext().BuiltinInfo.getRequiredFeatures(BuiltinID));
+ bool HasImageInsts = Builtin::evaluateRequiredTargetFeatures("image-insts", CallerFeatureMap);
+ if (!Builtin::evaluateRequiredTargetFeatures(
+ FeatureList, CallerFeatureMap) && !HasImageInsts){
+ Diag(TheCall->getBeginLoc(), diag::err_builtin_needs_feature)
+ << FD->getDeclName() << FeatureList;
+ return false;
+ }
+
unsigned ArgCount = TheCall->getNumArgs() - 1;
llvm::APSInt Result;
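
For readers skimming the patch, the hunk above boils down to: take the builtin's required-feature string straight from BuiltinsAMDGPU.def and reject the call with err_builtin_needs_feature when the caller's feature map does not satisfy it. Below is a standalone, simplified sketch of that check (not clang code: the real Builtin::evaluateRequiredTargetFeatures also understands '|' and parenthesised expressions, and the feature sets used here are only illustrative), kept to the comma-separated "all features required" case that the image builtins use:

  /* Simplified model of the feature check performed in SemaAMDGPU.cpp above. */
  #include <stdbool.h>
  #include <stdio.h>
  #include <string.h>

  /* Returns true if every comma-separated feature in RequiredList appears in
   * the NULL-terminated array of caller features. */
  static bool hasAllRequiredFeatures(const char *RequiredList,
                                     const char *const CallerFeatures[]) {
    char Buf[256];
    strncpy(Buf, RequiredList, sizeof(Buf) - 1);
    Buf[sizeof(Buf) - 1] = '\0';
    for (char *Feat = strtok(Buf, ","); Feat; Feat = strtok(NULL, ",")) {
      bool Found = false;
      for (const char *const *F = CallerFeatures; *F; ++F)
        if (strcmp(*F, Feat) == 0)
          Found = true;
      if (!Found)
        return false; /* would map to err_builtin_needs_feature */
    }
    return true;
  }

  int main(void) {
    /* Illustrative feature sets only, not exact target feature lists. */
    const char *with_images[] = {"image-insts", NULL};
    const char *without_images[] = {"mai-insts", NULL};
    printf("%d\n", hasAllRequiredFeatures("image-insts", with_images));    /* 1 */
    printf("%d\n", hasAllRequiredFeatures("image-insts", without_images)); /* 0 */
    return 0;
  }
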
diff --git a/clang/test/SemaOpenCL/builtins-image-load-param.cl b/clang/test/SemaOpenCL/builtins-image-load-param.cl
index fb61316cb6a77..ebdc8106076f5 100644
--- a/clang/test/SemaOpenCL/builtins-image-load-param.cl
+++ b/clang/test/SemaOpenCL/builtins-image-load-param.cl
@@ -1,5 +1,6 @@
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1100 -S -verify=expected -o - %s
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -S -verify=expected -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx942 -S -verify=GFX94 -o - %s
// REQUIRES: amdgpu-registered-target
typedef int int8 __attribute__((ext_vector_type(8)));
@@ -129,3 +130,16 @@ half4 test_builtin_image_load_mip_cube_2(half4 v4f16, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_mip_cube_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_cube_v4f16_i32' must be a constant integer}}
}
+
+float test_builtin_image_load_2d_gfx(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2d_f32_i32(12, i32, i32, vec8i32, 106, 103); //GFX94-error{{'__builtin_amdgcn_image_load_2d_f32_i32' needs target feature image-insts}}
+}
+float4 test_builtin_image_load_2d_gfx_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, vec8i32, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_load_2d_v4f32_i32' needs target feature image-insts}}
+}
+half4 test_builtin_image_load_2d_gfx_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, vec8i32, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_load_2d_v4f16_i32' needs target feature image-insts}}
+}
diff --git a/clang/test/SemaOpenCL/builtins-image-store-param.cl b/clang/test/SemaOpenCL/builtins-image-store-param.cl
index 70a7149b25be3..f84df77171098 100644
--- a/clang/test/SemaOpenCL/builtins-image-store-param.cl
+++ b/clang/test/SemaOpenCL/builtins-image-store-param.cl
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -S -verify=expected -o - %s
-// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx701 -S -verify=GFX7 -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx942 -S -verify=GFX94 -o - %s
// REQUIRES: amdgpu-registered-target
typedef int int8 __attribute__((ext_vector_type(8)));
@@ -130,3 +130,16 @@ void test_builtin_image_store_mip_cube_2(half4 v4f16, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_store_mip_cube_v4f16_i32(v4f16, 100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_store_mip_cube_v4f16_i32' must be a constant integer}}
}
+
+void test_builtin_image_store_2d_gfx(float f32, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_2d_f32_i32(f32, 12, i32, i32, vec8i32, 106, 103); //GFX94-error{{'__builtin_amdgcn_image_store_2d_f32_i32' needs target feature image-insts}}
+}
+void test_builtin_image_store_2d_gfx_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_2d_v4f32_i32(v4f32, 100, i32, i32, vec8i32, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_store_2d_v4f32_i32' needs target feature image-insts}}
+ }
+ void test_builtin_image_store_2d_gfx_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ __builtin_amdgcn_image_store_2d_v4f16_i32(v4f16, 100, i32, i32, vec8i32, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_store_2d_v4f16_i32' needs target feature image-insts}}
+ }
>From d6b2395f87d0965731f63114914a39ed5df6af99 Mon Sep 17 00:00:00 2001
From: ranapratap55 <RanaPratapReddy.Nimmakayala at amd.com>
Date: Thu, 21 Aug 2025 11:13:17 +0530
Subject: [PATCH 6/7] [AMDGPU] Minor changes
---
clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 58 +++++++++------------
clang/lib/Sema/SemaAMDGPU.cpp | 6 +--
2 files changed, 28 insertions(+), 36 deletions(-)
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index ef635379b9900..06048e6b069d7 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -163,18 +163,12 @@ static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
return Call;
}
-template <unsigned N>
llvm::CallInst *EmitAMDGCNImageOverloadedReturnType(clang::CodeGen::CodeGenFunction &CGF,
const clang::CallExpr *E,
unsigned IntrinsicID,
bool IsImageStore) {
- static_assert(N, "expect non-empty argument");
-
- assert(E->getNumArgs() == N &&
- "Argument count mismatch with builtin definition");
-
- clang::SmallVector<llvm::Value *, N> Args;
- for (unsigned I = 0; I < N; ++I)
+ clang::SmallVector<llvm::Value *, 10> Args;
+ for (unsigned I = 0; I < E->getNumArgs(); ++I)
Args.push_back(CGF.EmitScalarExpr(E->getArg(I)));
llvm::Type *RetTy = CGF.ConvertType(E->getType());
@@ -709,107 +703,107 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
}
case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
- return EmitAMDGCNImageOverloadedReturnType<5>(
+ return EmitAMDGCNImageOverloadedReturnType(
*this, E, Intrinsic::amdgcn_image_load_1d, false);
case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
- return EmitAMDGCNImageOverloadedReturnType<6>(
+ return EmitAMDGCNImageOverloadedReturnType(
*this, E, Intrinsic::amdgcn_image_load_1darray, false);
case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
- return EmitAMDGCNImageOverloadedReturnType<6>(
+ return EmitAMDGCNImageOverloadedReturnType(
*this, E, Intrinsic::amdgcn_image_load_2d, false);
case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
- return EmitAMDGCNImageOverloadedReturnType<7>(
+ return EmitAMDGCNImageOverloadedReturnType(
*this, E, Intrinsic::amdgcn_image_load_2darray, false);
case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
- return EmitAMDGCNImageOverloadedReturnType<7>(
+ return EmitAMDGCNImageOverloadedReturnType(
*this, E, Intrinsic::amdgcn_image_load_3d, false);
case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
- return EmitAMDGCNImageOverloadedReturnType<7>(
+ return EmitAMDGCNImageOverloadedReturnType(
*this, E, Intrinsic::amdgcn_image_load_cube, false);
case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
- return EmitAMDGCNImageOverloadedReturnType<6>(
+ return EmitAMDGCNImageOverloadedReturnType(
*this, E, Intrinsic::amdgcn_image_load_mip_1d, false);
case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
- return EmitAMDGCNImageOverloadedReturnType<7>(
+ return EmitAMDGCNImageOverloadedReturnType(
*this, E, Intrinsic::amdgcn_image_load_mip_1darray, false);
case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
- return EmitAMDGCNImageOverloadedReturnType<7>(
+ return EmitAMDGCNImageOverloadedReturnType(
*this, E, Intrinsic::amdgcn_image_load_mip_2d, false);
case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
- return EmitAMDGCNImageOverloadedReturnType<8>(
+ return EmitAMDGCNImageOverloadedReturnType(
*this, E, Intrinsic::amdgcn_image_load_mip_2darray, false);
case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
- return EmitAMDGCNImageOverloadedReturnType<8>(
+ return EmitAMDGCNImageOverloadedReturnType(
*this, E, Intrinsic::amdgcn_image_load_mip_3d, false);
case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
- return EmitAMDGCNImageOverloadedReturnType<8>(
+ return EmitAMDGCNImageOverloadedReturnType(
*this, E, Intrinsic::amdgcn_image_load_mip_cube, false);
case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32:
- return EmitAMDGCNImageOverloadedReturnType<6>(
+ return EmitAMDGCNImageOverloadedReturnType(
*this, E, Intrinsic::amdgcn_image_store_1d, true);
case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:
- return EmitAMDGCNImageOverloadedReturnType<7>(
+ return EmitAMDGCNImageOverloadedReturnType(
*this, E, Intrinsic::amdgcn_image_store_1darray, true);
case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32:
- return EmitAMDGCNImageOverloadedReturnType<7>(
+ return EmitAMDGCNImageOverloadedReturnType(
*this, E, Intrinsic::amdgcn_image_store_2d, true);
case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:
- return EmitAMDGCNImageOverloadedReturnType<8>(
+ return EmitAMDGCNImageOverloadedReturnType(
*this, E, Intrinsic::amdgcn_image_store_2darray, true);
case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
- return EmitAMDGCNImageOverloadedReturnType<8>(
+ return EmitAMDGCNImageOverloadedReturnType(
*this, E, Intrinsic::amdgcn_image_store_3d, true);
case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
- return EmitAMDGCNImageOverloadedReturnType<8>(
+ return EmitAMDGCNImageOverloadedReturnType(
*this, E, Intrinsic::amdgcn_image_store_cube, true);
case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32:
- return EmitAMDGCNImageOverloadedReturnType<7>(
+ return EmitAMDGCNImageOverloadedReturnType(
*this, E, Intrinsic::amdgcn_image_store_mip_1d, true);
case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
- return EmitAMDGCNImageOverloadedReturnType<8>(
+ return EmitAMDGCNImageOverloadedReturnType(
*this, E, Intrinsic::amdgcn_image_store_mip_1darray, true);
case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:
- return EmitAMDGCNImageOverloadedReturnType<8>(
+ return EmitAMDGCNImageOverloadedReturnType(
*this, E, Intrinsic::amdgcn_image_store_mip_2d, true);
case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:
- return EmitAMDGCNImageOverloadedReturnType<9>(
+ return EmitAMDGCNImageOverloadedReturnType(
*this, E, Intrinsic::amdgcn_image_store_mip_2darray, true);
case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
- return EmitAMDGCNImageOverloadedReturnType<9>(
+ return EmitAMDGCNImageOverloadedReturnType(
*this, E, Intrinsic::amdgcn_image_store_mip_3d, true);
case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32:
- return EmitAMDGCNImageOverloadedReturnType<9>(
+ return EmitAMDGCNImageOverloadedReturnType(
*this, E, Intrinsic::amdgcn_image_store_mip_cube, true);
case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index e3bf0dc089cb7..e29d450951820 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -113,9 +113,8 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: {
StringRef FeatureList(getASTContext().BuiltinInfo.getRequiredFeatures(BuiltinID));
- bool HasImageInsts = Builtin::evaluateRequiredTargetFeatures("image-insts", CallerFeatureMap);
if (!Builtin::evaluateRequiredTargetFeatures(
- FeatureList, CallerFeatureMap) && !HasImageInsts){
+ FeatureList, CallerFeatureMap)){
Diag(TheCall->getBeginLoc(), diag::err_builtin_needs_feature)
<< FD->getDeclName() << FeatureList;
return false;
@@ -157,9 +156,8 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32: {
StringRef FeatureList(getASTContext().BuiltinInfo.getRequiredFeatures(BuiltinID));
- bool HasImageInsts = Builtin::evaluateRequiredTargetFeatures("image-insts", CallerFeatureMap);
if (!Builtin::evaluateRequiredTargetFeatures(
- FeatureList, CallerFeatureMap) && !HasImageInsts){
+ FeatureList, CallerFeatureMap)){
Diag(TheCall->getBeginLoc(), diag::err_builtin_needs_feature)
<< FD->getDeclName() << FeatureList;
return false;
>From 1e9cbede55a46e2e9f738769cef5591d011dcf8b Mon Sep 17 00:00:00 2001
From: ranapratap55 <RanaPratapReddy.Nimmakayala at amd.com>
Date: Tue, 9 Sep 2025 10:37:23 +0530
Subject: [PATCH 7/7] [AMDGPU] Extends builtin support for amdgcn_image_sample
and adds sema checking tests
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 16 +-
clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 26 ++
clang/lib/Sema/SemaAMDGPU.cpp | 16 +-
clang/test/CodeGen/builtins-image-load.c | 431 ++++++++++++++++++
.../SemaOpenCL/builtins-image-load-param.cl | 77 ++++
5 files changed, 564 insertions(+), 2 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 33acb614d73c0..ab55281a11ede 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -636,7 +636,7 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf16_f32, "V2yV2yfUiIb", "nc", "f32-to-f1
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_f16_f32, "V2hV2hfUiIb", "nc", "f32-to-f16bf16-cvt-sr-insts")
//===----------------------------------------------------------------------===//
-// Image load/store builtins
+// Image builtins
//===----------------------------------------------------------------------===//
TARGET_BUILTIN(__builtin_amdgcn_image_load_1d_v4f32_i32, "V4fiiV8iii", "nc", "image-insts")
TARGET_BUILTIN(__builtin_amdgcn_image_load_1d_v4f16_i32, "V4hiiV8iii", "nc", "image-insts")
@@ -694,6 +694,20 @@ TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_3d_v4f32_i32, "vV4fiiiiiV8iii",
TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_3d_v4f16_i32, "vV4hiiiiiV8iii", "nc", "image-insts")
TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_cube_v4f32_i32, "vV4fiiiiiV8iii", "nc", "image-insts")
TARGET_BUILTIN(__builtin_amdgcn_image_store_mip_cube_v4f16_i32, "vV4hiiiiiV8iii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_1d_v4f32_f32, "V4fifV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_1d_v4f16_f32, "V4hifV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_1darray_v4f32_f32, "V4fiffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_1darray_v4f16_f32, "V4hiffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_2d_f32_f32, "fiffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_2d_v4f32_f32, "V4fiffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_2d_v4f16_f32, "V4hiffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_2darray_f32_f32, "fifffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_2darray_v4f32_f32, "V4fifffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_2darray_v4f16_f32, "V4hifffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_3d_v4f32_f32, "V4fifffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_3d_v4f16_f32, "V4hifffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_cube_v4f32_f32, "V4fifffV8iV4ibii", "nc", "image-insts")
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_cube_v4f16_f32, "V4hifffV8iV4ibii", "nc", "image-insts")
#undef BUILTIN
#undef TARGET_BUILTIN
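
As a quick orientation for the new entries, a prototype string such as "V4fiffV8iV4ibii" decodes (per the Builtins.def type encoding and the intrinsic calls visible in the CodeGen tests further down) to: a vector return, a constant dmask, one float coordinate per dimension, the <8 x i32> resource descriptor, the <4 x i32> sampler descriptor, a bool unorm flag, and the two trailing texfailctrl/cachepolicy immediates. A minimal C/OpenCL-style call shape for the 2D variant, mirroring the tests added later in this patch (the 100/0/120/110 literals are placeholder immediates, not meaningful values):

  typedef int int8 __attribute__((ext_vector_type(8)));
  typedef int int4 __attribute__((ext_vector_type(4)));
  typedef float float4 __attribute__((ext_vector_type(4)));

  float4 sample_2d(float s, float t, int8 rsrc, int4 samp) {
    /* dmask=100, unorm=0, texfailctrl=120, cachepolicy=110: placeholder
       immediates, matching the autogenerated CodeGen tests below. */
    return __builtin_amdgcn_image_sample_2d_v4f32_f32(100, s, t, rsrc, samp,
                                                      0, 120, 110);
  }
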
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 06048e6b069d7..c7c25ccc8d4d1 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -805,6 +805,32 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32:
return EmitAMDGCNImageOverloadedReturnType(
*this, E, Intrinsic::amdgcn_image_store_mip_cube, true);
+ case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_1d, false);
+ case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_1darray, false);
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_2d, false);
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_2darray, false);
+ case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_3d, false);
+ case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:
+ return EmitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_cube, false);
case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8);
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index e29d450951820..c4e9ec270e28c 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -111,7 +111,21 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
- case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: {
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32: {
StringRef FeatureList(getASTContext().BuiltinInfo.getRequiredFeatures(BuiltinID));
if (!Builtin::evaluateRequiredTargetFeatures(
FeatureList, CallerFeatureMap)){
diff --git a/clang/test/CodeGen/builtins-image-load.c b/clang/test/CodeGen/builtins-image-load.c
index 5b5b3669ad083..67548a567723e 100644
--- a/clang/test/CodeGen/builtins-image-load.c
+++ b/clang/test/CodeGen/builtins-image-load.c
@@ -2,6 +2,7 @@
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1100 %s -emit-llvm -o - | FileCheck %s
typedef int int8 __attribute__((ext_vector_type(8)));
+typedef int int4 __attribute__((ext_vector_type(4)));
typedef float float4 __attribute__((ext_vector_type(4)));
typedef _Float16 half;
typedef half half4 __attribute__((ext_vector_type(4)));
@@ -729,3 +730,433 @@ half4 test_builtin_image_load_mip_cube_2(half4 v4f16, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_mip_cube_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_1d_v4f32_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], <8 x i32> [[TMP1]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP3]]
+//
+float4 test_builtin_amdgcn_image_sample_1d_v4f32_f32(float4 v4f32, int i32, float f32, int8 vec8i32, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_1d_v4f32_f32(100, f32, vec8i32, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_1d_v4f16_f32(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.1d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], <8 x i32> [[TMP1]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP3]]
+//
+half4 test_builtin_amdgcn_image_sample_1d_v4f16_f32(half4 v4f16, int i32, float f32, int8 vec8i32, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_1d_v4f16_f32(100, f32, vec8i32, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_1darray_v4f32_f32(
+// CHECK-SAME: i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP4]]
+//
+float4 test_builtin_amdgcn_image_sample_1darray_v4f32_f32(int i32, float f32, int8 vec8i32, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_1darray_v4f32_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_1darray_v4f16_f32(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.1darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP4]]
+//
+half4 test_builtin_amdgcn_image_sample_1darray_v4f16_f32(half4 v4f16, int i32, float f32, int8 vec8i32, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_1darray_v4f16_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local float @test_builtin_amdgcn_image_sample_2d_f32_f32(
+// CHECK-SAME: i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.sample.2d.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret float [[TMP4]]
+//
+float test_builtin_amdgcn_image_sample_2d_f32_f32(int i32, float f32, int8 vec8i32, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_2d_f32_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_2d_v4f32_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP4]]
+//
+float4 test_builtin_amdgcn_image_sample_2d_v4f32_f32(float4 v4f32, int i32, float f32, int8 vec8i32, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_2d_v4f32_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_2d_v4f16_f32(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.2d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TMP2]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP4]]
+//
+half4 test_builtin_amdgcn_image_sample_2d_v4f16_f32(half4 v4f16, int i32, float f32, int8 vec8i32, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_2d_v4f16_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local float @test_builtin_amdgcn_image_sample_2darray_f32_f32(
+// CHECK-SAME: i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.image.sample.2darray.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret float [[TMP5]]
+//
+float test_builtin_amdgcn_image_sample_2darray_f32_f32(int i32, float f32, int8 vec8i32, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_2darray_f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_2darray_v4f32_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
+//
+float4 test_builtin_amdgcn_image_sample_2darray_v4f32_f32(float4 v4f32, int i32, float f32, int8 vec8i32, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_2darray_v4f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_2darray_v4f16_f32(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.2darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP5]]
+//
+half4 test_builtin_amdgcn_image_sample_2darray_v4f16_f32(half4 v4f16, int i32, float f32, int8 vec8i32, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_2darray_v4f16_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_3d_v4f32_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
+//
+float4 test_builtin_amdgcn_image_sample_3d_v4f32_f32(float4 v4f32, int i32, float f32, int8 vec8i32, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_3d_v4f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_3d_v4f16_f32(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.3d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP5]]
+//
+half4 test_builtin_amdgcn_image_sample_3d_v4f16_f32(half4 v4f16, int i32, float f32, int8 vec8i32, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_3d_v4f16_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_cube_v4f32_f32(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
+//
+float4 test_builtin_amdgcn_image_sample_cube_v4f32_f32(float4 v4f32, int i32, float f32, int8 vec8i32, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_cube_v4f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_cube_v4f16_f32(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], float noundef [[F32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.cube.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TMP3]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP5]]
+//
+half4 test_builtin_amdgcn_image_sample_cube_v4f16_f32(half4 v4f16, int i32, float f32, int8 vec8i32, int4 vec4i32) {
+ return __builtin_amdgcn_image_sample_cube_v4f16_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110);
+}
diff --git a/clang/test/SemaOpenCL/builtins-image-load-param.cl b/clang/test/SemaOpenCL/builtins-image-load-param.cl
index ebdc8106076f5..249bb9211ab75 100644
--- a/clang/test/SemaOpenCL/builtins-image-load-param.cl
+++ b/clang/test/SemaOpenCL/builtins-image-load-param.cl
@@ -4,6 +4,7 @@
// REQUIRES: amdgpu-registered-target
typedef int int8 __attribute__((ext_vector_type(8)));
+typedef int int4 __attribute__((ext_vector_type(4)));
typedef float float4 __attribute__((ext_vector_type(4)));
typedef half half4 __attribute__((ext_vector_type(4)));
@@ -20,6 +21,7 @@ half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_v4f16_i32' must be a constant integer}}
}
+
float test_builtin_image_load_2darray(float f32, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_f32_i32' must be a constant integer}}
@@ -143,3 +145,78 @@ half4 test_builtin_image_load_2d_gfx_2(half4 v4f16, int i32, int8 vec8i32) {
return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, vec8i32, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_load_2d_v4f16_i32' needs target feature image-insts}}
}
+
+float test_builtin_image_sample_2d(float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_2d_f32_f32(i32, f32, f32, vec8i32, vec4i32, 0, 106, 103); //expected-error{{argument to '__builtin_amdgcn_image_sample_2d_f32_f32' must be a constant integer}}
+}
+float4 test_builtin_image_sample_2d_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_2d_v4f32_f32(100, f32, f32, vec8i32, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_2d_v4f32_f32' must be a constant integer}}
+}
+half4 test_builtin_image_sample_2d_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_2d_v4f16_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_2d_v4f16_f32' must be a constant integer}}
+}
+
+float test_builtin_image_sample_2darray(float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_2darray_f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_2darray_f32_f32' must be a constant integer}}
+}
+float4 test_builtin_image_sample_2darray_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_2darray_v4f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_2darray_v4f32_f32' must be a constant integer}}
+}
+half4 test_builtin_image_sample_2darray_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_2darray_v4f16_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_2darray_v4f16_f32' must be a constant integer}}
+}
+
+float4 test_builtin_image_sample_1d_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_1d_v4f32_f32(i32, f32, vec8i32, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_1d_v4f32_f32' must be a constant integer}}
+}
+half4 test_builtin_image_sample_1d_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_1d_v4f16_f32(100, f32, vec8i32, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_1d_v4f16_f32' must be a constant integer}}
+}
+
+float4 test_builtin_image_sample_1darray_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_1darray_v4f32_f32(100, f32, f32, vec8i32, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_1darray_v4f32_f32' must be a constant integer}}
+}
+half4 test_builtin_image_sample_1darray_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_1darray_v4f16_f32(100, f32, f32, vec8i32, vec4i32, 0, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_1darray_v4f16_f32' must be a constant integer}}
+}
+
+float4 test_builtin_image_sample_3d_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_3d_v4f32_f32(100, f32, f32, f32, vec8i32, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_3d_v4f32_f32' must be a constant integer}}
+}
+half4 test_builtin_image_sample_3d_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_3d_v4f16_f32(i32, f32, f32, f32, vec8i32, vec4i32, 0, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_3d_v4f16_f32' must be a constant integer}}
+}
+
+float4 test_builtin_image_sample_cube_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_cube_v4f32_f32(i32, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_cube_v4f32_f32' must be a constant integer}}
+}
+half4 test_builtin_image_sample_cube_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_cube_v4f16_f32(i32, f32, f32, f32, vec8i32, vec4i32, 0, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_sample_cube_v4f16_f32' must be a constant integer}}
+}
+
+float test_builtin_image_sample_2d_gfx(float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_2d_f32_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_sample_2d_f32_f32' needs target feature image-insts}}
+}
+float4 test_builtin_image_sample_2d_gfx_1(float4 v4f32, float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_2d_v4f32_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_sample_2d_v4f32_f32' needs target feature image-insts}}
+}
+half4 test_builtin_image_sample_2d_gfx_2(half4 v4f16, float f32, int i32, int8 vec8i32, int4 vec4i32) {
+
+ return __builtin_amdgcn_image_sample_2d_v4f16_f32(100, f32, f32, vec8i32, vec4i32, 0, 120, 110); //GFX94-error{{'__builtin_amdgcn_image_sample_2d_v4f16_f32' needs target feature image-insts}}
+}
\ No newline at end of file