[clang] [llvm] [AMDGPU] Fix crash in InstCombine (PR #179511)
Alexey Sachkov via cfe-commits
cfe-commits at lists.llvm.org
Thu Feb 5 05:23:53 PST 2026
https://github.com/AlexeySachkov updated https://github.com/llvm/llvm-project/pull/179511
From 4539640c220c51f1ec4ce687dafa4029c9b7caf9 Mon Sep 17 00:00:00 2001
From: Alexey Sachkov <alexey.sachkov at amd.com>
Date: Tue, 3 Feb 2026 11:16:58 -0600
Subject: [PATCH 1/7] [AMDGPU] Fix crash in InstCombine
Added an out-of-bounds check to avoid crashes when simplifying
`@llvm.amdgcn.image.load.*` intrinsics that return vector types where the
mask argument "enables" more elements than the return type contains.
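
For illustration only, not part of the patch: below is a minimal standalone
C++ sketch of the remapping loop with the new guard, using plain unsigned
bitmasks in place of LLVM's APInt-based DemandedElts. The function name
remapDMask and its signature are simplifications made up for this sketch,
not the actual LLVM code.

    #include <cassert>

    // Models the dmask-remapping loop from
    // simplifyAMDGCNMemoryIntrinsicDemanded. NumElts is the element count
    // of the intrinsic's return vector; DemandedElts is a bitmask of the
    // elements the caller actually uses.
    unsigned remapDMask(unsigned DMaskVal, unsigned DemandedElts,
                        unsigned NumElts) {
      unsigned NewDMaskVal = 0;
      unsigned OrigLdStIdx = 0;
      for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
        const unsigned Bit = 1u << SrcIdx;
        if (DMaskVal & Bit) {
          // The fix: a dmask with more set bits than NumElts would
          // otherwise index past the end of DemandedElts and crash.
          if (OrigLdStIdx >= NumElts)
            break;
          if (DemandedElts & (1u << OrigLdStIdx))
            NewDMaskVal |= Bit;
          ++OrigLdStIdx;
        }
      }
      return NewDMaskVal;
    }

    int main() {
      // A dmask of 0xF "enables" four channels while the return type has
      // only two elements; both demanded elements are kept and the loop
      // stops before reading past the end of the demanded-elements mask.
      assert(remapDMask(/*DMaskVal=*/0xF, /*DemandedElts=*/0x3,
                        /*NumElts=*/2) == 0x3);
      return 0;
    }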
---
.../AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 2 +
.../fix-amdgcn-image-load-dmask-crash.ll | 123 ++++++++++++++++++
2 files changed, 125 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 2cd1902785546..0756a3c257738 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1861,6 +1861,8 @@ static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
const unsigned Bit = 1 << SrcIdx;
if (!!(DMaskVal & Bit)) {
+ if (OrigLdStIdx >= DemandedElts.getBitWidth())
+ break;
if (!!DemandedElts[OrigLdStIdx])
NewDMaskVal |= Bit;
OrigLdStIdx++;
diff --git a/llvm/test/CodeGen/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll b/llvm/test/CodeGen/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll
new file mode 100644
index 0000000000000..571b9e4f123b7
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll
@@ -0,0 +1,123 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -mtriple amdgcn -passes=instcombine %s -S -o - | FileCheck %s
+;
+; The main purpose of this test is to ensure that we do not crash when the
+; mask argument "enables" more elements than there are in the return type.
+; This specific corner case was discovered by a fuzzer.
+
+define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+; CHECK-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask(
+; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) {
+; CHECK-NEXT: [[MAIN_BODY:.*:]]
+; CHECK-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
+; CHECK-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
+; CHECK-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
+; CHECK-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
+; CHECK-NEXT: ret [4 x <2 x float>] [[I7]]
+;
+main_body:
+ %i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 7, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+ %i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 11, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
+ %i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 15, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
+ %i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 -1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
+ %i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0
+ %i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1
+ %i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2
+ %i7 = insertvalue [4 x <2 x float>] %i6, <2 x float> %i3, 3
+ ret [4 x <2 x float>] %i7
+}
+
+define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+; CHECK-LABEL: define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask(
+; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) {
+; CHECK-NEXT: [[MAIN_BODY:.*:]]
+; CHECK-NEXT: [[I:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 11, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT: [[I1:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT: [[I2:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 -11, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x float> [[I3]], <2 x float> poison, <3 x i32> <i32 0, i32 1, i32 poison>
+; CHECK-NEXT: [[I4:%.*]] = insertvalue [4 x <3 x float>] undef, <3 x float> [[I]], 0
+; CHECK-NEXT: [[I5:%.*]] = insertvalue [4 x <3 x float>] [[I4]], <3 x float> [[I1]], 1
+; CHECK-NEXT: [[I6:%.*]] = insertvalue [4 x <3 x float>] [[I5]], <3 x float> [[I2]], 2
+; CHECK-NEXT: [[I7:%.*]] = insertvalue [4 x <3 x float>] [[I6]], <3 x float> [[TMP0]], 3
+; CHECK-NEXT: ret [4 x <3 x float>] [[I7]]
+;
+main_body:
+ %i = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 11, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+ %i1 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 15, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
+ %i2 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 31, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
+ %i3 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 -11, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
+ %i4 = insertvalue [4 x <3 x float>] undef, <3 x float> %i, 0
+ %i5 = insertvalue [4 x <3 x float>] %i4, <3 x float> %i1, 1
+ %i6 = insertvalue [4 x <3 x float>] %i5, <3 x float> %i2, 2
+ %i7 = insertvalue [4 x <3 x float>] %i6, <3 x float> %i3, 3
+ ret [4 x <3 x float>] %i7
+}
+
+define amdgpu_ps [4 x <2 x float>] @load_2darraymsaa_v4v2f32_dmask(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
+; CHECK-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2darraymsaa_v4v2f32_dmask(
+; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[SLICE:%.*]]) {
+; CHECK-NEXT: [[MAIN_BODY:.*:]]
+; CHECK-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
+; CHECK-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
+; CHECK-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
+; CHECK-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
+; CHECK-NEXT: ret [4 x <2 x float>] [[I7]]
+;
+main_body:
+ %i = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 7, i32 %s, i32 %t, i32 %slice, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+ %i1 = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 11, i32 %s, i32 %t, i32 %slice, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
+ %i2 = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
+ %i3 = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 -1, i32 %s, i32 %t, i32 %slice, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
+ %i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0
+ %i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1
+ %i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2
+ %i7 = insertvalue [4 x <2 x float>] %i6, <2 x float> %i3, 3
+ ret [4 x <2 x float>] %i7
+}
+
+define amdgpu_ps [4 x <3 x float>] @load_2darraymsaa_v4v3f32_dmask(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
+; CHECK-LABEL: define amdgpu_ps [4 x <3 x float>] @load_2darraymsaa_v4v3f32_dmask(
+; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[SLICE:%.*]]) {
+; CHECK-NEXT: [[MAIN_BODY:.*:]]
+; CHECK-NEXT: [[I:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32.v8i32(i32 11, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT: [[I1:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT: [[I2:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT: [[I3:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT: [[I4:%.*]] = insertvalue [4 x <3 x float>] undef, <3 x float> [[I]], 0
+; CHECK-NEXT: [[I5:%.*]] = insertvalue [4 x <3 x float>] [[I4]], <3 x float> [[I1]], 1
+; CHECK-NEXT: [[I6:%.*]] = insertvalue [4 x <3 x float>] [[I5]], <3 x float> [[I2]], 2
+; CHECK-NEXT: [[I7:%.*]] = insertvalue [4 x <3 x float>] [[I6]], <3 x float> [[I3]], 3
+; CHECK-NEXT: ret [4 x <3 x float>] [[I7]]
+;
+main_body:
+ %i = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32(i32 11, i32 %s, i32 %t, i32 %slice, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+ %i1 = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
+ %i2 = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32(i32 31, i32 %s, i32 %t, i32 %slice, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
+ %i3 = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32(i32 -1, i32 %s, i32 %t, i32 %slice, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
+ %i4 = insertvalue [4 x <3 x float>] undef, <3 x float> %i, 0
+ %i5 = insertvalue [4 x <3 x float>] %i4, <3 x float> %i1, 1
+ %i6 = insertvalue [4 x <3 x float>] %i5, <3 x float> %i2, 2
+ %i7 = insertvalue [4 x <3 x float>] %i6, <3 x float> %i3, 3
+ ret [4 x <3 x float>] %i7
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
+declare <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
+declare <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
+declare <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
+declare <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
From 93e5ccdadd7133acb94772d2ba1ca9afa5c19208 Mon Sep 17 00:00:00 2001
From: Alexey Sachkov <alexey.sachkov at amd.com>
Date: Wed, 4 Feb 2026 05:16:42 -0600
Subject: [PATCH 2/7] Address review comments
---
.../fix-amdgcn-image-load-dmask-crash.ll | 20 ++++++++-----------
1 file changed, 8 insertions(+), 12 deletions(-)
rename llvm/test/{CodeGen => Transforms/InstCombine}/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll (91%)
diff --git a/llvm/test/CodeGen/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll b/llvm/test/Transforms/InstCombine/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll
similarity index 91%
rename from llvm/test/CodeGen/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll
rename to llvm/test/Transforms/InstCombine/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll
index 571b9e4f123b7..c81f1fe7cde9a 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll
@@ -13,7 +13,7 @@ define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask(<8 x i32> inreg %r
; CHECK-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
; CHECK-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
; CHECK-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
-; CHECK-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
+; CHECK-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] poison, <2 x float> [[I]], 0
; CHECK-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
; CHECK-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
; CHECK-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
@@ -24,7 +24,7 @@ main_body:
%i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 11, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
%i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 15, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
%i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 -1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
- %i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0
+ %i4 = insertvalue [4 x <2 x float>] poison, <2 x float> %i, 0
%i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1
%i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2
%i7 = insertvalue [4 x <2 x float>] %i6, <2 x float> %i3, 3
@@ -40,7 +40,7 @@ define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask(<8 x i32> inreg %r
; CHECK-NEXT: [[I2:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
; CHECK-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 -11, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x float> [[I3]], <2 x float> poison, <3 x i32> <i32 0, i32 1, i32 poison>
-; CHECK-NEXT: [[I4:%.*]] = insertvalue [4 x <3 x float>] undef, <3 x float> [[I]], 0
+; CHECK-NEXT: [[I4:%.*]] = insertvalue [4 x <3 x float>] poison, <3 x float> [[I]], 0
; CHECK-NEXT: [[I5:%.*]] = insertvalue [4 x <3 x float>] [[I4]], <3 x float> [[I1]], 1
; CHECK-NEXT: [[I6:%.*]] = insertvalue [4 x <3 x float>] [[I5]], <3 x float> [[I2]], 2
; CHECK-NEXT: [[I7:%.*]] = insertvalue [4 x <3 x float>] [[I6]], <3 x float> [[TMP0]], 3
@@ -51,7 +51,7 @@ main_body:
%i1 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 15, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
%i2 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 31, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
%i3 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 -11, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
- %i4 = insertvalue [4 x <3 x float>] undef, <3 x float> %i, 0
+ %i4 = insertvalue [4 x <3 x float>] poison, <3 x float> %i, 0
%i5 = insertvalue [4 x <3 x float>] %i4, <3 x float> %i1, 1
%i6 = insertvalue [4 x <3 x float>] %i5, <3 x float> %i2, 2
%i7 = insertvalue [4 x <3 x float>] %i6, <3 x float> %i3, 3
@@ -66,7 +66,7 @@ define amdgpu_ps [4 x <2 x float>] @load_2darraymsaa_v4v2f32_dmask(<8 x i32> inr
; CHECK-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
; CHECK-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
; CHECK-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
-; CHECK-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
+; CHECK-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] poison, <2 x float> [[I]], 0
; CHECK-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
; CHECK-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
; CHECK-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
@@ -77,7 +77,7 @@ main_body:
%i1 = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 11, i32 %s, i32 %t, i32 %slice, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
%i2 = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
%i3 = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 -1, i32 %s, i32 %t, i32 %slice, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
- %i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0
+ %i4 = insertvalue [4 x <2 x float>] poison, <2 x float> %i, 0
%i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1
%i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2
%i7 = insertvalue [4 x <2 x float>] %i6, <2 x float> %i3, 3
@@ -92,7 +92,7 @@ define amdgpu_ps [4 x <3 x float>] @load_2darraymsaa_v4v3f32_dmask(<8 x i32> inr
; CHECK-NEXT: [[I1:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
; CHECK-NEXT: [[I2:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
; CHECK-NEXT: [[I3:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
-; CHECK-NEXT: [[I4:%.*]] = insertvalue [4 x <3 x float>] undef, <3 x float> [[I]], 0
+; CHECK-NEXT: [[I4:%.*]] = insertvalue [4 x <3 x float>] poison, <3 x float> [[I]], 0
; CHECK-NEXT: [[I5:%.*]] = insertvalue [4 x <3 x float>] [[I4]], <3 x float> [[I1]], 1
; CHECK-NEXT: [[I6:%.*]] = insertvalue [4 x <3 x float>] [[I5]], <3 x float> [[I2]], 2
; CHECK-NEXT: [[I7:%.*]] = insertvalue [4 x <3 x float>] [[I6]], <3 x float> [[I3]], 3
@@ -103,21 +103,17 @@ main_body:
%i1 = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
%i2 = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32(i32 31, i32 %s, i32 %t, i32 %slice, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
%i3 = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32(i32 -1, i32 %s, i32 %t, i32 %slice, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
- %i4 = insertvalue [4 x <3 x float>] undef, <3 x float> %i, 0
+ %i4 = insertvalue [4 x <3 x float>] poison, <3 x float> %i, 0
%i5 = insertvalue [4 x <3 x float>] %i4, <3 x float> %i1, 1
%i6 = insertvalue [4 x <3 x float>] %i5, <3 x float> %i2, 2
%i7 = insertvalue [4 x <3 x float>] %i6, <3 x float> %i3, 3
ret [4 x <3 x float>] %i7
}
-; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
declare <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
declare <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
declare <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
declare <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
From 6a3f4d59505e551b746ed1624ab9133f010d9c2d Mon Sep 17 00:00:00 2001
From: Alexey Sachkov <alexey.sachkov at amd.com>
Date: Thu, 5 Feb 2026 03:05:14 -0600
Subject: [PATCH 3/7] Catch the corner case in the IR verifier
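
For illustration only, not part of the patch: a minimal standalone C++20
sketch of the rule this verifier check enforces. The helper name
isValidDMask is made up for the sketch; per the patch, scalar returns count
as one element, and for {<N x T>, i32} struct returns (tfe/lwe) only the
first struct element is treated as the data vector.

    #include <bit>
    #include <cassert>

    // The dmask immediate may not have more set bits than the data part
    // of the intrinsic's return type has elements.
    bool isValidDMask(unsigned DMask, unsigned ReturnVectorWidth) {
      return static_cast<unsigned>(std::popcount(DMask)) <=
             ReturnVectorWidth;
    }

    int main() {
      assert(isValidDMask(0x3, 2));  // two bits into <2 x float>: ok
      assert(!isValidDMask(0xF, 2)); // four bits into <2 x float>: invalid
      assert(!isValidDMask(31, 4));  // five bits into <4 x float>: invalid
      return 0;
    }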
---
llvm/lib/IR/Verifier.cpp | 33 ++
.../fix-amdgcn-image-load-dmask-crash.ll | 4 +-
.../intrinsic-amdgcn-image-load-dmask.ll | 307 ++++++++++++++++++
3 files changed, 343 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/Verifier/AMDGPU/intrinsic-amdgcn-image-load-dmask.ll
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 3d44d1317ecc7..3e62d9525dacc 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -7147,6 +7147,39 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
&Call, Op);
break;
}
+ case Intrinsic::amdgcn_image_load_1d:
+ case Intrinsic::amdgcn_image_load_1darray:
+ case Intrinsic::amdgcn_image_load_2d:
+ case Intrinsic::amdgcn_image_load_2darray:
+ case Intrinsic::amdgcn_image_load_2darraymsaa:
+ case Intrinsic::amdgcn_image_load_2dmsaa:
+ case Intrinsic::amdgcn_image_load_3d:
+ case Intrinsic::amdgcn_image_load_cube:
+ case Intrinsic::amdgcn_image_load_mip_1d:
+ case Intrinsic::amdgcn_image_load_mip_1darray:
+ case Intrinsic::amdgcn_image_load_mip_2d:
+ case Intrinsic::amdgcn_image_load_mip_2darray:
+ case Intrinsic::amdgcn_image_load_mip_3d:
+ case Intrinsic::amdgcn_image_load_mip_cube: {
+    // The LLVM IR definition of these intrinsics allows them to return any
+    // type. The logic below is based on what is covered by the
+    // llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll test.
+ Type *T = Call.getType();
+ if (auto *ST = dyn_cast<StructType>(Call.getType()))
+ T = ST->getElementType(0);
+
+ unsigned VWidth = 1;
+ if (auto *FVT = dyn_cast<FixedVectorType>(T))
+ VWidth = FVT->getNumElements();
+
+ Value *V = Call.getArgOperand(0);
+ unsigned DMask = cast<ConstantInt>(V)->getZExtValue();
+ unsigned NumActiveBits = popcount(DMask);
+ Check(NumActiveBits <= VWidth,
+ "llvm.amdgcn.image.load.* intrinsic mask cannot have more active "
+ "bits than there are elements in the return type");
+ break;
+ }
case Intrinsic::nvvm_setmaxnreg_inc_sync_aligned_u32:
case Intrinsic::nvvm_setmaxnreg_dec_sync_aligned_u32: {
Value *V = Call.getArgOperand(0);
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll b/llvm/test/Transforms/InstCombine/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll
index c81f1fe7cde9a..7f16d69623eed 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll
@@ -1,5 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
-; RUN: opt -mtriple amdgcn -passes=instcombine %s -S -o - | FileCheck %s
+; RUN: opt -mtriple amdgcn -passes=instcombine --disable-verify %s -S -o - | FileCheck %s
+;
+; The verifier is disabled on purpose because the IR is deemed invalid.
;
 ; The main purpose of this test is to ensure that we do not crash when the
 ; mask argument "enables" more elements than there are in the return type.
diff --git a/llvm/test/Verifier/AMDGPU/intrinsic-amdgcn-image-load-dmask.ll b/llvm/test/Verifier/AMDGPU/intrinsic-amdgcn-image-load-dmask.ll
new file mode 100644
index 0000000000000..1cb6fe420f320
--- /dev/null
+++ b/llvm/test/Verifier/AMDGPU/intrinsic-amdgcn-image-load-dmask.ll
@@ -0,0 +1,307 @@
+; RUN: not llvm-as %s -disable-output 2>&1 | FileCheck %s
+
+define amdgpu_ps void @load_1d(<8 x i32> inreg %rsrc, i32 %s) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 31, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_1d_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s) {
+main_body:
+ %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 31, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_1d_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s) {
+main_body:
+ %v = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 -1, i32 %s, <8 x i32> %rsrc, i32 2, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 -1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_2d_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t) {
+main_body:
+ %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2d.v4f32i32.i32(i32 31, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 31, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_3d_tfe_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %r) {
+main_body:
+ %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.3d.v4f32i32.i32(i32 31, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 3, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32 31, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_cube_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice) {
+main_body:
+ %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.cube.v4f32i32.i32(i32 31, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32 31, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_1darray_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %slice) {
+main_body:
+ %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1darray.v4f32i32.i32(i32 31, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 1, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32 31, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_2darray_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice) {
+main_body:
+ %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2darray.v4f32i32.i32(i32 31, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_2dmsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32 31, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_2dmsaa_both(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %fragid) {
+main_body:
+ %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2dmsaa.v4f32i32.i32(i32 31, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 3, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_2darraymsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 31, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
+main_body:
+ %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2darraymsaa.v4f32i32.i32(i32 31, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_mip_1d(<8 x i32> inreg %rsrc, i32 %s, i32 %mip) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 31, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_mip_1d_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %mip) {
+main_body:
+ %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.mip.1d.v4f32i32.i32(i32 31, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 2, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_mip_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 31, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_mip_2d_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %mip) {
+main_body:
+ %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v4f32i32.i32(i32 31, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_1d_V2_tfe(<8 x i32> inreg %rsrc, i32 %s) {
+main_body:
+ %v = call {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32 -1, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_1d_V1_tfe(<8 x i32> inreg %rsrc, i32 %s) {
+main_body:
+ %v = call {float,i32} @llvm.amdgcn.image.load.1d.f32i32.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_mip_2d_tfe_nouse(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
+main_body:
+ %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v4f32i32.i32(i32 31, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_mip_2d_tfe_nouse_V2(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
+main_body:
+ %v = call {<2 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v2f32i32.i32(i32 7, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_mip_2d_tfe_nouse_V1(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
+main_body:
+ %v = call {float, i32} @llvm.amdgcn.image.load.mip.2d.f32i32.i32(i32 3, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_1d_tfe_V4(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s) {
+main_body:
+ %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 31, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_1d_tfe_V2(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s) {
+main_body:
+ %v = call {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32 -1, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+
+define amdgpu_ps void @load_mip_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r, i32 %mip) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32 31, i32 %s, i32 %t, i32 %r, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_mip_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %mip) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32 31, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_mip_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice, i32 %mip) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32 31, i32 %s, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_mip_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %mip) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32 31, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_1d_V1(<8 x i32> inreg %rsrc, i32 %s) {
+main_body:
+ %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_1d_V2(<8 x i32> inreg %rsrc, i32 %s) {
+main_body:
+ %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_1d_glc(<8 x i32> inreg %rsrc, i32 %s) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 31, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_1d_slc(<8 x i32> inreg %rsrc, i32 %s) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 31, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_1d_glc_slc(<8 x i32> inreg %rsrc, i32 %s) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 31, i32 %s, <8 x i32> %rsrc, i32 0, i32 3)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @image_load_mmo(<8 x i32> inreg %rsrc, ptr addrspace(3) %lds, <2 x i32> %c) #0 {
+ %c0 = extractelement <2 x i32> %c, i32 0
+ %c1 = extractelement <2 x i32> %c, i32 1
+ %tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 3, i32 %c0, i32 %c1, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
+declare {float,i32} @llvm.amdgcn.image.load.1d.f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
+declare {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.2d.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.3d.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.cube.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.1darray.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.2darray.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.2dmsaa.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.2darraymsaa.v4f32i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.mip.1d.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {<2 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v2f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {float,i32} @llvm.amdgcn.image.load.mip.2d.f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+
+declare float @llvm.amdgcn.image.load.1d.f32.i32(i32, i32, <8 x i32>, i32, i32) #1
+declare float @llvm.amdgcn.image.load.2d.f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32, i32, <8 x i32>, i32, i32) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+attributes #2 = { nounwind readnone }
From 8738f469b243bcef848e4a7cad0b6cf50b8624ab Mon Sep 17 00:00:00 2001
From: Alexey Sachkov <alexey.sachkov at amd.com>
Date: Thu, 5 Feb 2026 06:44:25 -0600
Subject: [PATCH 4/7] Adjust clang codegen tests accordingly
---
clang/test/CodeGen/builtins-image-load.c | 168 +++++++++++------------
1 file changed, 84 insertions(+), 84 deletions(-)
diff --git a/clang/test/CodeGen/builtins-image-load.c b/clang/test/CodeGen/builtins-image-load.c
index 8442124416338..0efa7725c2363 100644
--- a/clang/test/CodeGen/builtins-image-load.c
+++ b/clang/test/CodeGen/builtins-image-load.c
@@ -24,12 +24,12 @@ typedef half half4 __attribute__((ext_vector_type(4)));
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
-// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 12, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 106, i32 103)
+// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 1, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 106, i32 103)
// CHECK-NEXT: ret float [[TMP3]]
//
float test_builtin_image_load_2d(float f32, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_2d_f32_i32(12, i32, i32, tex, 106, 103);
+ return __builtin_amdgcn_image_load_2d_f32_i32(1, i32, i32, tex, 106, 103);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_2d_1(
@@ -50,12 +50,12 @@ float test_builtin_image_load_2d(float f32, int i32, __amdgpu_texture_t tex) {
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP3]]
//
float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, tex, 120, 110);
+ return __builtin_amdgcn_image_load_2d_v4f32_i32(15, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2d_2(
@@ -76,12 +76,12 @@ float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, __amdgpu_texture_t te
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP3]]
//
half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, tex, 120, 110);
+ return __builtin_amdgcn_image_load_2d_v4f16_i32(15, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local float @test_builtin_image_load_2darray(
@@ -103,12 +103,12 @@ half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex)
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.load.2darray.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.load.2darray.f32.i32.v8i32(i32 1, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret float [[TMP4]]
//
float test_builtin_image_load_2darray(float f32, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, tex, 120, 110);
+ return __builtin_amdgcn_image_load_2darray_f32_i32(1, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_2darray_1(
@@ -130,12 +130,12 @@ float test_builtin_image_load_2darray(float f32, int i32, __amdgpu_texture_t tex
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP4]]
//
float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_2darray_v4f32_i32(100, i32, i32, i32, tex, 120, 110);
+ return __builtin_amdgcn_image_load_2darray_v4f32_i32(15, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2darray_2(
@@ -157,12 +157,12 @@ float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, __amdgpu_texture
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2darray.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP4]]
//
half4 test_builtin_image_load_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_2darray_v4f16_i32(100, i32, i32, i32, tex, 120, 110);
+ return __builtin_amdgcn_image_load_2darray_v4f16_i32(15, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_1d_1(
@@ -182,12 +182,12 @@ half4 test_builtin_image_load_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP2]]
//
float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_1d_v4f32_i32(100, i32, tex, 120, 110);
+ return __builtin_amdgcn_image_load_1d_v4f32_i32(15, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1d_2(
@@ -207,12 +207,12 @@ float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, __amdgpu_texture_t te
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP2]]
//
half4 test_builtin_image_load_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_1d_v4f16_i32(100, i32, tex, 120, 110);
+ return __builtin_amdgcn_image_load_1d_v4f16_i32(15, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_1darray_1(
@@ -233,12 +233,12 @@ half4 test_builtin_image_load_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex)
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP3]]
//
float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_1darray_v4f32_i32(100, i32, i32, tex, 120, 110);
+ return __builtin_amdgcn_image_load_1darray_v4f32_i32(15, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1darray_2(
@@ -259,12 +259,12 @@ float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, __amdgpu_texture
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1darray.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP3]]
//
half4 test_builtin_image_load_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_1darray_v4f16_i32(100, i32, i32, tex, 120, 110);
+ return __builtin_amdgcn_image_load_1darray_v4f16_i32(15, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_3d_1(
@@ -286,12 +286,12 @@ half4 test_builtin_image_load_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP4]]
//
float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_3d_v4f32_i32(100, i32, i32, i32, tex, 120, 110);
+ return __builtin_amdgcn_image_load_3d_v4f32_i32(15, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_3d_2(
@@ -313,12 +313,12 @@ float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, __amdgpu_texture_t te
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP4]]
//
half4 test_builtin_image_load_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_3d_v4f16_i32(100, i32, i32, i32, tex, 120, 110);
+ return __builtin_amdgcn_image_load_3d_v4f16_i32(15, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_cube_1(
@@ -340,12 +340,12 @@ half4 test_builtin_image_load_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex)
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP4]]
//
float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_cube_v4f32_i32(100, i32, i32, i32, tex, 120, 110);
+ return __builtin_amdgcn_image_load_cube_v4f32_i32(15, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_cube_2(
@@ -367,12 +367,12 @@ float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, __amdgpu_texture_t
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.cube.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.cube.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP4]]
//
half4 test_builtin_image_load_cube_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_cube_v4f16_i32(100, i32, i32, i32, tex, 120, 110);
+ return __builtin_amdgcn_image_load_cube_v4f16_i32(15, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_1d_1(
@@ -393,12 +393,12 @@ half4 test_builtin_image_load_cube_2(half4 v4f16, int i32, __amdgpu_texture_t te
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP3]]
//
float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(100, i32, i32, tex, 120, 110);
+ return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(15, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1d_2(
@@ -419,12 +419,12 @@ float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, __amdgpu_texture_
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1d.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP3]]
//
half4 test_builtin_image_load_mip_1d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_mip_1d_v4f16_i32(100, i32, i32, tex, 120, 110);
+ return __builtin_amdgcn_image_load_mip_1d_v4f16_i32(15, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_1darray_1(
@@ -446,12 +446,12 @@ half4 test_builtin_image_load_mip_1d_2(half4 v4f16, int i32, __amdgpu_texture_t
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP4]]
//
float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(100, i32, i32, i32, tex, 120, 110);
+ return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(15, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1darray_2(
@@ -473,12 +473,12 @@ float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, __amdgpu_tex
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1darray.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP4]]
//
half4 test_builtin_image_load_mip_1darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_mip_1darray_v4f16_i32(100, i32, i32, i32, tex, 120, 110);
+ return __builtin_amdgcn_image_load_mip_1darray_v4f16_i32(15, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local float @test_builtin_image_load_mip_2d(
@@ -500,12 +500,12 @@ half4 test_builtin_image_load_mip_1darray_2(half4 v4f16, int i32, __amdgpu_textu
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.load.mip.2d.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.load.mip.2d.f32.i32.v8i32(i32 1, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret float [[TMP4]]
//
float test_builtin_image_load_mip_2d(float f32, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_mip_2d_f32_i32(100, i32, i32, i32, tex, 120, 110);
+ return __builtin_amdgcn_image_load_mip_2d_f32_i32(1, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2d_1(
@@ -527,12 +527,12 @@ float test_builtin_image_load_mip_2d(float f32, int i32, __amdgpu_texture_t tex)
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP4]]
//
float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(100, i32, i32, i32, tex, 120, 110);
+ return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(15, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2d_2(
@@ -554,12 +554,12 @@ float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, __amdgpu_texture_
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP4]]
//
half4 test_builtin_image_load_mip_2d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_mip_2d_v4f16_i32(100, i32, i32, i32, tex, 120, 110);
+ return __builtin_amdgcn_image_load_mip_2d_v4f16_i32(15, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local float @test_builtin_image_load_mip_2darray(
@@ -582,12 +582,12 @@ half4 test_builtin_image_load_mip_2d_2(half4 v4f16, int i32, __amdgpu_texture_t
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
-// CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.image.load.mip.2darray.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.image.load.mip.2darray.f32.i32.v8i32(i32 1, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret float [[TMP5]]
//
float test_builtin_image_load_mip_2darray(float f32, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_mip_2darray_f32_i32(100, i32, i32, i32, i32, tex, 120, 110);
+ return __builtin_amdgcn_image_load_mip_2darray_f32_i32(1, i32, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2darray_1(
@@ -610,12 +610,12 @@ float test_builtin_image_load_mip_2darray(float f32, int i32, __amdgpu_texture_t
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
-// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP5]]
//
float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(100, i32, i32, i32, i32, tex, 120, 110);
+ return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(15, i32, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2darray_2(
@@ -638,12 +638,12 @@ float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, __amdgpu_tex
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
-// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2darray.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP5]]
//
half4 test_builtin_image_load_mip_2darray_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_mip_2darray_v4f16_i32(100, i32, i32, i32, i32, tex, 120, 110);
+ return __builtin_amdgcn_image_load_mip_2darray_v4f16_i32(15, i32, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_3d_1(
@@ -666,12 +666,12 @@ half4 test_builtin_image_load_mip_2darray_2(half4 v4f16, int i32, __amdgpu_textu
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
-// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP5]]
//
float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(100, i32, i32, i32, i32, tex, 120, 110);
+ return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(15, i32, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_3d_2(
@@ -694,12 +694,12 @@ float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, __amdgpu_texture_
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
-// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.3d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.3d.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP5]]
//
half4 test_builtin_image_load_mip_3d_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_mip_3d_v4f16_i32(100, i32, i32, i32, i32, tex, 120, 110);
+ return __builtin_amdgcn_image_load_mip_3d_v4f16_i32(15, i32, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_cube_1(
@@ -722,12 +722,12 @@ half4 test_builtin_image_load_mip_3d_2(half4 v4f16, int i32, __amdgpu_texture_t
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
-// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP5]]
//
float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(100, i32, i32, i32, i32, tex, 120, 110);
+ return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(15, i32, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_cube_2(
@@ -750,12 +750,12 @@ float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, __amdgpu_textur
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32
-// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.cube.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.cube.v4f16.i32.v8i32(i32 15, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP5]]
//
half4 test_builtin_image_load_mip_cube_2(half4 v4f16, int i32, __amdgpu_texture_t tex) {
- return __builtin_amdgcn_image_load_mip_cube_v4f16_i32(100, i32, i32, i32, i32, tex, 120, 110);
+ return __builtin_amdgcn_image_load_mip_cube_v4f16_i32(15, i32, i32, i32, i32, tex, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_1d_v4f32_f32(
@@ -782,11 +782,11 @@ half4 test_builtin_image_load_mip_cube_2(half4 v4f16, int i32, __amdgpu_texture_
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32
// CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP3]]
//
float4 test_builtin_amdgcn_image_sample_1d_v4f32_f32(float4 v4f32, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
- return __builtin_amdgcn_image_sample_1d_v4f32_f32(100, f32, tex, vec4i32, 0, 120, 110);
+ return __builtin_amdgcn_image_sample_1d_v4f32_f32(15, f32, tex, vec4i32, 0, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_1d_v4f16_f32(
@@ -813,11 +813,11 @@ float4 test_builtin_amdgcn_image_sample_1d_v4f32_f32(float4 v4f32, int i32, floa
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32
// CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.1d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.1d.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP3]]
//
half4 test_builtin_amdgcn_image_sample_1d_v4f16_f32(half4 v4f16, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
- return __builtin_amdgcn_image_sample_1d_v4f16_f32(100, f32, tex, vec4i32, 0, 120, 110);
+ return __builtin_amdgcn_image_sample_1d_v4f16_f32(15, f32, tex, vec4i32, 0, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_1darray_v4f32_f32(
@@ -842,11 +842,11 @@ half4 test_builtin_amdgcn_image_sample_1d_v4f16_f32(half4 v4f16, int i32, float
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP4]]
//
float4 test_builtin_amdgcn_image_sample_1darray_v4f32_f32(int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
- return __builtin_amdgcn_image_sample_1darray_v4f32_f32(100, f32, f32, tex, vec4i32, 0, 120, 110);
+ return __builtin_amdgcn_image_sample_1darray_v4f32_f32(15, f32, f32, tex, vec4i32, 0, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_1darray_v4f16_f32(
@@ -874,11 +874,11 @@ float4 test_builtin_amdgcn_image_sample_1darray_v4f32_f32(int i32, float f32, __
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.1darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.1darray.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP4]]
//
half4 test_builtin_amdgcn_image_sample_1darray_v4f16_f32(half4 v4f16, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
- return __builtin_amdgcn_image_sample_1darray_v4f16_f32(100, f32, f32, tex, vec4i32, 0, 120, 110);
+ return __builtin_amdgcn_image_sample_1darray_v4f16_f32(15, f32, f32, tex, vec4i32, 0, 120, 110);
}
// CHECK-LABEL: define dso_local float @test_builtin_amdgcn_image_sample_2d_f32_f32(
@@ -903,11 +903,11 @@ half4 test_builtin_amdgcn_image_sample_1darray_v4f16_f32(half4 v4f16, int i32, f
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.sample.2d.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.sample.2d.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
// CHECK-NEXT: ret float [[TMP4]]
//
float test_builtin_amdgcn_image_sample_2d_f32_f32(int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
- return __builtin_amdgcn_image_sample_2d_f32_f32(100, f32, f32, tex, vec4i32, 0, 120, 110);
+ return __builtin_amdgcn_image_sample_2d_f32_f32(1, f32, f32, tex, vec4i32, 0, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_2d_v4f32_f32(
@@ -935,11 +935,11 @@ float test_builtin_amdgcn_image_sample_2d_f32_f32(int i32, float f32, __amdgpu_t
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP4]]
//
float4 test_builtin_amdgcn_image_sample_2d_v4f32_f32(float4 v4f32, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
- return __builtin_amdgcn_image_sample_2d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, 120, 110);
+ return __builtin_amdgcn_image_sample_2d_v4f32_f32(15, f32, f32, tex, vec4i32, 0, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_2d_v4f16_f32(
@@ -967,11 +967,11 @@ float4 test_builtin_amdgcn_image_sample_2d_v4f32_f32(float4 v4f32, int i32, floa
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32
// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.2d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.2d.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP4]]
//
half4 test_builtin_amdgcn_image_sample_2d_v4f16_f32(half4 v4f16, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
- return __builtin_amdgcn_image_sample_2d_v4f16_f32(100, f32, f32, tex, vec4i32, 0, 120, 110);
+ return __builtin_amdgcn_image_sample_2d_v4f16_f32(15, f32, f32, tex, vec4i32, 0, 120, 110);
}
// CHECK-LABEL: define dso_local float @test_builtin_amdgcn_image_sample_2darray_f32_f32(
@@ -997,11 +997,11 @@ half4 test_builtin_amdgcn_image_sample_2d_v4f16_f32(half4 v4f16, int i32, float
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.image.sample.2darray.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.image.sample.2darray.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
// CHECK-NEXT: ret float [[TMP5]]
//
float test_builtin_amdgcn_image_sample_2darray_f32_f32(int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
- return __builtin_amdgcn_image_sample_2darray_f32_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+ return __builtin_amdgcn_image_sample_2darray_f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_2darray_v4f32_f32(
@@ -1030,11 +1030,11 @@ float test_builtin_amdgcn_image_sample_2darray_f32_f32(int i32, float f32, __amd
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP5]]
//
float4 test_builtin_amdgcn_image_sample_2darray_v4f32_f32(float4 v4f32, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
- return __builtin_amdgcn_image_sample_2darray_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+ return __builtin_amdgcn_image_sample_2darray_v4f32_f32(15, f32, f32, f32, tex, vec4i32, 0, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_2darray_v4f16_f32(
@@ -1063,11 +1063,11 @@ float4 test_builtin_amdgcn_image_sample_2darray_v4f32_f32(float4 v4f32, int i32,
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.2darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.2darray.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP5]]
//
half4 test_builtin_amdgcn_image_sample_2darray_v4f16_f32(half4 v4f16, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
- return __builtin_amdgcn_image_sample_2darray_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+ return __builtin_amdgcn_image_sample_2darray_v4f16_f32(15, f32, f32, f32, tex, vec4i32, 0, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_3d_v4f32_f32(
@@ -1096,11 +1096,11 @@ half4 test_builtin_amdgcn_image_sample_2darray_v4f16_f32(half4 v4f16, int i32, f
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP5]]
//
float4 test_builtin_amdgcn_image_sample_3d_v4f32_f32(float4 v4f32, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
- return __builtin_amdgcn_image_sample_3d_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+ return __builtin_amdgcn_image_sample_3d_v4f32_f32(15, f32, f32, f32, tex, vec4i32, 0, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_3d_v4f16_f32(
@@ -1129,11 +1129,11 @@ float4 test_builtin_amdgcn_image_sample_3d_v4f32_f32(float4 v4f32, int i32, floa
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.3d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.3d.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP5]]
//
half4 test_builtin_amdgcn_image_sample_3d_v4f16_f32(half4 v4f16, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
- return __builtin_amdgcn_image_sample_3d_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+ return __builtin_amdgcn_image_sample_3d_v4f16_f32(15, f32, f32, f32, tex, vec4i32, 0, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x float> @test_builtin_amdgcn_image_sample_cube_v4f32_f32(
@@ -1162,11 +1162,11 @@ half4 test_builtin_amdgcn_image_sample_3d_v4f16_f32(half4 v4f16, int i32, float
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
// CHECK-NEXT: ret <4 x float> [[TMP5]]
//
float4 test_builtin_amdgcn_image_sample_cube_v4f32_f32(float4 v4f32, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
- return __builtin_amdgcn_image_sample_cube_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+ return __builtin_amdgcn_image_sample_cube_v4f32_f32(15, f32, f32, f32, tex, vec4i32, 0, 120, 110);
}
// CHECK-LABEL: define dso_local <4 x half> @test_builtin_amdgcn_image_sample_cube_v4f16_f32(
@@ -1195,9 +1195,9 @@ float4 test_builtin_amdgcn_image_sample_cube_v4f32_f32(float4 v4f32, int i32, fl
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32
// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32
// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.cube.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.cube.v4f16.f32.v8i32.v4i32(i32 15, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110)
// CHECK-NEXT: ret <4 x half> [[TMP5]]
//
half4 test_builtin_amdgcn_image_sample_cube_v4f16_f32(half4 v4f16, int i32, float f32, __amdgpu_texture_t tex, int4 vec4i32) {
- return __builtin_amdgcn_image_sample_cube_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110);
+ return __builtin_amdgcn_image_sample_cube_v4f16_f32(15, f32, f32, f32, tex, vec4i32, 0, 120, 110);
}
>From a63396f36562658e1e795ff8dc2748841ec26881 Mon Sep 17 00:00:00 2001
From: Alexey Sachkov <alexey.sachkov at amd.com>
Date: Thu, 5 Feb 2026 06:44:40 -0600
Subject: [PATCH 5/7] Add more context to the verifier error
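Pass the offending call to `Check()` so that the verifier prints the
instruction together with the error message. A sketch of the expected
output (exact formatting of the printed value may differ):

  llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits
  than there are elements in the return type
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 31, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)

The test checks are updated accordingly to match the intrinsic name on
the line following the error message.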
---
llvm/lib/IR/Verifier.cpp | 2 +-
.../intrinsic-amdgcn-image-load-dmask.ll | 249 +++++++++---------
2 files changed, 120 insertions(+), 131 deletions(-)
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 3e62d9525dacc..b7516681eeb8f 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -7177,7 +7177,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
unsigned NumActiveBits = popcount(DMask);
Check(NumActiveBits <= VWidth,
"llvm.amdgcn.image.load.* intrinsic mask cannot have more active "
- "bits than there are elements in the return type");
+ "bits than there are elements in the return type", &Call);
break;
}
case Intrinsic::nvvm_setmaxnreg_inc_sync_aligned_u32:
diff --git a/llvm/test/Verifier/AMDGPU/intrinsic-amdgcn-image-load-dmask.ll b/llvm/test/Verifier/AMDGPU/intrinsic-amdgcn-image-load-dmask.ll
index 1cb6fe420f320..ac27e96f3b2b0 100644
--- a/llvm/test/Verifier/AMDGPU/intrinsic-amdgcn-image-load-dmask.ll
+++ b/llvm/test/Verifier/AMDGPU/intrinsic-amdgcn-image-load-dmask.ll
@@ -4,295 +4,284 @@ define amdgpu_ps void @load_1d(<8 x i32> inreg %rsrc, i32 %s) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 31, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK-NEXT: @llvm.amdgcn.image.load.1d.v4f32.i32
ret void
}
define amdgpu_ps void @load_1d_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s) {
main_body:
- %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 31, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ %v = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 31, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
+; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK-NEXT: @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32
ret void
}
define amdgpu_ps void @load_1d_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s) {
main_body:
%v = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 -1, i32 %s, <8 x i32> %rsrc, i32 2, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK-NEXT: @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32
+ ret void
+}
+
+define amdgpu_ps void @load_1d_V2_tfe(<8 x i32> inreg %rsrc, i32 %s) {
+main_body:
+ %v = call {<2 x float>, i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32 -1, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
+; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK-NEXT: @llvm.amdgcn.image.load.1d.sl_v2f32i32s.i32
+ ret void
+}
+
+define amdgpu_ps void @load_1d_V1_tfe(<8 x i32> inreg %rsrc, i32 %s) {
+main_body:
+ %v = call {float, i32} @llvm.amdgcn.image.load.1d.f32i32.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
+; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK-NEXT: @llvm.amdgcn.image.load.1d.sl_f32i32s.i32
+ ret void
+}
+
+define amdgpu_ps void @load_1d_V2(<8 x i32> inreg %rsrc, i32 %s) {
+main_body:
+ %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK-NEXT: @llvm.amdgcn.image.load.1d.v2f32.i32
+ ret void
+}
+
+define amdgpu_ps void @load_1d_V1(<8 x i32> inreg %rsrc, i32 %s) {
+main_body:
+ %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK-NEXT: @llvm.amdgcn.image.load.1d.f32.i32
ret void
}
define amdgpu_ps void @load_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 -1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK-NEXT: @llvm.amdgcn.image.load.2d.v4f32.i32
ret void
}
define amdgpu_ps void @load_2d_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t) {
main_body:
- %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2d.v4f32i32.i32(i32 31, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ %v = call {<4 x float>, i32} @llvm.amdgcn.image.load.2d.v4f32i32.i32(i32 31, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
+; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK-NEXT: @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32
+ ret void
+}
+
+define amdgpu_ps void @image_load_mmo(<8 x i32> inreg %rsrc, ptr addrspace(3) %lds, <2 x i32> %c) #0 {
+ %c0 = extractelement <2 x i32> %c, i32 0
+ %c1 = extractelement <2 x i32> %c, i32 1
+ %tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 3, i32 %c0, i32 %c1, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK-NEXT: @llvm.amdgcn.image.load.2d.f32.i32
ret void
}
define amdgpu_ps void @load_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 31, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK-NEXT: @llvm.amdgcn.image.load.3d.v4f32.i32
ret void
}
define amdgpu_ps void @load_3d_tfe_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %r) {
main_body:
- %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.3d.v4f32i32.i32(i32 31, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 3, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ %v = call {<4 x float>, i32} @llvm.amdgcn.image.load.3d.v4f32i32.i32(i32 31, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 3, i32 0)
+; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK-NEXT: @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i32
ret void
}
define amdgpu_ps void @load_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32 31, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK-NEXT: @llvm.amdgcn.image.load.cube.v4f32.i32
ret void
}
define amdgpu_ps void @load_cube_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice) {
main_body:
- %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.cube.v4f32i32.i32(i32 31, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ %v = call {<4 x float>, i32} @llvm.amdgcn.image.load.cube.v4f32i32.i32(i32 31, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0)
+; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK-NEXT: @llvm.amdgcn.image.load.cube.sl_v4f32i32s.i32
ret void
}
define amdgpu_ps void @load_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32 31, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK-NEXT: @llvm.amdgcn.image.load.1darray.v4f32.i32
ret void
}
define amdgpu_ps void @load_1darray_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %slice) {
main_body:
- %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1darray.v4f32i32.i32(i32 31, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 1, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ %v = call {<4 x float>, i32} @llvm.amdgcn.image.load.1darray.v4f32i32.i32(i32 31, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 1, i32 0)
+; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK-NEXT: @llvm.amdgcn.image.load.1darray.sl_v4f32i32s.i32
ret void
}
define amdgpu_ps void @load_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32 31, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK-NEXT: @llvm.amdgcn.image.load.2darray.v4f32.i32
ret void
}
define amdgpu_ps void @load_2darray_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice) {
main_body:
- %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2darray.v4f32i32.i32(i32 31, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ %v = call {<4 x float>, i32} @llvm.amdgcn.image.load.2darray.v4f32i32.i32(i32 31, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0)
+; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK-NEXT: @llvm.amdgcn.image.load.2darray.sl_v4f32i32s.i32
ret void
}
define amdgpu_ps void @load_2dmsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32 31, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK-NEXT: @llvm.amdgcn.image.load.2dmsaa.v4f32.i32
ret void
}
define amdgpu_ps void @load_2dmsaa_both(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %fragid) {
main_body:
- %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2dmsaa.v4f32i32.i32(i32 31, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 3, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ %v = call {<4 x float>, i32} @llvm.amdgcn.image.load.2dmsaa.v4f32i32.i32(i32 31, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 3, i32 0)
+; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK-NEXT: @llvm.amdgcn.image.load.2dmsaa.sl_v4f32i32s.i32
ret void
}
define amdgpu_ps void @load_2darraymsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 31, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK-NEXT: @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32
ret void
}
define amdgpu_ps void @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
main_body:
- %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2darraymsaa.v4f32i32.i32(i32 31, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ %v = call {<4 x float>, i32} @llvm.amdgcn.image.load.2darraymsaa.v4f32i32.i32(i32 31, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
+; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK-NEXT: @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32
ret void
}
define amdgpu_ps void @load_mip_1d(<8 x i32> inreg %rsrc, i32 %s, i32 %mip) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 31, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK-NEXT: @llvm.amdgcn.image.load.mip.1d.v4f32.i32
ret void
}
define amdgpu_ps void @load_mip_1d_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %mip) {
main_body:
- %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.mip.1d.v4f32i32.i32(i32 31, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 2, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ %v = call {<4 x float>, i32} @llvm.amdgcn.image.load.mip.1d.v4f32i32.i32(i32 31, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 2, i32 0)
+; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK-NEXT: @llvm.amdgcn.image.load.mip.1d.sl_v4f32i32s.i32
ret void
}
define amdgpu_ps void @load_mip_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 31, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK-NEXT: @llvm.amdgcn.image.load.mip.2d.v4f32.i32
ret void
}
define amdgpu_ps void @load_mip_2d_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %mip) {
main_body:
- %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v4f32i32.i32(i32 31, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
- ret void
-}
-
-define amdgpu_ps void @load_1d_V2_tfe(<8 x i32> inreg %rsrc, i32 %s) {
-main_body:
- %v = call {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32 -1, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
- ret void
-}
-
-define amdgpu_ps void @load_1d_V1_tfe(<8 x i32> inreg %rsrc, i32 %s) {
-main_body:
- %v = call {float,i32} @llvm.amdgcn.image.load.1d.f32i32.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
- ret void
-}
-
-define amdgpu_ps void @load_mip_2d_tfe_nouse(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
-main_body:
- %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v4f32i32.i32(i32 31, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ %v = call {<4 x float>, i32} @llvm.amdgcn.image.load.mip.2d.v4f32i32.i32(i32 31, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0)
+; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK-NEXT: @llvm.amdgcn.image.load.mip.2d.sl_v4f32i32s.i32
ret void
}
define amdgpu_ps void @load_mip_2d_tfe_nouse_V2(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
main_body:
- %v = call {<2 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v2f32i32.i32(i32 7, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ %v = call {<2 x float>, i32} @llvm.amdgcn.image.load.mip.2d.v2f32i32.i32(i32 7, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0)
+; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK-NEXT: @llvm.amdgcn.image.load.mip.2d.sl_v2f32i32s.i32
ret void
}
define amdgpu_ps void @load_mip_2d_tfe_nouse_V1(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
main_body:
%v = call {float, i32} @llvm.amdgcn.image.load.mip.2d.f32i32.i32(i32 3, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
- ret void
-}
-
-define amdgpu_ps void @load_1d_tfe_V4(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s) {
-main_body:
- %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 31, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
- ret void
-}
-
-define amdgpu_ps void @load_1d_tfe_V2(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s) {
-main_body:
- %v = call {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32 -1, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK-NEXT: @llvm.amdgcn.image.load.mip.2d.sl_f32i32s.i32
ret void
}
-
define amdgpu_ps void @load_mip_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r, i32 %mip) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32 31, i32 %s, i32 %t, i32 %r, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK-NEXT: @llvm.amdgcn.image.load.mip.3d.v4f32.i32
ret void
}
define amdgpu_ps void @load_mip_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %mip) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32 31, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK-NEXT: @llvm.amdgcn.image.load.mip.cube.v4f32.i32
ret void
}
define amdgpu_ps void @load_mip_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice, i32 %mip) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32 31, i32 %s, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK-NEXT: @llvm.amdgcn.image.load.mip.1darray.v4f32.i32
ret void
}
define amdgpu_ps void @load_mip_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %mip) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32 31, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
- ret void
-}
-
-define amdgpu_ps void @load_1d_V1(<8 x i32> inreg %rsrc, i32 %s) {
-main_body:
- %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
- ret void
-}
-
-define amdgpu_ps void @load_1d_V2(<8 x i32> inreg %rsrc, i32 %s) {
-main_body:
- %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
- ret void
-}
-
-define amdgpu_ps void @load_1d_glc(<8 x i32> inreg %rsrc, i32 %s) {
-main_body:
- %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 31, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
- ret void
-}
-
-define amdgpu_ps void @load_1d_slc(<8 x i32> inreg %rsrc, i32 %s) {
-main_body:
- %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 31, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
- ret void
-}
-
-define amdgpu_ps void @load_1d_glc_slc(<8 x i32> inreg %rsrc, i32 %s) {
-main_body:
- %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 31, i32 %s, <8 x i32> %rsrc, i32 0, i32 3)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
- ret void
-}
-
-define amdgpu_ps void @image_load_mmo(<8 x i32> inreg %rsrc, ptr addrspace(3) %lds, <2 x i32> %c) #0 {
- %c0 = extractelement <2 x i32> %c, i32 0
- %c1 = extractelement <2 x i32> %c, i32 1
- %tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 3, i32 %c0, i32 %c1, <8 x i32> %rsrc, i32 0, i32 0)
-; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+; CHECK-NEXT: @llvm.amdgcn.image.load.mip.2darray.v4f32.i32
ret void
}
declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare {float,i32} @llvm.amdgcn.image.load.1d.f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
+declare {float, i32} @llvm.amdgcn.image.load.1d.f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
+declare {<2 x float>, i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
+declare {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.2d.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {<4 x float>, i32} @llvm.amdgcn.image.load.2d.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.3d.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {<4 x float>, i32} @llvm.amdgcn.image.load.3d.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.cube.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {<4 x float>, i32} @llvm.amdgcn.image.load.cube.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.1darray.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {<4 x float>, i32} @llvm.amdgcn.image.load.1darray.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.2darray.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {<4 x float>, i32} @llvm.amdgcn.image.load.2darray.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.2dmsaa.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {<4 x float>, i32} @llvm.amdgcn.image.load.2dmsaa.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.2darraymsaa.v4f32i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {<4 x float>, i32} @llvm.amdgcn.image.load.2darraymsaa.v4f32i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.mip.1d.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<2 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v2f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {float,i32} @llvm.amdgcn.image.load.mip.2d.f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {<4 x float>, i32} @llvm.amdgcn.image.load.mip.1d.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {<4 x float>, i32} @llvm.amdgcn.image.load.mip.2d.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {<2 x float>, i32} @llvm.amdgcn.image.load.mip.2d.v2f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {float, i32} @llvm.amdgcn.image.load.mip.2d.f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
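The dmask rule that the updated checks above exercise is easy to state: each set bit of the immediate dmask selects one channel to load, so the mask may not have more set bits than the return vector has elements. Below is a minimal standalone sketch of that rule, assuming C++20 for std::popcount; the helper name isValidImageLoadDMask is hypothetical, while the in-tree check lives in Verifier::visitIntrinsicCall (see PATCH 7 below):

#include <bit>
#include <cstdint>

// True when a dmask is compatible with a vector return type of VWidth
// elements: one set bit per loaded channel, one element per channel.
bool isValidImageLoadDMask(uint32_t DMask, unsigned VWidth) {
  unsigned NumActiveBits = std::popcount(DMask);
  return NumActiveBits <= VWidth;
}

// Example: dmask 7 enables three channels, which is invalid for a
// <2 x float> return -- exactly the fuzzer-found pattern tested above.
// isValidImageLoadDMask(7, 2) == false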
From 21540fba1c72c3ba0af7c4b9a3d8797e9de3b128 Mon Sep 17 00:00:00 2001
From: Alexey Sachkov <alexey.sachkov at amd.com>
Date: Thu, 5 Feb 2026 07:18:14 -0600
Subject: [PATCH 6/7] Revert the original InstCombine change
---
.../AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 2 -
.../fix-amdgcn-image-load-dmask-crash.ll | 121 ------------------
2 files changed, 123 deletions(-)
delete mode 100644 llvm/test/Transforms/InstCombine/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 0756a3c257738..2cd1902785546 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1861,8 +1861,6 @@ static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
const unsigned Bit = 1 << SrcIdx;
if (!!(DMaskVal & Bit)) {
- if (OrigLdStIdx >= DemandedElts.getBitWidth())
- break;
if (!!DemandedElts[OrigLdStIdx])
NewDMaskVal |= Bit;
OrigLdStIdx++;
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll b/llvm/test/Transforms/InstCombine/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll
deleted file mode 100644
index 7f16d69623eed..0000000000000
--- a/llvm/test/Transforms/InstCombine/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll
+++ /dev/null
@@ -1,121 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
-; RUN: opt -mtriple amdgcn -passes=instcombine --disable-verify %s -S -o - | FileCheck %s
-;
-; Verifier is disabled on purpose, because the IR is deemed invalid.
-;
-; The main purpose of the test is to ensure that we do not crash when the mask
-; argument "enables" more elements than there are in the return type.
-; This specific corner case was discovered by a fuzzer.
-
-define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
-; CHECK-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask(
-; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) {
-; CHECK-NEXT: [[MAIN_BODY:.*:]]
-; CHECK-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
-; CHECK-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
-; CHECK-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
-; CHECK-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
-; CHECK-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] poison, <2 x float> [[I]], 0
-; CHECK-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
-; CHECK-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
-; CHECK-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
-; CHECK-NEXT: ret [4 x <2 x float>] [[I7]]
-;
-main_body:
- %i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 7, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
- %i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 11, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
- %i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 15, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
- %i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 -1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
- %i4 = insertvalue [4 x <2 x float>] poison, <2 x float> %i, 0
- %i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1
- %i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2
- %i7 = insertvalue [4 x <2 x float>] %i6, <2 x float> %i3, 3
- ret [4 x <2 x float>] %i7
-}
-
-define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
-; CHECK-LABEL: define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask(
-; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) {
-; CHECK-NEXT: [[MAIN_BODY:.*:]]
-; CHECK-NEXT: [[I:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 11, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
-; CHECK-NEXT: [[I1:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
-; CHECK-NEXT: [[I2:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
-; CHECK-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 -11, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
-; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x float> [[I3]], <2 x float> poison, <3 x i32> <i32 0, i32 1, i32 poison>
-; CHECK-NEXT: [[I4:%.*]] = insertvalue [4 x <3 x float>] poison, <3 x float> [[I]], 0
-; CHECK-NEXT: [[I5:%.*]] = insertvalue [4 x <3 x float>] [[I4]], <3 x float> [[I1]], 1
-; CHECK-NEXT: [[I6:%.*]] = insertvalue [4 x <3 x float>] [[I5]], <3 x float> [[I2]], 2
-; CHECK-NEXT: [[I7:%.*]] = insertvalue [4 x <3 x float>] [[I6]], <3 x float> [[TMP0]], 3
-; CHECK-NEXT: ret [4 x <3 x float>] [[I7]]
-;
-main_body:
- %i = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 11, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
- %i1 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 15, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
- %i2 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 31, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
- %i3 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 -11, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
- %i4 = insertvalue [4 x <3 x float>] poison, <3 x float> %i, 0
- %i5 = insertvalue [4 x <3 x float>] %i4, <3 x float> %i1, 1
- %i6 = insertvalue [4 x <3 x float>] %i5, <3 x float> %i2, 2
- %i7 = insertvalue [4 x <3 x float>] %i6, <3 x float> %i3, 3
- ret [4 x <3 x float>] %i7
-}
-
-define amdgpu_ps [4 x <2 x float>] @load_2darraymsaa_v4v2f32_dmask(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
-; CHECK-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2darraymsaa_v4v2f32_dmask(
-; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[SLICE:%.*]]) {
-; CHECK-NEXT: [[MAIN_BODY:.*:]]
-; CHECK-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
-; CHECK-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
-; CHECK-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
-; CHECK-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
-; CHECK-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] poison, <2 x float> [[I]], 0
-; CHECK-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
-; CHECK-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
-; CHECK-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
-; CHECK-NEXT: ret [4 x <2 x float>] [[I7]]
-;
-main_body:
- %i = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 7, i32 %s, i32 %t, i32 %slice, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
- %i1 = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 11, i32 %s, i32 %t, i32 %slice, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
- %i2 = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
- %i3 = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 -1, i32 %s, i32 %t, i32 %slice, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
- %i4 = insertvalue [4 x <2 x float>] poison, <2 x float> %i, 0
- %i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1
- %i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2
- %i7 = insertvalue [4 x <2 x float>] %i6, <2 x float> %i3, 3
- ret [4 x <2 x float>] %i7
-}
-
-define amdgpu_ps [4 x <3 x float>] @load_2darraymsaa_v4v3f32_dmask(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
-; CHECK-LABEL: define amdgpu_ps [4 x <3 x float>] @load_2darraymsaa_v4v3f32_dmask(
-; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[SLICE:%.*]]) {
-; CHECK-NEXT: [[MAIN_BODY:.*:]]
-; CHECK-NEXT: [[I:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32.v8i32(i32 11, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
-; CHECK-NEXT: [[I1:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
-; CHECK-NEXT: [[I2:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
-; CHECK-NEXT: [[I3:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
-; CHECK-NEXT: [[I4:%.*]] = insertvalue [4 x <3 x float>] poison, <3 x float> [[I]], 0
-; CHECK-NEXT: [[I5:%.*]] = insertvalue [4 x <3 x float>] [[I4]], <3 x float> [[I1]], 1
-; CHECK-NEXT: [[I6:%.*]] = insertvalue [4 x <3 x float>] [[I5]], <3 x float> [[I2]], 2
-; CHECK-NEXT: [[I7:%.*]] = insertvalue [4 x <3 x float>] [[I6]], <3 x float> [[I3]], 3
-; CHECK-NEXT: ret [4 x <3 x float>] [[I7]]
-;
-main_body:
- %i = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32(i32 11, i32 %s, i32 %t, i32 %slice, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
- %i1 = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
- %i2 = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32(i32 31, i32 %s, i32 %t, i32 %slice, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
- %i3 = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32(i32 -1, i32 %s, i32 %t, i32 %slice, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
- %i4 = insertvalue [4 x <3 x float>] poison, <3 x float> %i, 0
- %i5 = insertvalue [4 x <3 x float>] %i4, <3 x float> %i1, 1
- %i6 = insertvalue [4 x <3 x float>] %i5, <3 x float> %i2, 2
- %i7 = insertvalue [4 x <3 x float>] %i6, <3 x float> %i3, 3
- ret [4 x <3 x float>] %i7
-}
-
-declare <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
-declare <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
-declare <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
-declare <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
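The revert above drops the InstCombine-side bounds check because the Verifier now rejects over-wide dmasks outright (see PATCH 7 below and the --disable-verify note in the deleted test), so such IR should no longer reach the combiner. As a rough, self-contained illustration of the crash the guard prevented -- the hypothetical helper narrowDMask condenses the loop from simplifyAMDGCNMemoryIntrinsicDemanded, with std::vector<bool> standing in for llvm::APInt, whose bit access asserts on an out-of-range index:

#include <vector>

// DemandedElts has one bit per element of the return vector. A malformed
// dmask can enable up to four channels, so without the bounds check the
// running index OrigLdStIdx walks past the end of DemandedElts.
unsigned narrowDMask(unsigned DMaskVal,
                     const std::vector<bool> &DemandedElts) {
  unsigned NewDMaskVal = 0;
  unsigned OrigLdStIdx = 0;
  for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
    const unsigned Bit = 1u << SrcIdx;
    if (DMaskVal & Bit) {
      if (OrigLdStIdx >= DemandedElts.size())
        break; // the guard removed by this revert
      if (DemandedElts[OrigLdStIdx])
        NewDMaskVal |= Bit;
      ++OrigLdStIdx;
    }
  }
  return NewDMaskVal;
}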
From 64f4b6d54ef4c3182573071b3d29185f7b175f5a Mon Sep 17 00:00:00 2001
From: Alexey Sachkov <alexey.sachkov at amd.com>
Date: Thu, 5 Feb 2026 07:22:07 -0600
Subject: [PATCH 7/7] Apply clang-format
---
llvm/lib/IR/Verifier.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index b7516681eeb8f..aee9816beb541 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -7177,7 +7177,8 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
unsigned NumActiveBits = popcount(DMask);
Check(NumActiveBits <= VWidth,
"llvm.amdgcn.image.load.* intrinsic mask cannot have more active "
- "bits than there are elements in the return type", &Call);
+ "bits than there are elements in the return type",
+ &Call);
break;
}
case Intrinsic::nvvm_setmaxnreg_inc_sync_aligned_u32: