[llvm] [AMDGPU] Fix crash in InstCombine (PR #179511)
Alexey Sachkov via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 5 01:06:08 PST 2026
https://github.com/AlexeySachkov updated https://github.com/llvm/llvm-project/pull/179511
>From 4539640c220c51f1ec4ce687dafa4029c9b7caf9 Mon Sep 17 00:00:00 2001
From: Alexey Sachkov <alexey.sachkov at amd.com>
Date: Tue, 3 Feb 2026 11:16:58 -0600
Subject: [PATCH 1/3] [AMDGPU] Fix crash in InstCombine
Added an out-of-bounds check to avoid crashes when simplifying
`@llvm.amdgcn.image.load.*` intrinsics that return vector types and the
mask argument "enables" more elements than there are in the return type.
---
.../AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 2 +
.../fix-amdgcn-image-load-dmask-crash.ll | 123 ++++++++++++++++++
2 files changed, 125 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 2cd1902785546..0756a3c257738 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1861,6 +1861,8 @@ static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
const unsigned Bit = 1 << SrcIdx;
if (!!(DMaskVal & Bit)) {
+ if (OrigLdStIdx >= DemandedElts.getBitWidth())
+ break;
if (!!DemandedElts[OrigLdStIdx])
NewDMaskVal |= Bit;
OrigLdStIdx++;
diff --git a/llvm/test/CodeGen/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll b/llvm/test/CodeGen/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll
new file mode 100644
index 0000000000000..571b9e4f123b7
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll
@@ -0,0 +1,123 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -mtriple amdgcn -passes=instcombine %s -S -o - | FileCheck %s
+;
+; The main purpose of the test is to ensure that we do not crash when the mask
+; argument "enables" more elements than there are in return type.
+; This specific corner case was discovered by a fuzzer.
+
+define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+; CHECK-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask(
+; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) {
+; CHECK-NEXT: [[MAIN_BODY:.*:]]
+; CHECK-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
+; CHECK-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
+; CHECK-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
+; CHECK-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
+; CHECK-NEXT: ret [4 x <2 x float>] [[I7]]
+;
+main_body:
+ %i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 7, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+ %i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 11, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
+ %i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 15, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
+ %i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 -1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
+ %i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0
+ %i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1
+ %i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2
+ %i7 = insertvalue [4 x <2 x float>] %i6, <2 x float> %i3, 3
+ ret [4 x <2 x float>] %i7
+}
+
+define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+; CHECK-LABEL: define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask(
+; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) {
+; CHECK-NEXT: [[MAIN_BODY:.*:]]
+; CHECK-NEXT: [[I:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 11, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT: [[I1:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT: [[I2:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 -11, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x float> [[I3]], <2 x float> poison, <3 x i32> <i32 0, i32 1, i32 poison>
+; CHECK-NEXT: [[I4:%.*]] = insertvalue [4 x <3 x float>] undef, <3 x float> [[I]], 0
+; CHECK-NEXT: [[I5:%.*]] = insertvalue [4 x <3 x float>] [[I4]], <3 x float> [[I1]], 1
+; CHECK-NEXT: [[I6:%.*]] = insertvalue [4 x <3 x float>] [[I5]], <3 x float> [[I2]], 2
+; CHECK-NEXT: [[I7:%.*]] = insertvalue [4 x <3 x float>] [[I6]], <3 x float> [[TMP0]], 3
+; CHECK-NEXT: ret [4 x <3 x float>] [[I7]]
+;
+main_body:
+ %i = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 11, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+ %i1 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 15, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
+ %i2 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 31, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
+ %i3 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 -11, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
+ %i4 = insertvalue [4 x <3 x float>] undef, <3 x float> %i, 0
+ %i5 = insertvalue [4 x <3 x float>] %i4, <3 x float> %i1, 1
+ %i6 = insertvalue [4 x <3 x float>] %i5, <3 x float> %i2, 2
+ %i7 = insertvalue [4 x <3 x float>] %i6, <3 x float> %i3, 3
+ ret [4 x <3 x float>] %i7
+}
+
+define amdgpu_ps [4 x <2 x float>] @load_2darraymsaa_v4v2f32_dmask(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
+; CHECK-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2darraymsaa_v4v2f32_dmask(
+; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[SLICE:%.*]]) {
+; CHECK-NEXT: [[MAIN_BODY:.*:]]
+; CHECK-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
+; CHECK-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
+; CHECK-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
+; CHECK-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
+; CHECK-NEXT: ret [4 x <2 x float>] [[I7]]
+;
+main_body:
+ %i = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 7, i32 %s, i32 %t, i32 %slice, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+ %i1 = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 11, i32 %s, i32 %t, i32 %slice, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
+ %i2 = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
+ %i3 = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 -1, i32 %s, i32 %t, i32 %slice, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
+ %i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0
+ %i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1
+ %i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2
+ %i7 = insertvalue [4 x <2 x float>] %i6, <2 x float> %i3, 3
+ ret [4 x <2 x float>] %i7
+}
+
+define amdgpu_ps [4 x <3 x float>] @load_2darraymsaa_v4v3f32_dmask(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
+; CHECK-LABEL: define amdgpu_ps [4 x <3 x float>] @load_2darraymsaa_v4v3f32_dmask(
+; CHECK-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[SLICE:%.*]]) {
+; CHECK-NEXT: [[MAIN_BODY:.*:]]
+; CHECK-NEXT: [[I:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32.v8i32(i32 11, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT: [[I1:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT: [[I2:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT: [[I3:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
+; CHECK-NEXT: [[I4:%.*]] = insertvalue [4 x <3 x float>] undef, <3 x float> [[I]], 0
+; CHECK-NEXT: [[I5:%.*]] = insertvalue [4 x <3 x float>] [[I4]], <3 x float> [[I1]], 1
+; CHECK-NEXT: [[I6:%.*]] = insertvalue [4 x <3 x float>] [[I5]], <3 x float> [[I2]], 2
+; CHECK-NEXT: [[I7:%.*]] = insertvalue [4 x <3 x float>] [[I6]], <3 x float> [[I3]], 3
+; CHECK-NEXT: ret [4 x <3 x float>] [[I7]]
+;
+main_body:
+ %i = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32(i32 11, i32 %s, i32 %t, i32 %slice, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+ %i1 = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
+ %i2 = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32(i32 31, i32 %s, i32 %t, i32 %slice, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
+ %i3 = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32(i32 -1, i32 %s, i32 %t, i32 %slice, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
+ %i4 = insertvalue [4 x <3 x float>] undef, <3 x float> %i, 0
+ %i5 = insertvalue [4 x <3 x float>] %i4, <3 x float> %i1, 1
+ %i6 = insertvalue [4 x <3 x float>] %i5, <3 x float> %i2, 2
+ %i7 = insertvalue [4 x <3 x float>] %i6, <3 x float> %i3, 3
+ ret [4 x <3 x float>] %i7
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
+declare <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
+declare <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
+declare <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
+declare <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
>From 93e5ccdadd7133acb94772d2ba1ca9afa5c19208 Mon Sep 17 00:00:00 2001
From: Alexey Sachkov <alexey.sachkov at amd.com>
Date: Wed, 4 Feb 2026 05:16:42 -0600
Subject: [PATCH 2/3] Apply comments
---
.../fix-amdgcn-image-load-dmask-crash.ll | 20 ++++++++-----------
1 file changed, 8 insertions(+), 12 deletions(-)
rename llvm/test/{CodeGen => Transforms/InstCombine}/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll (91%)
diff --git a/llvm/test/CodeGen/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll b/llvm/test/Transforms/InstCombine/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll
similarity index 91%
rename from llvm/test/CodeGen/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll
rename to llvm/test/Transforms/InstCombine/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll
index 571b9e4f123b7..c81f1fe7cde9a 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll
@@ -13,7 +13,7 @@ define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask(<8 x i32> inreg %r
; CHECK-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
; CHECK-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
; CHECK-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
-; CHECK-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
+; CHECK-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] poison, <2 x float> [[I]], 0
; CHECK-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
; CHECK-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
; CHECK-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
@@ -24,7 +24,7 @@ main_body:
%i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 11, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
%i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 15, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
%i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 -1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
- %i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0
+ %i4 = insertvalue [4 x <2 x float>] poison, <2 x float> %i, 0
%i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1
%i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2
%i7 = insertvalue [4 x <2 x float>] %i6, <2 x float> %i3, 3
@@ -40,7 +40,7 @@ define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask(<8 x i32> inreg %r
; CHECK-NEXT: [[I2:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
; CHECK-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 -11, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x float> [[I3]], <2 x float> poison, <3 x i32> <i32 0, i32 1, i32 poison>
-; CHECK-NEXT: [[I4:%.*]] = insertvalue [4 x <3 x float>] undef, <3 x float> [[I]], 0
+; CHECK-NEXT: [[I4:%.*]] = insertvalue [4 x <3 x float>] poison, <3 x float> [[I]], 0
; CHECK-NEXT: [[I5:%.*]] = insertvalue [4 x <3 x float>] [[I4]], <3 x float> [[I1]], 1
; CHECK-NEXT: [[I6:%.*]] = insertvalue [4 x <3 x float>] [[I5]], <3 x float> [[I2]], 2
; CHECK-NEXT: [[I7:%.*]] = insertvalue [4 x <3 x float>] [[I6]], <3 x float> [[TMP0]], 3
@@ -51,7 +51,7 @@ main_body:
%i1 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 15, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
%i2 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 31, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
%i3 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 -11, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
- %i4 = insertvalue [4 x <3 x float>] undef, <3 x float> %i, 0
+ %i4 = insertvalue [4 x <3 x float>] poison, <3 x float> %i, 0
%i5 = insertvalue [4 x <3 x float>] %i4, <3 x float> %i1, 1
%i6 = insertvalue [4 x <3 x float>] %i5, <3 x float> %i2, 2
%i7 = insertvalue [4 x <3 x float>] %i6, <3 x float> %i3, 3
@@ -66,7 +66,7 @@ define amdgpu_ps [4 x <2 x float>] @load_2darraymsaa_v4v2f32_dmask(<8 x i32> inr
; CHECK-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
; CHECK-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
; CHECK-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
-; CHECK-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
+; CHECK-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] poison, <2 x float> [[I]], 0
; CHECK-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
; CHECK-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
; CHECK-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
@@ -77,7 +77,7 @@ main_body:
%i1 = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 11, i32 %s, i32 %t, i32 %slice, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
%i2 = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
%i3 = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 -1, i32 %s, i32 %t, i32 %slice, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
- %i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0
+ %i4 = insertvalue [4 x <2 x float>] poison, <2 x float> %i, 0
%i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1
%i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2
%i7 = insertvalue [4 x <2 x float>] %i6, <2 x float> %i3, 3
@@ -92,7 +92,7 @@ define amdgpu_ps [4 x <3 x float>] @load_2darraymsaa_v4v3f32_dmask(<8 x i32> inr
; CHECK-NEXT: [[I1:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
; CHECK-NEXT: [[I2:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
; CHECK-NEXT: [[I3:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
-; CHECK-NEXT: [[I4:%.*]] = insertvalue [4 x <3 x float>] undef, <3 x float> [[I]], 0
+; CHECK-NEXT: [[I4:%.*]] = insertvalue [4 x <3 x float>] poison, <3 x float> [[I]], 0
; CHECK-NEXT: [[I5:%.*]] = insertvalue [4 x <3 x float>] [[I4]], <3 x float> [[I1]], 1
; CHECK-NEXT: [[I6:%.*]] = insertvalue [4 x <3 x float>] [[I5]], <3 x float> [[I2]], 2
; CHECK-NEXT: [[I7:%.*]] = insertvalue [4 x <3 x float>] [[I6]], <3 x float> [[I3]], 3
@@ -103,21 +103,17 @@ main_body:
%i1 = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
%i2 = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32(i32 31, i32 %s, i32 %t, i32 %slice, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
%i3 = call <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32(i32 -1, i32 %s, i32 %t, i32 %slice, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
- %i4 = insertvalue [4 x <3 x float>] undef, <3 x float> %i, 0
+ %i4 = insertvalue [4 x <3 x float>] poison, <3 x float> %i, 0
%i5 = insertvalue [4 x <3 x float>] %i4, <3 x float> %i1, 1
%i6 = insertvalue [4 x <3 x float>] %i5, <3 x float> %i2, 2
%i7 = insertvalue [4 x <3 x float>] %i6, <3 x float> %i3, 3
ret [4 x <3 x float>] %i7
}
-; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
declare <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
declare <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
declare <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
declare <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
>From 6a3f4d59505e551b746ed1624ab9133f010d9c2d Mon Sep 17 00:00:00 2001
From: Alexey Sachkov <alexey.sachkov at amd.com>
Date: Thu, 5 Feb 2026 03:05:14 -0600
Subject: [PATCH 3/3] Catch the corner case in the Verifier
---
llvm/lib/IR/Verifier.cpp | 33 ++
.../fix-amdgcn-image-load-dmask-crash.ll | 4 +-
.../intrinsic-amdgcn-image-load-dmask.ll | 307 ++++++++++++++++++
3 files changed, 343 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/Verifier/AMDGPU/intrinsic-amdgcn-image-load-dmask.ll
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 3d44d1317ecc7..3e62d9525dacc 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -7147,6 +7147,39 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
&Call, Op);
break;
}
+ case Intrinsic::amdgcn_image_load_1d:
+ case Intrinsic::amdgcn_image_load_1darray:
+ case Intrinsic::amdgcn_image_load_2d:
+ case Intrinsic::amdgcn_image_load_2darray:
+ case Intrinsic::amdgcn_image_load_2darraymsaa:
+ case Intrinsic::amdgcn_image_load_2dmsaa:
+ case Intrinsic::amdgcn_image_load_3d:
+ case Intrinsic::amdgcn_image_load_cube:
+ case Intrinsic::amdgcn_image_load_mip_1d:
+ case Intrinsic::amdgcn_image_load_mip_1darray:
+ case Intrinsic::amdgcn_image_load_mip_2d:
+ case Intrinsic::amdgcn_image_load_mip_2darray:
+ case Intrinsic::amdgcn_image_load_mip_3d:
+ case Intrinsic::amdgcn_image_load_mip_cube: {
+  // The LLVM IR definition of these intrinsics allows them to return any
+  // type. The logic below is based on what is covered by the
+  // llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll test.
+ Type *T = Call.getType();
+ if (auto *ST = dyn_cast<StructType>(Call.getType()))
+ T = ST->getElementType(0);
+
+ unsigned VWidth = 1;
+ if (auto *FVT = dyn_cast<FixedVectorType>(T))
+ VWidth = FVT->getNumElements();
+
+ Value *V = Call.getArgOperand(0);
+ unsigned DMask = cast<ConstantInt>(V)->getZExtValue();
+ unsigned NumActiveBits = popcount(DMask);
+ Check(NumActiveBits <= VWidth,
+ "llvm.amdgcn.image.load.* intrinsic mask cannot have more active "
+ "bits than there are elements in the return type");
+ break;
+ }
case Intrinsic::nvvm_setmaxnreg_inc_sync_aligned_u32:
case Intrinsic::nvvm_setmaxnreg_dec_sync_aligned_u32: {
Value *V = Call.getArgOperand(0);
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll b/llvm/test/Transforms/InstCombine/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll
index c81f1fe7cde9a..7f16d69623eed 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/fix-amdgcn-image-load-dmask-crash.ll
@@ -1,5 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
-; RUN: opt -mtriple amdgcn -passes=instcombine %s -S -o - | FileCheck %s
+; RUN: opt -mtriple amdgcn -passes=instcombine --disable-verify %s -S -o - | FileCheck %s
+;
+; Verifier is disabled on purpose, because the IR is deemed invalid.
;
; The main purpose of the test is to ensure that we do not crash when the mask
; argument "enables" more elements than there are in return type.
diff --git a/llvm/test/Verifier/AMDGPU/intrinsic-amdgcn-image-load-dmask.ll b/llvm/test/Verifier/AMDGPU/intrinsic-amdgcn-image-load-dmask.ll
new file mode 100644
index 0000000000000..1cb6fe420f320
--- /dev/null
+++ b/llvm/test/Verifier/AMDGPU/intrinsic-amdgcn-image-load-dmask.ll
@@ -0,0 +1,307 @@
+; RUN: not llvm-as %s -disable-output 2>&1 | FileCheck %s
+
+define amdgpu_ps void @load_1d(<8 x i32> inreg %rsrc, i32 %s) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 31, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_1d_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s) {
+main_body:
+ %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 31, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_1d_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s) {
+main_body:
+ %v = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 -1, i32 %s, <8 x i32> %rsrc, i32 2, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 -1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_2d_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t) {
+main_body:
+ %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2d.v4f32i32.i32(i32 31, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 31, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_3d_tfe_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %r) {
+main_body:
+ %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.3d.v4f32i32.i32(i32 31, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 3, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32 31, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_cube_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice) {
+main_body:
+ %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.cube.v4f32i32.i32(i32 31, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32 31, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_1darray_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %slice) {
+main_body:
+ %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1darray.v4f32i32.i32(i32 31, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 1, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32 31, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_2darray_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice) {
+main_body:
+ %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2darray.v4f32i32.i32(i32 31, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_2dmsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32 31, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_2dmsaa_both(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %fragid) {
+main_body:
+ %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2dmsaa.v4f32i32.i32(i32 31, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 3, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_2darraymsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 31, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
+main_body:
+ %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2darraymsaa.v4f32i32.i32(i32 31, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_mip_1d(<8 x i32> inreg %rsrc, i32 %s, i32 %mip) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 31, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_mip_1d_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %mip) {
+main_body:
+ %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.mip.1d.v4f32i32.i32(i32 31, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 2, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_mip_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 31, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_mip_2d_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %mip) {
+main_body:
+ %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v4f32i32.i32(i32 31, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_1d_V2_tfe(<8 x i32> inreg %rsrc, i32 %s) {
+main_body:
+ %v = call {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32 -1, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_1d_V1_tfe(<8 x i32> inreg %rsrc, i32 %s) {
+main_body:
+ %v = call {float,i32} @llvm.amdgcn.image.load.1d.f32i32.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_mip_2d_tfe_nouse(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
+main_body:
+ %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v4f32i32.i32(i32 31, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_mip_2d_tfe_nouse_V2(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
+main_body:
+ %v = call {<2 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v2f32i32.i32(i32 7, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_mip_2d_tfe_nouse_V1(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
+main_body:
+ %v = call {float, i32} @llvm.amdgcn.image.load.mip.2d.f32i32.i32(i32 3, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_1d_tfe_V4(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s) {
+main_body:
+ %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 31, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_1d_tfe_V2(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s) {
+main_body:
+ %v = call {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32 -1, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+
+define amdgpu_ps void @load_mip_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r, i32 %mip) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32 31, i32 %s, i32 %t, i32 %r, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_mip_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %mip) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32 31, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_mip_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice, i32 %mip) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32 31, i32 %s, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_mip_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %mip) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32 31, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_1d_V1(<8 x i32> inreg %rsrc, i32 %s) {
+main_body:
+ %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 3, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_1d_V2(<8 x i32> inreg %rsrc, i32 %s) {
+main_body:
+ %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_1d_glc(<8 x i32> inreg %rsrc, i32 %s) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 31, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_1d_slc(<8 x i32> inreg %rsrc, i32 %s) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 31, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @load_1d_glc_slc(<8 x i32> inreg %rsrc, i32 %s) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 31, i32 %s, <8 x i32> %rsrc, i32 0, i32 3)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+define amdgpu_ps void @image_load_mmo(<8 x i32> inreg %rsrc, ptr addrspace(3) %lds, <2 x i32> %c) #0 {
+ %c0 = extractelement <2 x i32> %c, i32 0
+ %c1 = extractelement <2 x i32> %c, i32 1
+ %tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 3, i32 %c0, i32 %c1, <8 x i32> %rsrc, i32 0, i32 0)
+; CHECK-NEXT: llvm.amdgcn.image.load.* intrinsic mask cannot have more active bits than there are elements in the return type
+ ret void
+}
+
+declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
+declare {float,i32} @llvm.amdgcn.image.load.1d.f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
+declare {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.2d.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.3d.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.cube.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.1darray.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.2darray.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.2dmsaa.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.2darraymsaa.v4f32i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.mip.1d.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {<2 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v2f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare {float,i32} @llvm.amdgcn.image.load.mip.2d.f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+
+declare float @llvm.amdgcn.image.load.1d.f32.i32(i32, i32, <8 x i32>, i32, i32) #1
+declare float @llvm.amdgcn.image.load.2d.f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32, i32, <8 x i32>, i32, i32) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+attributes #2 = { nounwind readnone }
More information about the llvm-commits
mailing list