[llvm] [AMDGCN] Error checking for llvm.amdgcn.init.exec.from.input intrinsic (PR #128176)
Julian Brown via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 21 05:45:16 PST 2025
https://github.com/jtb20 created https://github.com/llvm/llvm-project/pull/128176
This patch catches some cases of illegal use of the llvm.amdgcn.init.exec.from.input intrinsic, which is only permitted to take its first parameter from an SGPR argument of the containing function.
The intrinsic in question isn't intended to be user-facing, so we don't need to go to extraordinary lengths to make the error handling bulletproof. I made it an error instead of an assertion though so the new test cases work unconditionally.
From f29d60fd8d970ade1a17b7a29a0cea8fd564d814 Mon Sep 17 00:00:00 2001
From: Julian Brown <julian.brown at amd.com>
Date: Tue, 18 Feb 2025 11:05:56 -0600
Subject: [PATCH] [AMDGCN] Error checking for llvm.amdgcn.init.exec.from.input
intrinsic
This patch catches some cases of illegal use of the
llvm.amdgcn.init.exec.from.input intrinsic, which is only permitted to
take its first parameter from an SGPR argument of the containing function.
The intrinsic in question isn't intended to be user-facing, so we
don't need to go to extraordinary lengths to make the error handling
bulletproof. I made it an error instead of an assertion though so the
new test cases work unconditionally.
---
llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp | 16 +++++++-
...gbankselect-intrinsic-initexecfrominput.ll | 40 +++++++++++++++++++
.../AMDGPU/intrinsic-initexecfrominput.ll | 40 +++++++++++++++++++
3 files changed, 95 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-intrinsic-initexecfrominput.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/intrinsic-initexecfrominput.ll
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 3293602db0901..3ba8b386c0360 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -1622,7 +1622,21 @@ void SIWholeQuadMode::lowerInitExec(MachineInstr &MI) {
MachineInstr *FirstMI = &*MBB->begin();
if (InputReg.isVirtual()) {
MachineInstr *DefInstr = MRI->getVRegDef(InputReg);
- assert(DefInstr && DefInstr->isCopy());
+ // This condition catches some cases where a
+ // llvm.amdgcn.init.exec.from.input intrinsic's first argument comes from
+ // somewhere other than a (SGPR) function argument, which is forbidden.
+ if (!DefInstr || !DefInstr->isCopy() ||
+ (DefInstr->getNumOperands() == 2 && DefInstr->getOperand(1).isReg() &&
+ TRI->isVectorRegister(*MRI, DefInstr->getOperand(1).getReg()))) {
+ MachineFunction *MF = MBB->getParent();
+ DebugLoc DL = DefInstr->getDebugLoc();
+ DiagnosticInfoUnsupported IllegalArg(
+ MF->getFunction(), "EXEC must be initialized using function argument",
+ DL, DS_Error);
+ LLVMContext &C = MF->getFunction().getContext();
+ C.diagnose(IllegalArg);
+ return;
+ }
if (DefInstr->getParent() == MBB) {
if (DefInstr != FirstMI) {
// If the `InputReg` is defined in current block, we also need to
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-intrinsic-initexecfrominput.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-intrinsic-initexecfrominput.ll
new file mode 100644
index 0000000000000..3d02e52b0babc
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-intrinsic-initexecfrominput.ll
@@ -0,0 +1,40 @@
+; RUN: not llc -mtriple=amdgcn -mcpu=gfx942 -O3 -global-isel=true -o - %s 2>&1 | FileCheck -check-prefix=ERR %s
+
+source_filename = "llvm.amdgcn.init.exec.wave32.ll"
+
+@G = global i32 -2147483648
+@G.1 = global <32 x i32> splat (i32 1)
+@G.2 = global <16 x i64> splat (i64 1)
+@G.3 = global <8 x i1> zeroinitializer
+
+define amdgpu_ps float @test_init_exec(float %a, float %b) {
+main_body:
+ %s = fadd float %a, %b
+ call void @llvm.amdgcn.init.exec(i64 74565)
+ ret float %s
+}
+
+define amdgpu_ps float @test_init_exec_from_input(i32 inreg %0, i32 inreg %1, i32 inreg %2, i32 inreg %count, float %a, float %b) {
+main_body:
+ %LGV2 = load <16 x i64>, ptr @G.2, align 128
+ %LGV1 = load <32 x i32>, ptr @G.1, align 128
+ %LGV = load i32, ptr @G, align 4
+ %C = call <8 x i1> @f(<32 x i32> %LGV1, <16 x i64> %LGV2, <2 x half> splat (half 0xH5140))
+ %B = or i32 0, %LGV
+ %s = fadd float %a, %b
+ call void @llvm.amdgcn.init.exec.from.input(i32 %B, i32 8)
+ store <8 x i1> %C, ptr @G.3, align 1
+ ret float %s
+}
+
+; Function Attrs: convergent nocallback nofree nounwind willreturn
+declare void @llvm.amdgcn.init.exec(i64 immarg) #0
+
+; Function Attrs: convergent nocallback nofree nounwind willreturn
+declare void @llvm.amdgcn.init.exec.from.input(i32, i32 immarg) #0
+
+declare <8 x i1> @f(<32 x i32>, <16 x i64>, <2 x half>)
+
+attributes #0 = { convergent nocallback nofree nounwind willreturn }
+
+ERR: error: <unknown>:0:0: in function test_init_exec_from_input float (i32, i32, i32, i32, float, float): EXEC must be initialized using function argument
diff --git a/llvm/test/CodeGen/AMDGPU/intrinsic-initexecfrominput.ll b/llvm/test/CodeGen/AMDGPU/intrinsic-initexecfrominput.ll
new file mode 100644
index 0000000000000..04dd30401a58e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/intrinsic-initexecfrominput.ll
@@ -0,0 +1,40 @@
+; RUN: not llc -mtriple=amdgcn -mcpu=gfx942 -O3 -global-isel=false -o - %s 2>&1 | FileCheck -check-prefix=ERR %s
+
+source_filename = "llvm.amdgcn.init.exec.wave32.ll"
+
+@G = global i32 -2147483648
+@G.1 = global <32 x i32> splat (i32 1)
+@G.2 = global <16 x i64> splat (i64 1)
+@G.3 = global <8 x i1> zeroinitializer
+
+define amdgpu_ps float @test_init_exec(float %a, float %b) {
+main_body:
+ %s = fadd float %a, %b
+ call void @llvm.amdgcn.init.exec(i64 74565)
+ ret float %s
+}
+
+define amdgpu_ps float @test_init_exec_from_input(i32 inreg %0, i32 inreg %1, i32 inreg %2, i32 inreg %count, float %a, float %b) {
+main_body:
+ %LGV2 = load <16 x i64>, ptr @G.2, align 128
+ %LGV1 = load <32 x i32>, ptr @G.1, align 128
+ %LGV = load i32, ptr @G, align 4
+ %C = call <8 x i1> @f(<32 x i32> %LGV1, <16 x i64> %LGV2, <2 x half> splat (half 0xH5140))
+ %B = or i32 0, %LGV
+ %s = fadd float %a, %b
+ call void @llvm.amdgcn.init.exec.from.input(i32 %B, i32 8)
+ store <8 x i1> %C, ptr @G.3, align 1
+ ret float %s
+}
+
+; Function Attrs: convergent nocallback nofree nounwind willreturn
+declare void @llvm.amdgcn.init.exec(i64 immarg) #0
+
+; Function Attrs: convergent nocallback nofree nounwind willreturn
+declare void @llvm.amdgcn.init.exec.from.input(i32, i32 immarg) #0
+
+declare <8 x i1> @f(<32 x i32>, <16 x i64>, <2 x half>)
+
+attributes #0 = { convergent nocallback nofree nounwind willreturn }
+
+ERR: error: <unknown>:0:0: in function test_init_exec_from_input float (i32, i32, i32, i32, float, float): EXEC must be initialized using function argument
More information about the llvm-commits
mailing list