[llvm] [AMDGPU] SelDAG: fix lowering of undefined workitem intrinsics (PR #126058)

Robert Imschweiler via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 7 03:12:18 PST 2025


https://github.com/ro-i updated https://github.com/llvm/llvm-project/pull/126058

>From 9c8af8a0a76019069d04b93608ac0ebd4e2df234 Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Wed, 5 Feb 2025 08:02:43 -0600
Subject: [PATCH 1/2] [AMDGPU] SelDAG: fix lowering of undefined workitem
 intrinsics

GlobalISel already handles undefined workitem.id.{x,y,z} intrinsics, SelDAG failed in AMDGPUISelLowering.cpp due to a failed assertion in `AMDGPUTargetLowering::loadInputValue`: `Arg && "Attempting to load missing argument"`.
This commit changes the behavior of SelDAG to instead use a zero constant.

This LLVM defect was identified via the AMD Fuzzing project.
---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     | 23 ++++++++++--
 .../llvm.amdgcn.workitem.id-undefined-attr.ll | 36 +++++++++++++++++++
 2 files changed, 56 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id-undefined-attr.ll

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index b632c50dae0e359..47654890ff50e74 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -8790,11 +8790,28 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                              AMDGPUFunctionArgInfo::LDS_KERNEL_ID);
   }
   case Intrinsic::amdgcn_workitem_id_x:
-    return lowerWorkitemID(DAG, Op, 0, MFI->getArgInfo().WorkItemIDX);
+    if (!MFI->getArgInfo().WorkItemIDX) {
+      // It's undefined behavior if a function marked with the amdgpu-no-*
+      // attributes uses the corresponding intrinsic.
+      return DAG.getConstant(0, SDLoc(Op),
+                             EVT::getIntegerVT(*DAG.getContext(), 32));
+    } else {
+      return lowerWorkitemID(DAG, Op, 0, MFI->getArgInfo().WorkItemIDX);
+    }
   case Intrinsic::amdgcn_workitem_id_y:
-    return lowerWorkitemID(DAG, Op, 1, MFI->getArgInfo().WorkItemIDY);
+    if (!MFI->getArgInfo().WorkItemIDY) {
+      return DAG.getConstant(0, SDLoc(Op),
+                             EVT::getIntegerVT(*DAG.getContext(), 32));
+    } else {
+      return lowerWorkitemID(DAG, Op, 1, MFI->getArgInfo().WorkItemIDY);
+    }
   case Intrinsic::amdgcn_workitem_id_z:
-    return lowerWorkitemID(DAG, Op, 2, MFI->getArgInfo().WorkItemIDZ);
+    if (!MFI->getArgInfo().WorkItemIDZ) {
+      return DAG.getConstant(0, SDLoc(Op),
+                             EVT::getIntegerVT(*DAG.getContext(), 32));
+    } else {
+      return lowerWorkitemID(DAG, Op, 2, MFI->getArgInfo().WorkItemIDZ);
+    }
   case Intrinsic::amdgcn_wavefrontsize:
     return DAG.getConstant(MF.getSubtarget<GCNSubtarget>().getWavefrontSize(),
                            SDLoc(Op), MVT::i32);
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id-undefined-attr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id-undefined-attr.ll
new file mode 100644
index 000000000000000..0e6a448fcae5a96
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id-undefined-attr.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -O0 --global-isel=false -o - %s | FileCheck %s
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare i32 @llvm.amdgcn.workitem.id.y() #0
+declare i32 @llvm.amdgcn.workitem.id.z() #0
+
+define amdgpu_ps i32 @test_workitem_id_x() #1 {
+; CHECK-LABEL: test_workitem_id_x:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_mov_b32 s0, 0
+; CHECK-NEXT:    ; return to shader part epilog
+  %id = call i32 @llvm.amdgcn.workitem.id.x()
+  ret i32 %id
+}
+
+define amdgpu_ps i32 @test_workitem_id_y() #1 {
+; CHECK-LABEL: test_workitem_id_y:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_mov_b32 s0, 0
+; CHECK-NEXT:    ; return to shader part epilog
+  %id = call i32 @llvm.amdgcn.workitem.id.y()
+  ret i32 %id
+}
+
+define amdgpu_ps i32 @test_workitem_id_z() #1 {
+; CHECK-LABEL: test_workitem_id_z:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_mov_b32 s0, 0
+; CHECK-NEXT:    ; return to shader part epilog
+  %id = call i32 @llvm.amdgcn.workitem.id.z()
+  ret i32 %id
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }

>From b9210231f10c4929d77f3e1fd4fa8521710a7dff Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Fri, 7 Feb 2025 05:11:20 -0600
Subject: [PATCH 2/2] implement feedback

---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     | 28 +++++----------
 .../GlobalISel/llvm.amdgcn.workitem.id.ll     | 25 +++++++++++++
 .../llvm.amdgcn.workitem.id-undefined-attr.ll | 36 -------------------
 .../CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll | 25 +++++++++++++
 4 files changed, 58 insertions(+), 56 deletions(-)
 delete mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id-undefined-attr.ll

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 47654890ff50e74..39c95bacad26bf7 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -8619,6 +8619,11 @@ SDValue SITargetLowering::lowerWorkitemID(SelectionDAG &DAG, SDValue Op,
   if (MaxID == 0)
     return DAG.getConstant(0, SL, MVT::i32);
 
+  // It's undefined behavior if a function marked with the amdgpu-no-*
+  // attributes uses the corresponding intrinsic.
+  if (!Arg)
+    return DAG.getUNDEF(EVT::getIntegerVT(*DAG.getContext(), 32));
+
   SDValue Val = loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
                                SDLoc(DAG.getEntryNode()), Arg);
 
@@ -8790,28 +8795,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                              AMDGPUFunctionArgInfo::LDS_KERNEL_ID);
   }
   case Intrinsic::amdgcn_workitem_id_x:
-    if (!MFI->getArgInfo().WorkItemIDX) {
-      // It's undefined behavior if a function marked with the amdgpu-no-*
-      // attributes uses the corresponding intrinsic.
-      return DAG.getConstant(0, SDLoc(Op),
-                             EVT::getIntegerVT(*DAG.getContext(), 32));
-    } else {
-      return lowerWorkitemID(DAG, Op, 0, MFI->getArgInfo().WorkItemIDX);
-    }
+    return lowerWorkitemID(DAG, Op, 0, MFI->getArgInfo().WorkItemIDX);
   case Intrinsic::amdgcn_workitem_id_y:
-    if (!MFI->getArgInfo().WorkItemIDY) {
-      return DAG.getConstant(0, SDLoc(Op),
-                             EVT::getIntegerVT(*DAG.getContext(), 32));
-    } else {
-      return lowerWorkitemID(DAG, Op, 1, MFI->getArgInfo().WorkItemIDY);
-    }
+    return lowerWorkitemID(DAG, Op, 1, MFI->getArgInfo().WorkItemIDY);
   case Intrinsic::amdgcn_workitem_id_z:
-    if (!MFI->getArgInfo().WorkItemIDZ) {
-      return DAG.getConstant(0, SDLoc(Op),
-                             EVT::getIntegerVT(*DAG.getContext(), 32));
-    } else {
-      return lowerWorkitemID(DAG, Op, 2, MFI->getArgInfo().WorkItemIDZ);
-    }
+    return lowerWorkitemID(DAG, Op, 2, MFI->getArgInfo().WorkItemIDZ);
   case Intrinsic::amdgcn_wavefrontsize:
     return DAG.getConstant(MF.getSubtarget<GCNSubtarget>().getWavefrontSize(),
                            SDLoc(Op), MVT::i32);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
index d5646820a19832f..eac575742cf0e88 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
@@ -9,6 +9,7 @@
 ; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs < %t.v4.ll | FileCheck -check-prefixes=ALL,PACKED-TID %s
 ; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %t.v4.ll | FileCheck -check-prefixes=ALL,PACKED-TID %s
 ; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx11-generic -verify-machineinstrs -amdgpu-enable-vopd=0 < %t.v6.ll | FileCheck -check-prefixes=ALL,PACKED-TID %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -o - %t.v4.ll | FileCheck --check-prefixes=UNDEF %s
 
 declare i32 @llvm.amdgcn.workitem.id.x() #0
 declare i32 @llvm.amdgcn.workitem.id.y() #0
@@ -195,6 +196,30 @@ define amdgpu_kernel void @test_reqd_workgroup_size_z_only(ptr %out) !reqd_work_
   ret void
 }
 
+define amdgpu_kernel void @undefined_workitem_x_only() {
+; UNDEF-LABEL: undefined_workitem_x_only:
+; UNDEF:       ; %bb.0:
+; UNDEF-NEXT:    s_endpgm
+  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+  ret void
+}
+
+define amdgpu_kernel void @undefined_workitem_y_only() {
+; UNDEF-LABEL: undefined_workitem_y_only:
+; UNDEF:       ; %bb.0:
+; UNDEF-NEXT:    s_endpgm
+  %id.y = call i32 @llvm.amdgcn.workitem.id.y()
+  ret void
+}
+
+define amdgpu_kernel void @undefined_workitem_z_only() {
+; UNDEF-LABEL: undefined_workitem_z_only:
+; UNDEF:       ; %bb.0:
+; UNDEF-NEXT:    s_endpgm
+  %id.z = call i32 @llvm.amdgcn.workitem.id.z()
+  ret void
+}
+
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id-undefined-attr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id-undefined-attr.ll
deleted file mode 100644
index 0e6a448fcae5a96..000000000000000
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id-undefined-attr.ll
+++ /dev/null
@@ -1,36 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -O0 --global-isel=false -o - %s | FileCheck %s
-
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i32 @llvm.amdgcn.workitem.id.y() #0
-declare i32 @llvm.amdgcn.workitem.id.z() #0
-
-define amdgpu_ps i32 @test_workitem_id_x() #1 {
-; CHECK-LABEL: test_workitem_id_x:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_mov_b32 s0, 0
-; CHECK-NEXT:    ; return to shader part epilog
-  %id = call i32 @llvm.amdgcn.workitem.id.x()
-  ret i32 %id
-}
-
-define amdgpu_ps i32 @test_workitem_id_y() #1 {
-; CHECK-LABEL: test_workitem_id_y:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_mov_b32 s0, 0
-; CHECK-NEXT:    ; return to shader part epilog
-  %id = call i32 @llvm.amdgcn.workitem.id.y()
-  ret i32 %id
-}
-
-define amdgpu_ps i32 @test_workitem_id_z() #1 {
-; CHECK-LABEL: test_workitem_id_z:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_mov_b32 s0, 0
-; CHECK-NEXT:    ; return to shader part epilog
-  %id = call i32 @llvm.amdgcn.workitem.id.z()
-  ret i32 %id
-}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll
index eaee8ec73fe411a..af46f0c02eda26e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll
@@ -5,6 +5,7 @@
 ; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -mattr=-flat-for-global < %t.bc | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s
 ; RUN: llc -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %t.bc | FileCheck -check-prefixes=ALL,PACKED-TID %s
 ; RUN: llc -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -amdgpu-enable-vopd=0 < %t.bc | FileCheck -check-prefixes=ALL,PACKED-TID %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -o - %s | FileCheck --check-prefix=UNDEF %s
 
 declare i32 @llvm.amdgcn.workitem.id.x() #0
 declare i32 @llvm.amdgcn.workitem.id.y() #0
@@ -128,6 +129,30 @@ define amdgpu_kernel void @test_reqd_workgroup_size_z_only(ptr %out) !reqd_work_
   ret void
 }
 
+define amdgpu_kernel void @undefined_workitem_x_only() {
+; UNDEF-LABEL: undefined_workitem_x_only:
+; UNDEF:       ; %bb.0:
+; UNDEF-NEXT:    s_endpgm
+  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+  ret void
+}
+
+define amdgpu_kernel void @undefined_workitem_y_only() {
+; UNDEF-LABEL: undefined_workitem_y_only:
+; UNDEF:       ; %bb.0:
+; UNDEF-NEXT:    s_endpgm
+  %id.y = call i32 @llvm.amdgcn.workitem.id.y()
+  ret void
+}
+
+define amdgpu_kernel void @undefined_workitem_z_only() {
+; UNDEF-LABEL: undefined_workitem_z_only:
+; UNDEF:       ; %bb.0:
+; UNDEF-NEXT:    s_endpgm
+  %id.z = call i32 @llvm.amdgcn.workitem.id.z()
+  ret void
+}
+
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
 



More information about the llvm-commits mailing list