[llvm] bcba311 - [AMDGPU] SelDAG: fix lowering of undefined workitem intrinsics (#126058)

via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 12 15:41:44 PST 2025


Author: Robert Imschweiler
Date: 2025-02-12T18:41:41-05:00
New Revision: bcba3117c057158472af83b36a38e0df0f57f4fc

URL: https://github.com/llvm/llvm-project/commit/bcba3117c057158472af83b36a38e0df0f57f4fc
DIFF: https://github.com/llvm/llvm-project/commit/bcba3117c057158472af83b36a38e0df0f57f4fc.diff

LOG: [AMDGPU] SelDAG: fix lowering of undefined workitem intrinsics (#126058)

GlobalISel already handles undefined workitem.id.{x,y,z} intrinsics, but
SelDAG failed in AMDGPUISelLowering.cpp due to a failed assertion in
`AMDGPUTargetLowering::loadInputValue`: `Arg && "Attempting to load
missing argument"`. This commit changes the behavior of SelDAG to
instead return an undef value, matching GlobalISel.

This LLVM defect was identified via the AMD Fuzzing project.

Added: 
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id-unsupported-calling-convention.ll

Modified: 
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index b632c50dae0e3..28debbcfc1ede 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -8619,6 +8619,11 @@ SDValue SITargetLowering::lowerWorkitemID(SelectionDAG &DAG, SDValue Op,
   if (MaxID == 0)
     return DAG.getConstant(0, SL, MVT::i32);
 
+  // It's undefined behavior if a function marked with the amdgpu-no-*
+  // attributes uses the corresponding intrinsic.
+  if (!Arg)
+    return DAG.getUNDEF(Op->getValueType(0));
+
   SDValue Val = loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
                                SDLoc(DAG.getEntryNode()), Arg);
 

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id-unsupported-calling-convention.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id-unsupported-calling-convention.ll
new file mode 100644
index 0000000000000..684b59c66ee8e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id-unsupported-calling-convention.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -O0 -stop-after=amdgpu-isel -o - %s | FileCheck --check-prefix=SelDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=legalizer -o - %s | FileCheck --check-prefix=GlobalISel %s
+
+declare i32 @llvm.amdgcn.workitem.id.x()
+declare i32 @llvm.amdgcn.workitem.id.y()
+declare i32 @llvm.amdgcn.workitem.id.z()
+
+define amdgpu_ps void @undefined_workitems(ptr %p, ptr %q, ptr %r) {
+  ; SelDAG-LABEL: name: undefined_workitems
+  ; SelDAG: bb.0 (%ir-block.0):
+  ; SelDAG-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+  ; SelDAG-NEXT: {{  $}}
+  ; SelDAG-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5
+  ; SelDAG-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+  ; SelDAG-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+  ; SelDAG-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; SelDAG-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; SelDAG-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; SelDAG-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
+  ; SelDAG-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+  ; SelDAG-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+  ; SelDAG-NEXT:   [[COPY6:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
+  ; SelDAG-NEXT:   [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
+  ; SelDAG-NEXT:   [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE2]]
+  ; SelDAG-NEXT:   S_ENDPGM 0
+  ;
+  ; GlobalISel-LABEL: name: undefined_workitems
+  ; GlobalISel: bb.1 (%ir-block.0):
+  ; GlobalISel-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+  ; GlobalISel-NEXT: {{  $}}
+  ; GlobalISel-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GlobalISel-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GlobalISel-NEXT:   [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; GlobalISel-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GlobalISel-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GlobalISel-NEXT:   [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; GlobalISel-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; GlobalISel-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; GlobalISel-NEXT:   [[MV2:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+  ; GlobalISel-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; GlobalISel-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+  ; GlobalISel-NEXT:   G_STORE [[COPY6]](s32), [[MV]](p0) :: (store (s32) into %ir.p)
+  ; GlobalISel-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+  ; GlobalISel-NEXT:   G_STORE [[COPY7]](s32), [[MV1]](p0) :: (store (s32) into %ir.q)
+  ; GlobalISel-NEXT:   G_STORE [[DEF]](s32), [[MV2]](p0) :: (store (s32) into %ir.r)
+  ; GlobalISel-NEXT:   S_ENDPGM 0
+  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+  store i32 %id.x, ptr %p
+  %id.y = call i32 @llvm.amdgcn.workitem.id.y()
+  store i32 %id.y, ptr %q
+  %id.z = call i32 @llvm.amdgcn.workitem.id.z()
+  store i32 %id.z, ptr %r
+  ret void
+}


        


More information about the llvm-commits mailing list