[llvm] de1600a - AMDGPU: Avoid enabling kernel workitem IDs with reqd_work_group_size

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 18 10:52:12 PST 2022


Author: Matt Arsenault
Date: 2022-01-18T13:52:04-05:00
New Revision: de1600a1d946d1bd9ca5f7b0711012a8f31a66e2

URL: https://github.com/llvm/llvm-project/commit/de1600a1d946d1bd9ca5f7b0711012a8f31a66e2
DIFF: https://github.com/llvm/llvm-project/commit/de1600a1d946d1bd9ca5f7b0711012a8f31a66e2.diff

LOG: AMDGPU: Avoid enabling kernel workitem IDs with reqd_work_group_size

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
    llvm/test/CodeGen/AMDGPU/call-reqd-group-size.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 292ca4529883b..cca8565c9ff97 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -118,10 +118,12 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
     if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x"))
       WorkItemIDX = true;
 
-    if (!F.hasFnAttribute("amdgpu-no-workitem-id-y"))
+    if (!F.hasFnAttribute("amdgpu-no-workitem-id-y") &&
+        ST.getMaxWorkitemID(F, 1) != 0)
       WorkItemIDY = true;
 
-    if (!F.hasFnAttribute("amdgpu-no-workitem-id-z"))
+    if (!F.hasFnAttribute("amdgpu-no-workitem-id-z") &&
+        ST.getMaxWorkitemID(F, 2) != 0)
       WorkItemIDZ = true;
 
     if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr"))

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
index a49058d7f8c13..c301c2c7405ec 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
@@ -128,7 +128,7 @@ define void @test_workitem_id_z_func(i32 addrspace(1)* %out) #1 {
 ; FIXME: Should be able to avoid enabling in kernel inputs
 ; FIXME: Packed tid should avoid the and
 ; ALL-LABEL: {{^}}test_reqd_workgroup_size_x_only:
-; CO-V2: enable_vgpr_workitem_id = 2
+; CO-V2: enable_vgpr_workitem_id = 0
 
 ; ALL-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
 ; UNPACKED-DAG: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
@@ -149,7 +149,7 @@ define amdgpu_kernel void @test_reqd_workgroup_size_x_only(i32* %out) !reqd_work
 }
 
 ; ALL-LABEL: {{^}}test_reqd_workgroup_size_y_only:
-; CO-V2: enable_vgpr_workitem_id = 2
+; CO-V2: enable_vgpr_workitem_id = 1
 
 ; ALL: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
 ; ALL: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]

diff  --git a/llvm/test/CodeGen/AMDGPU/call-reqd-group-size.ll b/llvm/test/CodeGen/AMDGPU/call-reqd-group-size.ll
index cb781a14dad7f..ad0e5aaf54d6d 100644
--- a/llvm/test/CodeGen/AMDGPU/call-reqd-group-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-reqd-group-size.ll
@@ -63,7 +63,7 @@ define amdgpu_kernel void @known_z_0(i32 addrspace(1)* %out) !reqd_work_group_si
   call void @callee()
   ret void
 }
-; CHECK: .amdhsa_system_vgpr_workitem_id 2
+; CHECK: .amdhsa_system_vgpr_workitem_id 1
 
 define amdgpu_kernel void @known_yz_0(i32 addrspace(1)* %out) !reqd_work_group_size !3 {
 ; CHECK-LABEL: known_yz_0:
@@ -82,7 +82,7 @@ define amdgpu_kernel void @known_yz_0(i32 addrspace(1)* %out) !reqd_work_group_s
   call void @callee()
   ret void
 }
-; CHECK: .amdhsa_system_vgpr_workitem_id 2
+; CHECK: .amdhsa_system_vgpr_workitem_id 0
 
 define amdgpu_kernel void @known_xz_0(i32 addrspace(1)* %out) !reqd_work_group_size !4 {
 ; CHECK-LABEL: known_xz_0:
@@ -101,7 +101,7 @@ define amdgpu_kernel void @known_xz_0(i32 addrspace(1)* %out) !reqd_work_group_s
   call void @callee()
   ret void
 }
-; CHECK: .amdhsa_system_vgpr_workitem_id 2
+; CHECK: .amdhsa_system_vgpr_workitem_id 1
 
 
 define amdgpu_kernel void @known_xyz_0(i32 addrspace(1)* %out) !reqd_work_group_size !5 {
@@ -121,7 +121,7 @@ define amdgpu_kernel void @known_xyz_0(i32 addrspace(1)* %out) !reqd_work_group_
   call void @callee()
   ret void
 }
-; CHECK: .amdhsa_system_vgpr_workitem_id 2
+; CHECK: .amdhsa_system_vgpr_workitem_id 0
 
 attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll
index 62952e70a6c37..0001397123ded 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll
@@ -63,10 +63,9 @@ define amdgpu_kernel void @test_workitem_id_z(i32 addrspace(1)* %out) #1 {
   ret void
 }
 
-; FIXME: Should be able to avoid enabling in kernel inputs
 ; FIXME: Packed tid should avoid the and
 ; ALL-LABEL: {{^}}test_reqd_workgroup_size_x_only:
-; CO-V2: enable_vgpr_workitem_id = 2
+; CO-V2: enable_vgpr_workitem_id = 0
 
 ; ALL-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
 ; UNPACKED-DAG: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
@@ -87,7 +86,7 @@ define amdgpu_kernel void @test_reqd_workgroup_size_x_only(i32* %out) !reqd_work
 }
 
 ; ALL-LABEL: {{^}}test_reqd_workgroup_size_y_only:
-; CO-V2: enable_vgpr_workitem_id = 2
+; CO-V2: enable_vgpr_workitem_id = 1
 
 ; ALL: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
 ; ALL: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]


        


More information about the llvm-commits mailing list