[llvm] 25c5da5 - AMDGPU Reduce reported maximum group size to 1024

Tue Nov 12 17:41:49 PST 2019

Author: Matt Arsenault
Date: 2019-11-13T06:34:28+05:30
New Revision: 25c5da5a426168b38fb3e9baa918faa75e4a92b4

URL: https://github.com/llvm/llvm-project/commit/25c5da5a426168b38fb3e9baa918faa75e4a92b4
DIFF: https://github.com/llvm/llvm-project/commit/25c5da5a426168b38fb3e9baa918faa75e4a92b4.diff

LOG: AMDGPU Reduce reported maximum group size to 1024

While some targets allow encoding 2048, this was never tested or
supported.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
    llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll
    llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll
    llvm/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 7d27738bf6a3..c72f93eb739c 100644

--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -312,7 +312,8 @@ unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
 }
 
 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
-  return 2048;
+  // Some subtargets allow encoding 2048, but this isn't tested or supported.
+  return 1024;
 }
 
 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,

diff  --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll
index e57ce963e3c6..d11919929564 100644
--- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll
@@ -34,13 +34,13 @@ entry:
 }
 attributes #2 = {"amdgpu-flat-work-group-size"="128,128"}
 
-; CHECK-LABEL: {{^}}min_1024_max_2048
-; CHECK: SGPRBlocks: 1
-; CHECK: VGPRBlocks: 7
-; CHECK: NumSGPRsForWavesPerEU: 12
-; CHECK: NumVGPRsForWavesPerEU: 32
+; CHECK-LABEL: {{^}}min_1024_max_1024
+; CHECK: SGPRBlocks: 0
+; CHECK: VGPRBlocks: 10
+; CHECK: NumSGPRsForWavesPerEU: 2{{$}}
+; CHECK: NumVGPRsForWavesPerEU: 43
 @var = addrspace(1) global float 0.0
-define amdgpu_kernel void @min_1024_max_2048() #3 {
+define amdgpu_kernel void @min_1024_max_1024() #3 {
   %val0 = load volatile float, float addrspace(1)* @var
   %val1 = load volatile float, float addrspace(1)* @var
   %val2 = load volatile float, float addrspace(1)* @var
@@ -127,7 +127,7 @@ define amdgpu_kernel void @min_1024_max_2048() #3 {
 
   ret void
 }
-attributes #3 = {"amdgpu-flat-work-group-size"="1024,2048"}
+attributes #3 = {"amdgpu-flat-work-group-size"="1024,1024"}
 
 ; CHECK: amdhsa.kernels:
 ; CHECK:   .max_flat_workgroup_size: 64
@@ -136,8 +136,8 @@ attributes #3 = {"amdgpu-flat-work-group-size"="1024,2048"}
 ; CHECK:   .name:                 min_64_max_128
 ; CHECK:   .max_flat_workgroup_size: 128
 ; CHECK:   .name:                 min_128_max_128
-; CHECK:   .max_flat_workgroup_size: 2048
-; CHECK:   .name:                 min_1024_max_2048
+; CHECK:   .max_flat_workgroup_size: 1024
+; CHECK:   .name:                 min_1024_max_1024
 ; CHECK: amdhsa.version:
 ; CHECK:   - 1
 ; CHECK:   - 0

diff  --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll
index d51e2d6e938b..f372fcb42667 100644
--- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll
@@ -34,13 +34,13 @@ entry:
 }
 attributes #2 = {"amdgpu-flat-work-group-size"="128,128"}
 
-; CHECK-LABEL: {{^}}min_1024_max_2048
-; CHECK: SGPRBlocks: 1
-; CHECK: VGPRBlocks: 7
-; CHECK: NumSGPRsForWavesPerEU: 12
-; CHECK: NumVGPRsForWavesPerEU: 32
+; CHECK-LABEL: {{^}}min_1024_max_1024
+; CHECK: SGPRBlocks: 0
+; CHECK: VGPRBlocks: 10
+; CHECK: NumSGPRsForWavesPerEU: 2{{$}}
+; CHECK: NumVGPRsForWavesPerEU: 43
 @var = addrspace(1) global float 0.0
-define amdgpu_kernel void @min_1024_max_2048() #3 {
+define amdgpu_kernel void @min_1024_max_1024() #3 {
   %val0 = load volatile float, float addrspace(1)* @var
   %val1 = load volatile float, float addrspace(1)* @var
   %val2 = load volatile float, float addrspace(1)* @var
@@ -127,7 +127,7 @@ define amdgpu_kernel void @min_1024_max_2048() #3 {
 
   ret void
 }
-attributes #3 = {"amdgpu-flat-work-group-size"="1024,2048"}
+attributes #3 = {"amdgpu-flat-work-group-size"="1024,1024"}
 
 ; HSAMD: NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)
 ; HSAMD: Version: [ 1, 0 ]
@@ -138,5 +138,5 @@ attributes #3 = {"amdgpu-flat-work-group-size"="1024,2048"}
 ; HSAMD:   MaxFlatWorkGroupSize: 128
 ; HSAMD: - Name:                 min_128_max_128
 ; HSAMD:   MaxFlatWorkGroupSize: 128
-; HSAMD: - Name:                 min_1024_max_2048
-; HSAMD:   MaxFlatWorkGroupSize: 2048
+; HSAMD: - Name:                 min_1024_max_1024
+; HSAMD:   MaxFlatWorkGroupSize: 1024

diff  --git a/llvm/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll b/llvm/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll
index 0702f4091c2b..778e9ed9a8f1 100644
--- a/llvm/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll
+++ b/llvm/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll
@@ -47,8 +47,9 @@ entry:
   ret void
 }
 
-; SICI: @promote_alloca_size_1600.stack = internal unnamed_addr addrspace(3) global [1600 x [5 x i32]] undef, align 4
-; GFX10: alloca [5 x i32]
+; SI-NOT: @promote_alloca_size_1600.stack
+; CI: @promote_alloca_size_1600.stack = internal unnamed_addr addrspace(3) global [1024 x [5 x i32]] undef, align 4
+; GFX10: @promote_alloca_size_1600.stack = internal unnamed_addr addrspace(3) global [1024 x [5 x i32]] undef, align 4
 
 define amdgpu_kernel void @promote_alloca_size_1600(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #2 {
 entry:
@@ -274,7 +275,7 @@ entry:
 
 attributes #0 = { nounwind "amdgpu-flat-work-group-size"="63,63" }
 attributes #1 = { nounwind "amdgpu-waves-per-eu"="1,3" "amdgpu-flat-work-group-size"="256,256" }
-attributes #2 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1600,1600" }
+attributes #2 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1024,1024" }
 attributes #3 = { nounwind "amdgpu-waves-per-eu"="1,10" }
 attributes #4 = { nounwind "amdgpu-waves-per-eu"="1,10" }
 attributes #5 = { nounwind "amdgpu-waves-per-eu"="1,6" "amdgpu-flat-work-group-size"="64,64" }