[llvm] e8e6817 - [AMDGPU] Don't check hasStackObjects() when reserving VGPR

Thu Mar 11 16:11:39 PST 2021

Author: Ruiling Song
Date: 2021-03-12T08:11:14+08:00
New Revision: e8e6817d00a4dd1ba563a96aec2fd2be8f35dea9

URL: https://github.com/llvm/llvm-project/commit/e8e6817d00a4dd1ba563a96aec2fd2be8f35dea9
DIFF: https://github.com/llvm/llvm-project/commit/e8e6817d00a4dd1ba563a96aec2fd2be8f35dea9.diff

LOG: [AMDGPU] Don't check hasStackObjects() when reserving VGPR

We have amdgpu_gfx functions that have high register pressure. If
we do not reserve VGPR for SGPR spill, we will fall into the path
to spill the SGPR to memory, which does not only have correctness issue,
but also have really bad performance.

I don't know why there is the check for hasStackObjects(), in our case,
we don't have stack objects at the time of finalizeLowering(). So just
remove the check that we always reserve a VGPR for possible SGPR spill
in non-entry functions.

Reviewed by: arsenm

Differential Revision: https://reviews.llvm.org/D98345

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 516d272deab3b..3f1549ab7931a 100644

--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -11649,8 +11649,7 @@ void SITargetLowering::finalizeLowering(MachineFunction &MF) const {
   // "amdgpu-reserve-vgpr-for-sgpr-spill" option is used
   // FIXME: We won't need this hack if we split SGPR allocation from VGPR
   if (VGPRReserveforSGPRSpill && TRI->spillSGPRToVGPR() &&
-      !Info->VGPRReservedForSGPRSpill && !Info->isEntryFunction() &&
-      MF.getFrameInfo().hasStackObjects())
+      !Info->VGPRReservedForSGPRSpill && !Info->isEntryFunction())
     Info->reserveVGPRforSGPRSpills(MF);
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll b/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll
index 7ba977071936d..c0433d25b501a 100644
--- a/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll
@@ -187,4 +187,51 @@ define void @reserve_vgpr_with_tail_call() #0 {
   ret void
 }
 
+; GCN-LABEL: {{^}}reserve_vgpr_for_sgpr_spills_no_alloca:
+; GCN:  v_writelane_b32 v5, s34, 0
+; GCN:  v_writelane_b32 v5, s35, 1
+; GCN:  v_writelane_b32 v5, s36, 2
+; GCN:  v_writelane_b32 v5, s37, 3
+; GCN:  v_readlane_b32 s37, v5, 3
+; GCN:  v_readlane_b32 s36, v5, 2
+; GCN:  v_readlane_b32 s35, v5, 1
+; GCN:  v_readlane_b32 s34, v5, 0
+
+define void @reserve_vgpr_for_sgpr_spills_no_alloca(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
+  %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
+  call void asm sideeffect "",
+  "~{v6},~{v7},~{v8},~{v9}
+  ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
+  ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}
+  ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}
+  ,~{v40},~{v41},~{v42},~{v43},~{v44},~{v45},~{v46},~{v47},~{v48},~{v49}
+  ,~{v50},~{v51},~{v52},~{v53},~{v54},~{v55},~{v56},~{v57},~{v58},~{v59}
+  ,~{v60},~{v61},~{v62},~{v63},~{v64},~{v65},~{v66},~{v67},~{v68},~{v69}
+  ,~{v70},~{v71},~{v72},~{v73},~{v74},~{v75},~{v76},~{v77},~{v78},~{v79}
+  ,~{v80},~{v81},~{v82},~{v83},~{v84},~{v85},~{v86},~{v87},~{v88},~{v89}
+  ,~{v90},~{v91},~{v92},~{v93},~{v94},~{v95},~{v96},~{v97},~{v98},~{v99}
+  ,~{v100},~{v101},~{v102},~{v103},~{v104},~{v105},~{v106},~{v107},~{v108},~{v109}
+  ,~{v110},~{v111},~{v112},~{v113},~{v114},~{v115},~{v116},~{v117},~{v118},~{v119}
+  ,~{v120},~{v121},~{v122},~{v123},~{v124},~{v125},~{v126},~{v127},~{v128},~{v129}
+  ,~{v130},~{v131},~{v132},~{v133},~{v134},~{v135},~{v136},~{v137},~{v138},~{v139}
+  ,~{v140},~{v141},~{v142},~{v143},~{v144},~{v145},~{v146},~{v147},~{v148},~{v149}
+  ,~{v150},~{v151},~{v152},~{v153},~{v154},~{v155},~{v156},~{v157},~{v158},~{v159}
+  ,~{v160},~{v161},~{v162},~{v163},~{v164},~{v165},~{v166},~{v167},~{v168},~{v169}
+  ,~{v170},~{v171},~{v172},~{v173},~{v174},~{v175},~{v176},~{v177},~{v178},~{v179}
+  ,~{v180},~{v181},~{v182},~{v183},~{v184},~{v185},~{v186},~{v187},~{v188},~{v189}
+  ,~{v190},~{v191},~{v192},~{v193},~{v194},~{v195},~{v196},~{v197},~{v198},~{v199}
+  ,~{v200},~{v201},~{v202},~{v203},~{v204},~{v205},~{v206},~{v207},~{v208},~{v209}
+  ,~{v210},~{v211},~{v212},~{v213},~{v214},~{v215},~{v216},~{v217},~{v218},~{v219}
+  ,~{v220},~{v221},~{v222},~{v223},~{v224},~{v225},~{v226},~{v227},~{v228},~{v229}
+  ,~{v230},~{v231},~{v232},~{v233},~{v234},~{v235},~{v236},~{v237},~{v238},~{v239}
+  ,~{v240},~{v241},~{v242},~{v243},~{v244},~{v245},~{v246},~{v247},~{v248},~{v249}
+  ,~{v250},~{v251},~{v252},~{v253},~{v254},~{v255}" () #0
+
+  call void asm sideeffect "",
+  "~{s34},~{s35},~{s36},~{s37}" () #0
+
+  store <4 x i32> %a, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
 attributes #0 = { nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,256" }