[PATCH] D98345: [AMDGPU] Don't check hasStackObjects() when reserving VGPR

Ruiling, Song via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 10 06:22:14 PST 2021


ruiling created this revision.
ruiling added a reviewer: arsenm.
Herald added subscribers: kerbowa, hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, jvesely, kzhuravl, qcolombet.
ruiling requested review of this revision.
Herald added subscribers: llvm-commits, wdng.
Herald added a project: LLVM.

We have amdgpu_gfx functions that have high register pressure. If
we do not reserve VGPR for SGPR spill, we will fall into the path
to spill the SGPR to memory, which does not only have correctness issue,
but also have really bad performance.

I don't know why there is the check for hasStackObjects(), in our case,
we don't have stack objects at the time of finalizeLowering(). So just
remove the check that we always reserve a VGPR for possible SGPR spill
in non-entry functions.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D98345

Files:
  llvm/lib/Target/AMDGPU/SIISelLowering.cpp
  llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll


Index: llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll
+++ llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll
@@ -187,4 +187,51 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}reserve_vgpr_for_sgpr_spills_no_alloca:
+; GCN:  v_writelane_b32 v5, s34, 0
+; GCN:  v_writelane_b32 v5, s35, 1
+; GCN:  v_writelane_b32 v5, s36, 2
+; GCN:  v_writelane_b32 v5, s37, 3
+; GCN:  v_readlane_b32 s37, v5, 3
+; GCN:  v_readlane_b32 s36, v5, 2
+; GCN:  v_readlane_b32 s35, v5, 1
+; GCN:  v_readlane_b32 s34, v5, 0
+
+define void @reserve_vgpr_for_sgpr_spills_no_alloca(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
+  %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
+  call void asm sideeffect "",
+  "~{v6},~{v7},~{v8},~{v9}
+  ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
+  ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}
+  ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}
+  ,~{v40},~{v41},~{v42},~{v43},~{v44},~{v45},~{v46},~{v47},~{v48},~{v49}
+  ,~{v50},~{v51},~{v52},~{v53},~{v54},~{v55},~{v56},~{v57},~{v58},~{v59}
+  ,~{v60},~{v61},~{v62},~{v63},~{v64},~{v65},~{v66},~{v67},~{v68},~{v69}
+  ,~{v70},~{v71},~{v72},~{v73},~{v74},~{v75},~{v76},~{v77},~{v78},~{v79}
+  ,~{v80},~{v81},~{v82},~{v83},~{v84},~{v85},~{v86},~{v87},~{v88},~{v89}
+  ,~{v90},~{v91},~{v92},~{v93},~{v94},~{v95},~{v96},~{v97},~{v98},~{v99}
+  ,~{v100},~{v101},~{v102},~{v103},~{v104},~{v105},~{v106},~{v107},~{v108},~{v109}
+  ,~{v110},~{v111},~{v112},~{v113},~{v114},~{v115},~{v116},~{v117},~{v118},~{v119}
+  ,~{v120},~{v121},~{v122},~{v123},~{v124},~{v125},~{v126},~{v127},~{v128},~{v129}
+  ,~{v130},~{v131},~{v132},~{v133},~{v134},~{v135},~{v136},~{v137},~{v138},~{v139}
+  ,~{v140},~{v141},~{v142},~{v143},~{v144},~{v145},~{v146},~{v147},~{v148},~{v149}
+  ,~{v150},~{v151},~{v152},~{v153},~{v154},~{v155},~{v156},~{v157},~{v158},~{v159}
+  ,~{v160},~{v161},~{v162},~{v163},~{v164},~{v165},~{v166},~{v167},~{v168},~{v169}
+  ,~{v170},~{v171},~{v172},~{v173},~{v174},~{v175},~{v176},~{v177},~{v178},~{v179}
+  ,~{v180},~{v181},~{v182},~{v183},~{v184},~{v185},~{v186},~{v187},~{v188},~{v189}
+  ,~{v190},~{v191},~{v192},~{v193},~{v194},~{v195},~{v196},~{v197},~{v198},~{v199}
+  ,~{v200},~{v201},~{v202},~{v203},~{v204},~{v205},~{v206},~{v207},~{v208},~{v209}
+  ,~{v210},~{v211},~{v212},~{v213},~{v214},~{v215},~{v216},~{v217},~{v218},~{v219}
+  ,~{v220},~{v221},~{v222},~{v223},~{v224},~{v225},~{v226},~{v227},~{v228},~{v229}
+  ,~{v230},~{v231},~{v232},~{v233},~{v234},~{v235},~{v236},~{v237},~{v238},~{v239}
+  ,~{v240},~{v241},~{v242},~{v243},~{v244},~{v245},~{v246},~{v247},~{v248},~{v249}
+  ,~{v250},~{v251},~{v252},~{v253},~{v254},~{v255}" () #0
+
+  call void asm sideeffect "",
+  "~{s34},~{s35},~{s36},~{s37}" () #0
+
+  store <4 x i32> %a, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
 attributes #0 = { nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,256" }
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -11649,8 +11649,7 @@
   // "amdgpu-reserve-vgpr-for-sgpr-spill" option is used
   // FIXME: We won't need this hack if we split SGPR allocation from VGPR
   if (VGPRReserveforSGPRSpill && TRI->spillSGPRToVGPR() &&
-      !Info->VGPRReservedForSGPRSpill && !Info->isEntryFunction() &&
-      MF.getFrameInfo().hasStackObjects())
+      !Info->VGPRReservedForSGPRSpill && !Info->isEntryFunction())
     Info->reserveVGPRforSGPRSpills(MF);
 }
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D98345.329644.patch
Type: text/x-patch
Size: 3733 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210310/3942c676/attachment.bin>


More information about the llvm-commits mailing list