[clang] [NVPTX] Add support for maxclusterrank in launch_bounds (PR #66496)

Thu Sep 21 09:29:16 PDT 2023

================
@@ -537,59 +537,46 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
                                                    raw_ostream &O) const {
   // If the NVVM IR has some of reqntid* specified, then output
   // the reqntid directive, and set the unspecified ones to 1.
-  // If none of reqntid* is specified, don't output reqntid directive.
-  unsigned reqntidx, reqntidy, reqntidz;
-  bool specified = false;
-  if (!getReqNTIDx(F, reqntidx))
-    reqntidx = 1;
-  else
-    specified = true;
-  if (!getReqNTIDy(F, reqntidy))
-    reqntidy = 1;
-  else
-    specified = true;
-  if (!getReqNTIDz(F, reqntidz))
-    reqntidz = 1;
-  else
-    specified = true;
-
-  if (specified)
-    O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
+  // If none of Reqntid* is specified, don't output reqntid directive.
+  unsigned Reqntidx, Reqntidy, Reqntidz;
+  Reqntidx = Reqntidy = Reqntidz = 1;
+  bool ReqSpecified = false;
+  ReqSpecified |= getReqNTIDx(F, Reqntidx);
+  ReqSpecified |= getReqNTIDy(F, Reqntidy);
+  ReqSpecified |= getReqNTIDz(F, Reqntidz);
+
+  if (ReqSpecified)
+    O << ".reqntid " << Reqntidx << ", " << Reqntidy << ", " << Reqntidz
       << "\n";
 
   // If the NVVM IR has some of maxntid* specified, then output
   // the maxntid directive, and set the unspecified ones to 1.
   // If none of maxntid* is specified, don't output maxntid directive.
-  unsigned maxntidx, maxntidy, maxntidz;
-  specified = false;
-  if (!getMaxNTIDx(F, maxntidx))
-    maxntidx = 1;
-  else
-    specified = true;
-  if (!getMaxNTIDy(F, maxntidy))
-    maxntidy = 1;
-  else
-    specified = true;
-  if (!getMaxNTIDz(F, maxntidz))
-    maxntidz = 1;
-  else
-    specified = true;
-
-  if (specified)
-    O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
+  unsigned Maxntidx, Maxntidy, Maxntidz;
+  Maxntidx = Maxntidy = Maxntidz = 1;
+  bool MaxSpecified = false;
+  MaxSpecified |= getMaxNTIDx(F, Maxntidx);
+  MaxSpecified |= getMaxNTIDy(F, Maxntidy);
+  MaxSpecified |= getMaxNTIDz(F, Maxntidz);
+
+  if (MaxSpecified)
+    O << ".maxntid " << Maxntidx << ", " << Maxntidy << ", " << Maxntidz
       << "\n";
 
-  unsigned mincta;
-  if (getMinCTASm(F, mincta))
-    O << ".minnctapersm " << mincta << "\n";
+  unsigned Mincta = 0;
+  if (getMinCTASm(F, Mincta))
+    O << ".minnctapersm " << Mincta << "\n";
 
-  unsigned maxnreg;
-  if (getMaxNReg(F, maxnreg))
-    O << ".maxnreg " << maxnreg << "\n";
+  unsigned Maxnreg = 0;
+  if (getMaxNReg(F, Maxnreg))
+    O << ".maxnreg " << Maxnreg << "\n";
+
+  unsigned Maxclusterrank = 0;
----------------
Artem-B wrote:

Do we want to ignore this directive if the metadata exists, but we're targeting a pre-sm_90 GPU? 

It may be useful for non-clang LLVM users (e.g XLA) to be able to always specify launch bounds metadata, and let LLVM decide on what it can do with it. Generating the directive for older GPUs would result in ptxas error, while ignoring it would still allow the kernels to compile and work, the same as would be the case if the metadata was correctly absent. I don't think there's not much point to require users to jump through more hoops just to achieve exactly the same result.


https://github.com/llvm/llvm-project/pull/66496