[clang] [NVPTX] Add support for maxclusterrank in launch_bounds (PR #66496)

Artem Belevich via cfe-commits cfe-commits at lists.llvm.org
Fri Sep 22 13:48:56 PDT 2023


================
@@ -537,59 +537,46 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
                                                    raw_ostream &O) const {
   // If the NVVM IR has some of reqntid* specified, then output
   // the reqntid directive, and set the unspecified ones to 1.
-  // If none of reqntid* is specified, don't output reqntid directive.
-  unsigned reqntidx, reqntidy, reqntidz;
-  bool specified = false;
-  if (!getReqNTIDx(F, reqntidx))
-    reqntidx = 1;
-  else
-    specified = true;
-  if (!getReqNTIDy(F, reqntidy))
-    reqntidy = 1;
-  else
-    specified = true;
-  if (!getReqNTIDz(F, reqntidz))
-    reqntidz = 1;
-  else
-    specified = true;
-
-  if (specified)
-    O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
+  // If none of reqntid* is specified, don't output reqntid directive.
+  unsigned Reqntidx, Reqntidy, Reqntidz;
+  Reqntidx = Reqntidy = Reqntidz = 1;
+  bool ReqSpecified = false;
+  ReqSpecified |= getReqNTIDx(F, Reqntidx);
+  ReqSpecified |= getReqNTIDy(F, Reqntidy);
+  ReqSpecified |= getReqNTIDz(F, Reqntidz);
+
+  if (ReqSpecified)
+    O << ".reqntid " << Reqntidx << ", " << Reqntidy << ", " << Reqntidz
       << "\n";
 
   // If the NVVM IR has some of maxntid* specified, then output
   // the maxntid directive, and set the unspecified ones to 1.
   // If none of maxntid* is specified, don't output maxntid directive.
-  unsigned maxntidx, maxntidy, maxntidz;
-  specified = false;
-  if (!getMaxNTIDx(F, maxntidx))
-    maxntidx = 1;
-  else
-    specified = true;
-  if (!getMaxNTIDy(F, maxntidy))
-    maxntidy = 1;
-  else
-    specified = true;
-  if (!getMaxNTIDz(F, maxntidz))
-    maxntidz = 1;
-  else
-    specified = true;
-
-  if (specified)
-    O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
+  unsigned Maxntidx, Maxntidy, Maxntidz;
+  Maxntidx = Maxntidy = Maxntidz = 1;
+  bool MaxSpecified = false;
+  MaxSpecified |= getMaxNTIDx(F, Maxntidx);
+  MaxSpecified |= getMaxNTIDy(F, Maxntidy);
+  MaxSpecified |= getMaxNTIDz(F, Maxntidz);
+
+  if (MaxSpecified)
+    O << ".maxntid " << Maxntidx << ", " << Maxntidy << ", " << Maxntidz
       << "\n";
 
-  unsigned mincta;
-  if (getMinCTASm(F, mincta))
-    O << ".minnctapersm " << mincta << "\n";
+  unsigned Mincta = 0;
+  if (getMinCTASm(F, Mincta))
+    O << ".minnctapersm " << Mincta << "\n";
 
-  unsigned maxnreg;
-  if (getMaxNReg(F, maxnreg))
-    O << ".maxnreg " << maxnreg << "\n";
+  unsigned Maxnreg = 0;
+  if (getMaxNReg(F, Maxnreg))
+    O << ".maxnreg " << Maxnreg << "\n";
+
+  unsigned Maxclusterrank = 0;
----------------
Artem-B wrote:

We do not have a good way to issue any diagnostics from LLVM, so the choice would be to either reject the IR as invalid, or make an effort to compile to valid PTX. Right now we do neither of those.

I'd be fine with either of the options above. That said, ignoring metadata which we can't apply seems OK to me.




https://github.com/llvm/llvm-project/pull/66496


More information about the cfe-commits mailing list