[clang] [NVPTX] Add support for maxclusterrank in launch_bounds (PR #66496)
Jakub Chlanda via cfe-commits
cfe-commits at lists.llvm.org
Fri Sep 22 03:05:55 PDT 2023
================
@@ -537,59 +537,46 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
raw_ostream &O) const {
// If the NVVM IR has some of reqntid* specified, then output
// the reqntid directive, and set the unspecified ones to 1.
- // If none of reqntid* is specified, don't output reqntid directive.
- unsigned reqntidx, reqntidy, reqntidz;
- bool specified = false;
- if (!getReqNTIDx(F, reqntidx))
- reqntidx = 1;
- else
- specified = true;
- if (!getReqNTIDy(F, reqntidy))
- reqntidy = 1;
- else
- specified = true;
- if (!getReqNTIDz(F, reqntidz))
- reqntidz = 1;
- else
- specified = true;
-
- if (specified)
- O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
+ // If none of Reqntid* is specified, don't output reqntid directive.
+ unsigned Reqntidx, Reqntidy, Reqntidz;
+ Reqntidx = Reqntidy = Reqntidz = 1;
+ bool ReqSpecified = false;
+ ReqSpecified |= getReqNTIDx(F, Reqntidx);
+ ReqSpecified |= getReqNTIDy(F, Reqntidy);
+ ReqSpecified |= getReqNTIDz(F, Reqntidz);
+
+ if (ReqSpecified)
+ O << ".reqntid " << Reqntidx << ", " << Reqntidy << ", " << Reqntidz
<< "\n";
// If the NVVM IR has some of maxntid* specified, then output
// the maxntid directive, and set the unspecified ones to 1.
// If none of maxntid* is specified, don't output maxntid directive.
- unsigned maxntidx, maxntidy, maxntidz;
- specified = false;
- if (!getMaxNTIDx(F, maxntidx))
- maxntidx = 1;
- else
- specified = true;
- if (!getMaxNTIDy(F, maxntidy))
- maxntidy = 1;
- else
- specified = true;
- if (!getMaxNTIDz(F, maxntidz))
- maxntidz = 1;
- else
- specified = true;
-
- if (specified)
- O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
+ unsigned Maxntidx, Maxntidy, Maxntidz;
+ Maxntidx = Maxntidy = Maxntidz = 1;
+ bool MaxSpecified = false;
+ MaxSpecified |= getMaxNTIDx(F, Maxntidx);
+ MaxSpecified |= getMaxNTIDy(F, Maxntidy);
+ MaxSpecified |= getMaxNTIDz(F, Maxntidz);
+
+ if (MaxSpecified)
+ O << ".maxntid " << Maxntidx << ", " << Maxntidy << ", " << Maxntidz
<< "\n";
- unsigned mincta;
- if (getMinCTASm(F, mincta))
- O << ".minnctapersm " << mincta << "\n";
+ unsigned Mincta = 0;
+ if (getMinCTASm(F, Mincta))
+ O << ".minnctapersm " << Mincta << "\n";
- unsigned maxnreg;
- if (getMaxNReg(F, maxnreg))
- O << ".maxnreg " << maxnreg << "\n";
+ unsigned Maxnreg = 0;
+ if (getMaxNReg(F, Maxnreg))
+ O << ".maxnreg " << Maxnreg << "\n";
+
+ unsigned Maxclusterrank = 0;
----------------
jchlanda wrote:
You are right, `ptxas` reacts to a sample with `.maxclusterrank` with pre SM_90 with a hard error:
```
ptxas --gpu-name sm_75 --output-file cluster_rank.o cluster_rank.s
ptxas cluster_rank.s, line 18; error : Feature '.maxclusterrank' requires .target sm_90 or higher
ptxas fatal : Ptx assembly aborted due to errors
```
Do I understand you right, that you'd like to see a [check similar to what we do in SemaDeclAttr](https://github.com/llvm/llvm-project/pull/66496/files#diff-2a5bdb2d9f07f8d77de51d5403d349c22978141b6de6bd87fc5e449f5ed95becR5683) and filter out the directive on targets < SM_90?
https://github.com/llvm/llvm-project/pull/66496
More information about the cfe-commits
mailing list