[llvm] [AMDGPU][Attributor] Infer `inreg` attribute in `AMDGPUAttributor` (PR #101609)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 26 00:22:44 PDT 2025
================
@@ -1295,6 +1298,134 @@ struct AAAMDGPUNoAGPR
const char AAAMDGPUNoAGPR::ID = 0;
+struct AAAMDGPUUniform : public StateWrapper<BooleanState, AbstractAttribute> {
+ using Base = StateWrapper<BooleanState, AbstractAttribute>;
+ AAAMDGPUUniform(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AAAMDGPUUniform &createForPosition(const IRPosition &IRP,
+ Attributor &A);
+
+ /// See AbstractAttribute::getName()
+ StringRef getName() const override { return "AAAMDGPUUniform"; }
+
+ const std::string getAsStr(Attributor *A) const override {
+ return getAssumed() ? "uniform" : "divergent";
+ }
+
+ void trackStatistics() const override {}
+
+ /// See AbstractAttribute::getIdAddr()
+ const char *getIdAddr() const override { return &ID; }
+
+ /// This function should return true if the type of the \p AA is
+ /// AAAMDGPUUniform
+ static bool classof(const AbstractAttribute *AA) {
+ return (AA->getIdAddr() == &ID);
+ }
+
+ /// Unique ID (due to the unique address)
+ static const char ID;
+};
+
+const char AAAMDGPUUniform::ID = 0;
+
+/// This AA infers the inreg attribute for a function argument.
+struct AAAMDGPUUniformArgument : public AAAMDGPUUniform {
+ AAAMDGPUUniformArgument(const IRPosition &IRP, Attributor &A)
+ : AAAMDGPUUniform(IRP, A) {}
+
+ void initialize(Attributor &A) override {
+ Argument *Arg = getAssociatedArgument();
+ CallingConv::ID CC = Arg->getParent()->getCallingConv();
+ if (Arg->hasAttribute(Attribute::InReg)) {
+ indicateOptimisticFixpoint();
+ return;
+ }
+ if (AMDGPU::isEntryFunctionCC(CC)) {
+ // We only use isArgPassedInSGPR on kernel entry function arguments, so
+ // even if we end up using a VGPR for inreg i1 argument passing, it will
+ // not affect this.
+ if (AMDGPU::isArgPassedInSGPR(Arg))
+ indicateOptimisticFixpoint();
+ else
+ indicatePessimisticFixpoint();
+ }
+ }
+
+ ChangeStatus updateImpl(Attributor &A) override {
+ unsigned ArgNo = getAssociatedArgument()->getArgNo();
+
+ auto isUniform = [&](AbstractCallSite ACS) -> bool {
+ CallBase *CB = ACS.getInstruction();
+ Value *V = CB->getArgOperandUse(ArgNo);
+ if (isa<Constant>(V))
+ return true;
+ Function *F = nullptr;
+ if (auto *Arg = dyn_cast<Argument>(V)) {
+ auto *AA =
+ A.getOrCreateAAFor<AAAMDGPUUniform>(IRPosition::argument(*Arg));
+ if (AA)
+ return AA->isValidState();
+ F = Arg->getParent();
+ } else if (auto *I = dyn_cast<Instruction>(V)) {
+ F = I->getFunction();
+ }
+
+ if (F) {
+ auto *UA =
+ A.getInfoCache()
+ .getAnalysisResultForFunction<UniformityInfoAnalysis>(*F);
+ return UA && UA->isUniform(V);
+ }
+
+ return false;
+ };
+
+ bool UsedAssumedInformation = true;
+ if (!A.checkForAllCallSites(isUniform, *this, /*RequireAllCallSites=*/true,
+ UsedAssumedInformation))
+ return indicatePessimisticFixpoint();
+
+ if (!UsedAssumedInformation)
+ return indicateOptimisticFixpoint();
+
+ return ChangeStatus::UNCHANGED;
+ }
+
+ ChangeStatus manifest(Attributor &A) override {
+ Argument *Arg = getAssociatedArgument();
+ // If the argument already has the inreg attribute, we will not do
+ // anything about it.
+ if (Arg->hasAttribute(Attribute::InReg))
+ return ChangeStatus::UNCHANGED;
+ if (AMDGPU::isEntryFunctionCC(Arg->getParent()->getCallingConv()))
+ return ChangeStatus::UNCHANGED;
+ // We don't directly emit the readfirstlane here because that would cause
+ // multiple replacements of a single use in the manifest map, which is not
+ // supported at the moment.
+ // Instead, add both the inreg and "uniform" attributes to the argument. We
+ // will emit a readfirstlane at each call site for an inreg uniform
+ // argument, and the "uniform" attribute will be removed later.
+ LLVMContext &Ctx = Arg->getContext();
+ return A.manifestAttrs(getIRPosition(),
+ {Attribute::get(Ctx, Attribute::InReg),
+ Attribute::get(Ctx, "uniform")});
----------------
arsenm wrote:
`"uniform"` is a hack on a hack, and is potentially unsafe on later code motion of the callsite. This comment doesn't explain why you would want to insert the readfirstlane in the first place, which is another giant hack.
Once again, I think it would be easiest to restrict this to the trivially uniform case for now. Extending this to arbitrary uniform analysis and the readfirstlanes should be a second step. The trivially uniform case is the most important case, and every problem is going to be in these other cases
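For concreteness, a minimal sketch of what a trivially-uniform-only `updateImpl` could look like (illustrative only, not the patch; `IsTriviallyUniform` is a placeholder name, and it reuses the `AAAMDGPUUniformArgument` context and Attributor calls already present in the diff above):

    // Sketch: restrict the update to trivially uniform (constant) call
    // operands; anything else pessimizes, so no readfirstlane or "uniform"
    // marker attribute is needed in manifest().
    ChangeStatus updateImpl(Attributor &A) override {
      unsigned ArgNo = getAssociatedArgument()->getArgNo();

      auto IsTriviallyUniform = [&](AbstractCallSite ACS) -> bool {
        // A constant incoming value stays uniform regardless of where the
        // call site is later moved.
        Value *V = ACS.getInstruction()->getArgOperandUse(ArgNo);
        return isa<Constant>(V);
      };

      bool UsedAssumedInformation = true;
      if (!A.checkForAllCallSites(IsTriviallyUniform, *this,
                                  /*RequireAllCallSites=*/true,
                                  UsedAssumedInformation))
        return indicatePessimisticFixpoint();
      if (!UsedAssumedInformation)
        return indicateOptimisticFixpoint();
      return ChangeStatus::UNCHANGED;
    }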
https://github.com/llvm/llvm-project/pull/101609