[llvm] [AMDGPU][Attributor] Infer `inreg` attribute in `AMDGPUAttributor` (PR #101609)

Wed May 21 01:58:46 PDT 2025

================
@@ -1299,6 +1301,111 @@ struct AAAMDGPUNoAGPR
 
 const char AAAMDGPUNoAGPR::ID = 0;
 
+/// Abstract attribute with a boolean state whose string form is "inreg" when
+/// the assumed state is true and "non-inreg" otherwise.
+/// NOTE(review): presumably a true state means the value at the associated
+/// position is uniform and may therefore be marked `inreg` -- confirm against
+/// the manifest logic, which is not visible in this hunk.
+struct AAAMDGPUUniform : public StateWrapper<BooleanState, AbstractAttribute> {
+  using Base = StateWrapper<BooleanState, AbstractAttribute>;
+  /// Forward \p IRP to the base class; \p A is not used by this constructor.
+  AAAMDGPUUniform(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+
+  /// Create an abstract attribute view for the position \p IRP.
+  /// Only declared here; the definition is outside this hunk.
+  static AAAMDGPUUniform &createForPosition(const IRPosition &IRP,
+                                            Attributor &A);
+
+  /// See AbstractAttribute::getName()
+  const std::string getName() const override { return "AAAMDGPUUniform"; }
+
+  /// Return "inreg" if the assumed state is true, "non-inreg" otherwise.
+  /// \p A is not used.
+  const std::string getAsStr(Attributor *A) const override {
+    return getAssumed() ? "inreg" : "non-inreg";
+  }
+
+  /// Intentionally empty: no statistics are tracked for this attribute.
+  void trackStatistics() const override {}
+
+  /// See AbstractAttribute::getIdAddr()
+  const char *getIdAddr() const override { return &ID; }
+
+  /// Return true if \p AA is an AAAMDGPUUniform, decided by comparing the
+  /// address returned by AA->getIdAddr() with the address of ID.
+  static bool classof(const AbstractAttribute *AA) {
+    return (AA->getIdAddr() == &ID);
+  }
+
+  /// Unique ID (due to the unique address)
+  static const char ID;
+};
+
+/// Out-of-class definition of the ID member; only its address is used above,
+/// the value 0 is never read in the visible code.
+const char AAAMDGPUUniform::ID = 0;
+
+struct AAAMDGPUUniformArgument : public AAAMDGPUUniform {
+  AAAMDGPUUniformArgument(const IRPosition &IRP, Attributor &A)
+      : AAAMDGPUUniform(IRP, A) {}
+
+  void initialize(Attributor &A) override {
+    Argument *Arg = getAssociatedArgument();
+    if (Arg->hasAttribute(Attribute::InReg) ||
+        AMDGPU::isEntryFunctionCC(Arg->getParent()->getCallingConv()))
+      indicateOptimisticFixpoint();
+  }
+
+  ChangeStatus updateImpl(Attributor &A) override {
+    unsigned ArgNo = getAssociatedArgument()->getArgNo();
+
+    auto isUniform = [&](AbstractCallSite ACS) -> bool {
+      CallBase *CB = ACS.getInstruction();
+      Value *V = CB->getArgOperandUse(ArgNo);
+      if (isa<Constant>(V))
+        return true;
+      Function *F = nullptr;
+      if (auto *Arg = dyn_cast<Argument>(V)) {
+        auto *AA =
+            A.getOrCreateAAFor<AAAMDGPUUniform>(IRPosition::argument(*Arg));
+        if (AA)
+          return AA->isValidState();
+        F = Arg->getParent();
+      } else if (auto *I = dyn_cast<Instruction>(V)) {
+        F = I->getFunction();
+      }
+
+      if (F) {
+        auto *UA =
+            A.getInfoCache()
+                .getAnalysisResultForFunction<UniformityInfoAnalysis>(*F);
+        return UA && UA->isUniform(V);
----------------
arsenm wrote:

I still think the initial implementation of this should *not* use uniformity analysis. We can add this back in later. For now, I think it will be significantly less trouble to only handle the simple isTriviallyUniform cases. That will avoid nearly all of the downstream codegen failures we'll have with respect to not emitting waterfall loops.

Another possible intermediate step is to insert readfirstlane calls on the outgoing argument.

https://github.com/llvm/llvm-project/pull/101609