[clang] [llvm] [AMDGPU] Infer amdgpu-no-flat-scratch-init attribute in AMDGPUAttributor (PR #94647)

Jun Wang via cfe-commits cfe-commits at lists.llvm.org
Tue Oct 29 15:45:45 PDT 2024


================
@@ -683,6 +706,59 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
     return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
                                               UsedAssumedInformation);
   }
+
+  // Reports whether this function requires FlatScratchInit, in which case the
+  // no-flat-scratch-init attribute must not be applied to it.
+  bool needFlatScratchInit(Attributor &A) {
+    // Callers only invoke this while the bit is still assumed.
+    assert(isAssumed(FLAT_SCRATCH_INIT));
+
+    // Per-call-site predicate: yields false when the call rules out
+    // no-flat-scratch-init.
+    auto CallAllowsNoFlatScratchInit = [&](Instruction &Inst) {
+      const Function *Target = cast<CallBase>(Inst).getCalledFunction();
+
+      // There is no direct callee for inline asm or for an indirect call with
+      // known callees. For the latter, updateImpl() has already inspected the
+      // callees, so their FLAT_SCRATCH_INIT bit is known to be set. An
+      // indirect call with unknown callees would have cleared the bit in
+      // updateImpl() already, so this point would not be reached.
+      if (Target == nullptr)
+        return true;
+
+      const bool IsNonNullASCast =
+          Target->getIntrinsicID() == Intrinsic::amdgcn_addrspacecast_nonnull;
+      return !IsNonNullASCast;
+    };
+
+    bool UsedAssumedInformation = false;
+    // checkForAllCallLikeInstructions yields false as soon as the predicate
+    // rejects a call site (i.e. FlatScratchInit is needed); this function
+    // reports the negation of that result.
+    const bool AllCallsPass = A.checkForAllCallLikeInstructions(
+        CallAllowsNoFlatScratchInit, *this, UsedAssumedInformation);
+    return !AllCallsPass;
+  }
+
+  // Returns true if constant C, or any constant reachable through its
+  // operands, is an addrspacecast constant expression whose source pointer is
+  // in the private address space.
+  //
+  // Visited records the constants that have already been examined so that a
+  // constant shared by several expressions is walked only once; a constant
+  // that is already in the set yields false.
+  bool constHasASCast(const Constant *C,
+                      SmallPtrSetImpl<const Constant *> &Visited) {
+    if (!Visited.insert(C).second)
+      return false;
+
+    if (const auto *CE = dyn_cast<ConstantExpr>(C))
+      if (CE->getOpcode() == Instruction::AddrSpaceCast &&
+          CE->getOperand(0)->getType()->getPointerAddressSpace() ==
+              AMDGPUAS::PRIVATE_ADDRESS)
+        return true;
+
+    for (const Use &U : C->operands()) {
+      const auto *OpC = dyn_cast<Constant>(U);
+      // Do not insert OpC into Visited here: the recursive call performs the
+      // insertion itself, and inserting beforehand would make that call
+      // return immediately without ever examining OpC or its operands.
+      if (OpC && constHasASCast(OpC, Visited))
+        return true;
+    }
+    return false;
+  }
----------------
jwanggit86 wrote:

> I do not want to duplicate the same function that already exists for the LDS case. Unify these.
> 
> We also should try to avoid doing this walk over all instructions through all constant expressions twice for the two attributes

The latest commit creates a new function in AMDGPUInformationCache that makes use of the existing `getConstantAccess()`, so the code in `getConstantAccess()` is not duplicated. However, the two attributes still walk over all instructions separately to check the constants. Unifying these two walks would require the walk to happen earlier and the results (all the constants) to be collected. Please let me know your thoughts on this.

https://github.com/llvm/llvm-project/pull/94647


More information about the cfe-commits mailing list