[llvm] [Target][AMDGPU] Fix TSan error on AMDGPU Target. (PR #79529)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 25 21:22:29 PST 2024


https://github.com/MaheshRavishankar updated https://github.com/llvm/llvm-project/pull/79529

>From 2f8c098c40e8d8243f3da99556b38958465788f5 Mon Sep 17 00:00:00 2001
From: MaheshRavishankar <mahesh at nod-labs.com>
Date: Wed, 24 Jan 2024 16:12:48 -0800
Subject: [PATCH 1/2] [Target][AMDGPU] Fix TSan error on AMDGPU Target.

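Assigning to the global cl::opt flags from within runOnModule was flagged
as a TSan error. Fix that by copying the flag values into local variables
and passing them to analyzeResourceUsage as parameters.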
---
 .../AMDGPU/AMDGPUResourceUsageAnalysis.cpp    | 28 ++++++++++++-------
 .../AMDGPU/AMDGPUResourceUsageAnalysis.h      |  6 ++--
 2 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
index 0c759e7f3b0957e..3d162794bb4957a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
@@ -46,12 +46,12 @@ char &llvm::AMDGPUResourceUsageAnalysisID = AMDGPUResourceUsageAnalysis::ID;
 // In code object v4 and older, we need to tell the runtime some amount ahead of
 // time if we don't know the true stack size. Assume a smaller number if this is
 // only due to dynamic / non-entry block allocas.
-static cl::opt<uint32_t> AssumedStackSizeForExternalCall(
+static cl::opt<uint32_t> clAssumedStackSizeForExternalCall(
     "amdgpu-assume-external-call-stack-size",
     cl::desc("Assumed stack use of any external call (in bytes)"), cl::Hidden,
     cl::init(16384));
 
-static cl::opt<uint32_t> AssumedStackSizeForDynamicSizeObjects(
+static cl::opt<uint32_t> clAssumedStackSizeForDynamicSizeObjects(
     "amdgpu-assume-dynamic-stack-object-size",
     cl::desc("Assumed extra stack use if there are any "
              "variable sized objects (in bytes)"),
@@ -112,11 +112,15 @@ bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {
 
   // By default, for code object v5 and later, track only the minimum scratch
   // size
+  uint32_t AssumedStackSizeForDynamicSizeObjects =
+      clAssumedStackSizeForDynamicSizeObjects.getValue();
+  uint32_t AssumedStackSizeForExternalCall =
+      clAssumedStackSizeForExternalCall.getValue();
   if (AMDGPU::getAMDHSACodeObjectVersion(M) >= AMDGPU::AMDHSA_COV5 ||
       STI.getTargetTriple().getOS() == Triple::AMDPAL) {
-    if (!AssumedStackSizeForDynamicSizeObjects.getNumOccurrences())
+    if (!clAssumedStackSizeForDynamicSizeObjects.getNumOccurrences())
       AssumedStackSizeForDynamicSizeObjects = 0;
-    if (!AssumedStackSizeForExternalCall.getNumOccurrences())
+    if (!clAssumedStackSizeForExternalCall.getNumOccurrences())
       AssumedStackSizeForExternalCall = 0;
   }
 
@@ -132,7 +136,8 @@ bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {
         CallGraphResourceInfo.insert(std::pair(F, SIFunctionResourceInfo()));
     SIFunctionResourceInfo &Info = CI.first->second;
     assert(CI.second && "should only be called once per function");
-    Info = analyzeResourceUsage(*MF, TM);
+    Info = analyzeResourceUsage(*MF, TM, AssumedStackSizeForDynamicSizeObjects,
+                                AssumedStackSizeForExternalCall);
     HasIndirectCall |= Info.HasIndirectCall;
   }
 
@@ -152,7 +157,8 @@ bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {
     SIFunctionResourceInfo &Info = CI.first->second;
     MachineFunction *MF = MMI.getMachineFunction(*F);
     assert(MF && "function must have been generated already");
-    Info = analyzeResourceUsage(*MF, TM);
+    Info = analyzeResourceUsage(*MF, TM, AssumedStackSizeForDynamicSizeObjects,
+                                AssumedStackSizeForExternalCall);
     HasIndirectCall |= Info.HasIndirectCall;
   }
 
@@ -164,7 +170,9 @@ bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {
 
 AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo
 AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
-    const MachineFunction &MF, const TargetMachine &TM) const {
+    const MachineFunction &MF, const TargetMachine &TM,
+    uint32_t AssumedStackSizeForDynamicSizeObjects,
+    uint32_t AssumedStackSizeForExternalCall) const {
   SIFunctionResourceInfo Info;
 
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
@@ -541,9 +549,9 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
             // directly call the tail called function. If a kernel directly
             // calls a tail recursive function, we'll assume maximum stack size
             // based on the regular call instruction.
-            CalleeFrameSize =
-              std::max(CalleeFrameSize,
-                       static_cast<uint64_t>(AssumedStackSizeForExternalCall));
+            CalleeFrameSize = std::max(
+                CalleeFrameSize,
+                static_cast<uint64_t>(AssumedStackSizeForExternalCall));
           }
         }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h
index df0789e471c16a5..16dcc28bcf88bf4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h
@@ -72,8 +72,10 @@ struct AMDGPUResourceUsageAnalysis : public ModulePass {
   }
 
 private:
-  SIFunctionResourceInfo analyzeResourceUsage(const MachineFunction &MF,
-                                              const TargetMachine &TM) const;
+  SIFunctionResourceInfo
+  analyzeResourceUsage(const MachineFunction &MF, const TargetMachine &TM,
+                       uint32_t AssumedStackSizeForDynamicSizeObjects,
+                       uint32_t AssumedStackSizeForExternalCall) const;
   void propagateIndirectCallRegisterUsage();
 
   DenseMap<const Function *, SIFunctionResourceInfo> CallGraphResourceInfo;

>From 156c3c420d5d9d01cb5d14ac31cbfdda935bc85b Mon Sep 17 00:00:00 2001
From: MaheshRavishankar <mahesh at nod-labs.com>
Date: Thu, 25 Jan 2024 21:22:05 -0800
Subject: [PATCH 2/2] Address comments.

---
 llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
index 3d162794bb4957a..326d0fa58dd15fe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
@@ -113,14 +113,13 @@ bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {
   // By default, for code object v5 and later, track only the minimum scratch
   // size
   uint32_t AssumedStackSizeForDynamicSizeObjects =
-      clAssumedStackSizeForDynamicSizeObjects.getValue();
-  uint32_t AssumedStackSizeForExternalCall =
-      clAssumedStackSizeForExternalCall.getValue();
+      clAssumedStackSizeForDynamicSizeObjects;
+  uint32_t AssumedStackSizeForExternalCall = clAssumedStackSizeForExternalCall;
   if (AMDGPU::getAMDHSACodeObjectVersion(M) >= AMDGPU::AMDHSA_COV5 ||
       STI.getTargetTriple().getOS() == Triple::AMDPAL) {
-    if (!clAssumedStackSizeForDynamicSizeObjects.getNumOccurrences())
+    if (clAssumedStackSizeForDynamicSizeObjects.getNumOccurrences() == 0)
       AssumedStackSizeForDynamicSizeObjects = 0;
-    if (!clAssumedStackSizeForExternalCall.getNumOccurrences())
+    if (clAssumedStackSizeForExternalCall.getNumOccurrences() == 0)
       AssumedStackSizeForExternalCall = 0;
   }
 



More information about the llvm-commits mailing list