[llvm] d2c817d - [AMDGPU] Fix DynLDS causing crash when LowerLDS is run at fullLTO pipeline (#96038)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 28 19:05:51 PDT 2024


Author: Vigneshwar Jayakumar
Date: 2024-06-28T21:05:48-05:00
New Revision: d2c817df846a4fa141778560e2258ab674426f44

URL: https://github.com/llvm/llvm-project/commit/d2c817df846a4fa141778560e2258ab674426f44
DIFF: https://github.com/llvm/llvm-project/commit/d2c817df846a4fa141778560e2258ab674426f44.diff

LOG: [AMDGPU] Fix DynLDS causing crash when LowerLDS is run at fullLTO pipeline (#96038)

Direct-mapped dynamic LDS is not lowered by the LowerModuleLDS pass, so it
is never marked with an absolute symbol. When the LowerModuleLDS pass is
rerun during LTO, compilation fails with the fatal error "cannot mix abs and non-abs LDVs".
This patch adds a check that skips direct-mapped dynamic LDS variables when verifying this, so the rerun no longer fails.

Fixes SWDEV-454281

Added: 
    llvm/test/CodeGen/AMDGPU/lds-mixed-absolute-dynlds.ll

Modified: 
    llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
    llvm/test/CodeGen/AMDGPU/lds-run-twice.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
index 04c6e940e6ed6..abe0ce375aedd 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
@@ -207,7 +207,9 @@ LDSUsesInfoTy getTransitiveUsesOfLDS(const CallGraph &CG, Module &M) {
   }
 
   // Verify that we fall into one of 2 cases:
-  //    - All variables are absolute: this is a re-run of the pass
+  //    - All variables are either absolute
+  //      or direct mapped dynamic LDS that is not lowered:
+  //      this is a re-run of the pass
   //      so we don't have anything to do.
   //    - No variables are absolute.
   std::optional<bool> HasAbsoluteGVs;
@@ -215,6 +217,9 @@ LDSUsesInfoTy getTransitiveUsesOfLDS(const CallGraph &CG, Module &M) {
     for (auto &[Fn, GVs] : Map) {
       for (auto *GV : GVs) {
         bool IsAbsolute = GV->isAbsoluteSymbolRef();
+        bool IsDirectMapDynLDSGV = AMDGPU::isDynamicLDS(*GV) && DirectMapKernel.contains(Fn);
+        if (IsDirectMapDynLDSGV)
+          continue;
         if (HasAbsoluteGVs.has_value()) {
           if (*HasAbsoluteGVs != IsAbsolute) {
             report_fatal_error(

diff  --git a/llvm/test/CodeGen/AMDGPU/lds-mixed-absolute-dynlds.ll b/llvm/test/CodeGen/AMDGPU/lds-mixed-absolute-dynlds.ll
new file mode 100644
index 0000000000000..1c3734ef4d001
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lds-mixed-absolute-dynlds.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s 2>&1 | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s 2>&1 | FileCheck %s
+
+; Dynamic LDS variables that are direct mapped are not lowered by the LowerModuleLDS pass.
+; In such cases, LowerModuleLDS is free to leave them in and ignore them, and we want to make sure
+; LowerModuleLDS doesn't crash if it re-runs on such modules.
+
+ at loweredlds = addrspace(3) global i32 poison, !absolute_symbol !0
+ at dynlds = external addrspace(3) global [0 x i32]
+
+define amdgpu_kernel void @kern(i32 %val0) {
+; CHECK-LABEL: define amdgpu_kernel void @kern(
+; CHECK-SAME: i32 [[VAL0:%.*]]) {
+; CHECK-NEXT:    store i32 0, ptr addrspace(3) @loweredlds, align 4
+; CHECK-NEXT:    store i32 1, ptr addrspace(3) @dynlds, align 4
+; CHECK-NEXT:    ret void
+;
+  store i32 0, ptr addrspace(3) @loweredlds
+  store i32 1, ptr addrspace(3) @dynlds
+  ret void
+}
+
+
+!0 = !{i32 0, i32 1}

diff  --git a/llvm/test/CodeGen/AMDGPU/lds-run-twice.ll b/llvm/test/CodeGen/AMDGPU/lds-run-twice.ll
index e121f0da327d0..615f1e3b7fb38 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-run-twice.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-run-twice.ll
@@ -7,10 +7,12 @@
 ; Check AMDGPULowerModuleLDS can run more than once on the same module, and that
 ; the second run is a no-op.
 
+ at dynlds = external addrspace(3) global [0 x i32], align 4
 @lds = internal unnamed_addr addrspace(3) global i32 undef, align 4
 
 define amdgpu_kernel void @test() {
 entry:
+  store i32 0, ptr addrspace(3) @dynlds
   store i32 1, ptr addrspace(3) @lds
   ret void
 }


        


More information about the llvm-commits mailing list