[llvm] [AMDGPU] Fix DynLDS causing crash when LowerLDS is run at fullLTO pipeline (PR #96038)

Vigneshwar Jayakumar via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 20 09:59:38 PDT 2024


https://github.com/VigneshwarJ updated https://github.com/llvm/llvm-project/pull/96038

>From 93982470d8eee13e886f9a9a4c54bc043d421e71 Mon Sep 17 00:00:00 2001
From: Vigneshwar Jayakumar <vjayakum at amd.com>
Date: Wed, 19 Jun 2024 02:19:12 -0500
Subject: [PATCH 1/2] [AMDGPU] Fix DynLDS causing crash when LowerLDS is run at
 fullLTO pipeline

Direct-mapped dynamic LDS is not lowered by the LowerModuleLDS pass, so
it is never marked with an absolute symbol. When the pass is rerun during
LTO, compilation fails with the fatal error "Module cannot mix absolute and
non-absolute LDS GVs". This patch relaxes that check: a GV is accepted if
it is absolute or, when it is non-absolute, if it is a direct-mapped
dynamic LDS variable. Any other mix still fails with the same error.

Fixes SWDEV-454281
---
 llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp | 9 +++++++--
 llvm/test/CodeGen/AMDGPU/lto-lower-module-lds.ll   | 3 ++-
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
index 04c6e940e6ed6..68f4f6ed101ed 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
@@ -207,7 +207,9 @@ LDSUsesInfoTy getTransitiveUsesOfLDS(const CallGraph &CG, Module &M) {
   }
 
   // Verify that we fall into one of 2 cases:
-  //    - All variables are absolute: this is a re-run of the pass
+  //    - All variables are either absolute 
+  //      or direct mapped dynamic LDS that is not lowered.
+  //      this is a re-run of the pass
   //      so we don't have anything to do.
   //    - No variables are absolute.
   std::optional<bool> HasAbsoluteGVs;
@@ -215,8 +217,11 @@ LDSUsesInfoTy getTransitiveUsesOfLDS(const CallGraph &CG, Module &M) {
     for (auto &[Fn, GVs] : Map) {
       for (auto *GV : GVs) {
         bool IsAbsolute = GV->isAbsoluteSymbolRef();
+        bool IsDirectMapDynLDSGV = AMDGPU::isDynamicLDS(*GV) && DirectMapKernel.contains(Fn);
         if (HasAbsoluteGVs.has_value()) {
-          if (*HasAbsoluteGVs != IsAbsolute) {
+          if (*HasAbsoluteGVs != IsAbsolute ) {
+            if(IsDirectMapDynLDSGV)
+              continue;
             report_fatal_error(
                 "Module cannot mix absolute and non-absolute LDS GVs");
           }
diff --git a/llvm/test/CodeGen/AMDGPU/lto-lower-module-lds.ll b/llvm/test/CodeGen/AMDGPU/lto-lower-module-lds.ll
index f1d946376afe0..c9f4303dfdde3 100644
--- a/llvm/test/CodeGen/AMDGPU/lto-lower-module-lds.ll
+++ b/llvm/test/CodeGen/AMDGPU/lto-lower-module-lds.ll
@@ -39,9 +39,10 @@
 ; CHECK:   Lower uses of LDS variables from non-kernel functions
 
 @lds = internal unnamed_addr addrspace(3) global i32 poison, align 4
-
+ at dynlds = external addrspace(3) global [0 x i32]
 define amdgpu_kernel void @test() {
 entry:
   store i32 1, ptr addrspace(3) @lds
+  store i32 0, ptr addrspace(3) @dynlds
   ret void
 }

>From 3ae54b61f8439cf48e451194ed676b25bd4a8d2c Mon Sep 17 00:00:00 2001
From: Vigneshwar Jayakumar <vjayakum at amd.com>
Date: Thu, 20 Jun 2024 11:47:46 -0500
Subject: [PATCH 2/2] fix review comments

---
 .../Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp |  6 ++---
 .../AMDGPU/lds-mixed-absolute-dynlds.ll       | 25 +++++++++++++++++++
 llvm/test/CodeGen/AMDGPU/lds-run-twice.ll     |  2 ++
 .../CodeGen/AMDGPU/lto-lower-module-lds.ll    |  3 +--
 4 files changed, 31 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/lds-mixed-absolute-dynlds.ll

diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
index 68f4f6ed101ed..abe0ce375aedd 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
@@ -218,10 +218,10 @@ LDSUsesInfoTy getTransitiveUsesOfLDS(const CallGraph &CG, Module &M) {
       for (auto *GV : GVs) {
         bool IsAbsolute = GV->isAbsoluteSymbolRef();
         bool IsDirectMapDynLDSGV = AMDGPU::isDynamicLDS(*GV) && DirectMapKernel.contains(Fn);
+        if (IsDirectMapDynLDSGV)
+          continue;
         if (HasAbsoluteGVs.has_value()) {
-          if (*HasAbsoluteGVs != IsAbsolute ) {
-            if(IsDirectMapDynLDSGV)
-              continue;
+          if (*HasAbsoluteGVs != IsAbsolute) {
             report_fatal_error(
                 "Module cannot mix absolute and non-absolute LDS GVs");
           }
diff --git a/llvm/test/CodeGen/AMDGPU/lds-mixed-absolute-dynlds.ll b/llvm/test/CodeGen/AMDGPU/lds-mixed-absolute-dynlds.ll
new file mode 100644
index 0000000000000..1c3734ef4d001
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lds-mixed-absolute-dynlds.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s 2>&1 | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s 2>&1 | FileCheck %s
+
+; Dynamic LDS that are direct mapped are not lowered in LowerModuleLDS pass.
+; In such cases, LowerModuleLDS is free to leave it in and ignore it, and we want to make sure
+; LowerModuleLDS doesn't crash if it re-runs on such modules.
+
+ at loweredlds = addrspace(3) global i32 poison, !absolute_symbol !0
+ at dynlds = external addrspace(3) global [0 x i32]
+
+define amdgpu_kernel void @kern(i32 %val0) {
+; CHECK-LABEL: define amdgpu_kernel void @kern(
+; CHECK-SAME: i32 [[VAL0:%.*]]) {
+; CHECK-NEXT:    store i32 0, ptr addrspace(3) @loweredlds, align 4
+; CHECK-NEXT:    store i32 1, ptr addrspace(3) @dynlds, align 4
+; CHECK-NEXT:    ret void
+;
+  store i32 0, ptr addrspace(3) @loweredlds
+  store i32 1, ptr addrspace(3) @dynlds
+  ret void
+}
+
+
+!0 = !{i32 0, i32 1}
diff --git a/llvm/test/CodeGen/AMDGPU/lds-run-twice.ll b/llvm/test/CodeGen/AMDGPU/lds-run-twice.ll
index e121f0da327d0..615f1e3b7fb38 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-run-twice.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-run-twice.ll
@@ -7,10 +7,12 @@
 ; Check AMDGPULowerModuleLDS can run more than once on the same module, and that
 ; the second run is a no-op.
 
+ at dynlds = external addrspace(3) global [0 x i32], align 4
 @lds = internal unnamed_addr addrspace(3) global i32 undef, align 4
 
 define amdgpu_kernel void @test() {
 entry:
+  store i32 0, ptr addrspace(3) @dynlds
   store i32 1, ptr addrspace(3) @lds
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/lto-lower-module-lds.ll b/llvm/test/CodeGen/AMDGPU/lto-lower-module-lds.ll
index c9f4303dfdde3..f1d946376afe0 100644
--- a/llvm/test/CodeGen/AMDGPU/lto-lower-module-lds.ll
+++ b/llvm/test/CodeGen/AMDGPU/lto-lower-module-lds.ll
@@ -39,10 +39,9 @@
 ; CHECK:   Lower uses of LDS variables from non-kernel functions
 
 @lds = internal unnamed_addr addrspace(3) global i32 poison, align 4
- at dynlds = external addrspace(3) global [0 x i32]
+
 define amdgpu_kernel void @test() {
 entry:
   store i32 1, ptr addrspace(3) @lds
-  store i32 0, ptr addrspace(3) @dynlds
   ret void
 }



More information about the llvm-commits mailing list