[llvm] [NVPTX] Only run LowerUnreachable when necessary (PR #109868)

Justin Fargnoli via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 24 14:47:40 PDT 2024


https://github.com/justinfargnoli created https://github.com/llvm/llvm-project/pull/109868

[NVPTX: Lower unreachable to exit to allow ptxas to accurately reconstruct the CFG.](https://github.com/llvm/llvm-project/commit/1ee4d880e8760256c606fe55b7af85a4f70d006d) added the LowerUnreachable pass to NVPTX. Based on the PR description:

> Finally, although I expect this to fix most of
> https://bugs.llvm.org/show_bug.cgi?id=27738, I do still encounter
> miscompilations with Julia's unreachable-heavy code when targeting these
> older GPUs using an older ptxas version (specifically, from CUDA 11.4 or
> below). This is likely due to related bugs in ptxas which have been fixed
> since, as I have filed several reproducers with NVIDIA over the past couple of
> years. I'm not inclined to look into fixing those issues over here, and will
> instead be recommending our users to upgrade CUDA to 11.5+ when using these GPUs.

this pass is only necessary when targeting Pascal or earlier GPUs with a `ptxas` from CUDA 11.4 or earlier. This PR updates NVPTXTargetMachine.cpp so that LowerUnreachable runs only in that case.

CC @maleadt 

>From 681c40f7fa9478115a1f5838bb2d8fcbd29e5bad Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Tue, 24 Sep 2024 12:39:36 -0700
Subject: [PATCH 1/2] [NVPTX] Only run LowerUnreachable on PTX version <= 7.4

---
 llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 12 +++++++++---
 llvm/test/CodeGen/NVPTX/unreachable.ll       | 13 +++++++------
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 57b7fa783c14a7..a8aa2be499607a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -368,9 +368,15 @@ void NVPTXPassConfig::addIRPasses() {
     addPass(createSROAPass());
   }
 
-  const auto &Options = getNVPTXTargetMachine().Options;
-  addPass(createNVPTXLowerUnreachablePass(Options.TrapUnreachable,
-                                          Options.NoTrapAfterNoreturn));
+  if (ST.getPTXVersion() <= 74) {
+    // This pass is a WAR for a bug that's present in `ptxas` binaries that are
+    // shipped in or prior to CUDA Toolkit 11.4. The highest version that's
+    // supported by `ptxas` in CUDA 11.4 is 7.4. Limit this pass to only run
+    // when targeting PTX 7.4 or lower.
+    const auto &Options = getNVPTXTargetMachine().Options;
+    addPass(createNVPTXLowerUnreachablePass(Options.TrapUnreachable,
+                                            Options.NoTrapAfterNoreturn));
+  }
 }
 
 bool NVPTXPassConfig::addInstSelector() {
diff --git a/llvm/test/CodeGen/NVPTX/unreachable.ll b/llvm/test/CodeGen/NVPTX/unreachable.ll
index 011497c4e23401..3f04a42bc53e5a 100644
--- a/llvm/test/CodeGen/NVPTX/unreachable.ll
+++ b/llvm/test/CodeGen/NVPTX/unreachable.ll
@@ -1,11 +1,13 @@
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs \
-; RUN:     | FileCheck %s  --check-prefix=CHECK --check-prefix=CHECK-NOTRAP
+; RUN:     | FileCheck %s  --check-prefixes=CHECK,CHECK-NOTRAP
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs \
-; RUN:     | FileCheck %s  --check-prefix=CHECK --check-prefix=CHECK-NOTRAP
+; RUN:     | FileCheck %s  --check-prefixes=CHECK,CHECK-NOTRAP
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -mattr=+ptx75 \
+; RUN:     | FileCheck %s  --check-prefixes=CHECK-PTX75
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable \
-; RUN:     | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-TRAP
+; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-TRAP
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable \
-; RUN:     | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-TRAP
+; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-TRAP
 ; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
 
@@ -21,12 +23,11 @@ define void @kernel_func() {
 ; CHECK-TRAP: trap;
 ; CHECK-NOTRAP-NOT: trap;
 ; CHECK: exit;
+; CHECK-PTX75-NOT: exit;
   unreachable
 }
 
 attributes #0 = { noreturn }
 
-
 !nvvm.annotations = !{!1}
-
 !1 = !{ptr @kernel_func, !"kernel", i32 1}

>From 6bc6174ec9acbe963e38ebb53fe6b90c5e558286 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Tue, 24 Sep 2024 13:21:52 -0700
Subject: [PATCH 2/2] Also check whether we're targeting Pascal or prior

---
 llvm/lib/Target/NVPTX/NVPTXSubtarget.h       | 3 +++
 llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 8 +++-----
 llvm/test/CodeGen/NVPTX/unreachable.ll       | 6 ++++--
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index 8b9059bd60cbd4..e2ce088cacdf53 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -95,6 +95,9 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
   bool hasDotInstructions() const {
     return SmVersion >= 61 && PTXVersion >= 50;
   }
+  bool hasPTXASUnreachableBug() const {
+    return SmVersion < 70 && PTXVersion <= 74;
+  }
   bool hasCvtaParam() const { return SmVersion >= 70 && PTXVersion >= 77; }
   unsigned int getFullSmVersion() const { return FullSmVersion; }
   unsigned int getSmVersion() const { return getFullSmVersion() / 10; }
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index a8aa2be499607a..b79b4ff93efe49 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -368,11 +368,9 @@ void NVPTXPassConfig::addIRPasses() {
     addPass(createSROAPass());
   }
 
-  if (ST.getPTXVersion() <= 74) {
-    // This pass is a WAR for a bug that's present in `ptxas` binaries that are
-    // shipped in or prior to CUDA Toolkit 11.4. The highest version that's
-    // supported by `ptxas` in CUDA 11.4 is 7.4. Limit this pass to only run
-    // when targeting PTX 7.4 or lower.
+  if (ST.hasPTXASUnreachableBug()) {
+    // Run LowerUnreachable to WAR a ptxas bug. See the commit description of
+    // 1ee4d880e8760256c606fe55b7af85a4f70d006d for more details.
     const auto &Options = getNVPTXTargetMachine().Options;
     addPass(createNVPTXLowerUnreachablePass(Options.TrapUnreachable,
                                             Options.NoTrapAfterNoreturn));
diff --git a/llvm/test/CodeGen/NVPTX/unreachable.ll b/llvm/test/CodeGen/NVPTX/unreachable.ll
index 3f04a42bc53e5a..4219f0f3b47fc9 100644
--- a/llvm/test/CodeGen/NVPTX/unreachable.ll
+++ b/llvm/test/CodeGen/NVPTX/unreachable.ll
@@ -3,7 +3,9 @@
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs \
 ; RUN:     | FileCheck %s  --check-prefixes=CHECK,CHECK-NOTRAP
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -mattr=+ptx75 \
-; RUN:     | FileCheck %s  --check-prefixes=CHECK-PTX75
+; RUN:     | FileCheck %s  --check-prefixes=CHECK-BUG-FIXED
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -verify-machineinstrs \
+; RUN:     | FileCheck %s  --check-prefixes=CHECK-BUG-FIXED
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable \
 ; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-TRAP
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable \
@@ -23,7 +25,7 @@ define void @kernel_func() {
 ; CHECK-TRAP: trap;
 ; CHECK-NOTRAP-NOT: trap;
 ; CHECK: exit;
-; CHECK-PTX75-NOT: exit;
+; CHECK-BUG-FIXED-NOT: exit;
   unreachable
 }
 



More information about the llvm-commits mailing list