[llvm] [NVPTX] Only run LowerUnreachable when necessary (PR #109868)
Justin Fargnoli via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 24 14:47:40 PDT 2024
https://github.com/justinfargnoli created https://github.com/llvm/llvm-project/pull/109868
[NVPTX: Lower unreachable to exit to allow ptxas to accurately reconstruct the CFG.](https://github.com/llvm/llvm-project/commit/1ee4d880e8760256c606fe55b7af85a4f70d006d) added the LowerUnreachable pass to NVPTX. Based on the PR description:
> Finally, although I expect this to fix most of
> https://bugs.llvm.org/show_bug.cgi?id=27738, I do still encounter
> miscompilations with Julia's unreachable-heavy code when targeting these
> older GPUs using an older ptxas version (specifically, from CUDA 11.4 or
> below). This is likely due to related bugs in ptxas which have been fixed
> since, as I have filed several reproducers with NVIDIA over the past couple of
> years. I'm not inclined to look into fixing those issues over here, and will
> instead be recommending our users to upgrade CUDA to 11.5+ when using these GPUs.
this pass is only necessary when targeting Pascal or earlier via ptxas from CUDA 11.4 or earlier. This PR updates NVPTXTargetMachine.cpp to reflect that.
CC @maleadt
>From 681c40f7fa9478115a1f5838bb2d8fcbd29e5bad Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Tue, 24 Sep 2024 12:39:36 -0700
Subject: [PATCH 1/2] [NVPTX] Only run LowerUnreachable on PTX version <= 7.4
---
llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 12 +++++++++---
llvm/test/CodeGen/NVPTX/unreachable.ll | 13 +++++++------
2 files changed, 16 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 57b7fa783c14a7..a8aa2be499607a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -368,9 +368,15 @@ void NVPTXPassConfig::addIRPasses() {
addPass(createSROAPass());
}
- const auto &Options = getNVPTXTargetMachine().Options;
- addPass(createNVPTXLowerUnreachablePass(Options.TrapUnreachable,
- Options.NoTrapAfterNoreturn));
+ if (ST.getPTXVersion() <= 74) {
+ // This pass is a WAR for a bug that's present in `ptxas` binaries that are
+ // shipped in or prior to CUDA Toolkit 11.4. The highest version that's
+ // supported by `ptxas` in CUDA 11.4 is 7.4. Limit this pass to only run
+ // when targeting PTX 7.4 or lower.
+ const auto &Options = getNVPTXTargetMachine().Options;
+ addPass(createNVPTXLowerUnreachablePass(Options.TrapUnreachable,
+ Options.NoTrapAfterNoreturn));
+ }
}
bool NVPTXPassConfig::addInstSelector() {
diff --git a/llvm/test/CodeGen/NVPTX/unreachable.ll b/llvm/test/CodeGen/NVPTX/unreachable.ll
index 011497c4e23401..3f04a42bc53e5a 100644
--- a/llvm/test/CodeGen/NVPTX/unreachable.ll
+++ b/llvm/test/CodeGen/NVPTX/unreachable.ll
@@ -1,11 +1,13 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs \
-; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-NOTRAP
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs \
-; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-NOTRAP
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -mattr=+ptx75 \
+; RUN: | FileCheck %s --check-prefixes=CHECK-PTX75
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable \
-; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-TRAP
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-TRAP
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable \
-; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-TRAP
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-TRAP
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
@@ -21,12 +23,11 @@ define void @kernel_func() {
; CHECK-TRAP: trap;
; CHECK-NOTRAP-NOT: trap;
; CHECK: exit;
+; CHECK-PTX75-NOT: exit;
unreachable
}
attributes #0 = { noreturn }
-
!nvvm.annotations = !{!1}
-
!1 = !{ptr @kernel_func, !"kernel", i32 1}
>From 6bc6174ec9acbe963e38ebb53fe6b90c5e558286 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Tue, 24 Sep 2024 13:21:52 -0700
Subject: [PATCH 2/2] Also check whether we're targeting Pascal or prior
---
llvm/lib/Target/NVPTX/NVPTXSubtarget.h | 3 +++
llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 8 +++-----
llvm/test/CodeGen/NVPTX/unreachable.ll | 6 ++++--
3 files changed, 10 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index 8b9059bd60cbd4..e2ce088cacdf53 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -95,6 +95,9 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
bool hasDotInstructions() const {
return SmVersion >= 61 && PTXVersion >= 50;
}
+ bool hasPTXASUnreachableBug() const {
+ return SmVersion < 70 && PTXVersion <= 74;
+ }
bool hasCvtaParam() const { return SmVersion >= 70 && PTXVersion >= 77; }
unsigned int getFullSmVersion() const { return FullSmVersion; }
unsigned int getSmVersion() const { return getFullSmVersion() / 10; }
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index a8aa2be499607a..b79b4ff93efe49 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -368,11 +368,9 @@ void NVPTXPassConfig::addIRPasses() {
addPass(createSROAPass());
}
- if (ST.getPTXVersion() <= 74) {
- // This pass is a WAR for a bug that's present in `ptxas` binaries that are
- // shipped in or prior to CUDA Toolkit 11.4. The highest version that's
- // supported by `ptxas` in CUDA 11.4 is 7.4. Limit this pass to only run
- // when targeting PTX 7.4 or lower.
+ if (ST.hasPTXASUnreachableBug()) {
+ // Run LowerUnreachable to WAR a ptxas bug. See the commit description of
+ // 1ee4d880e8760256c606fe55b7af85a4f70d006d for more details.
const auto &Options = getNVPTXTargetMachine().Options;
addPass(createNVPTXLowerUnreachablePass(Options.TrapUnreachable,
Options.NoTrapAfterNoreturn));
diff --git a/llvm/test/CodeGen/NVPTX/unreachable.ll b/llvm/test/CodeGen/NVPTX/unreachable.ll
index 3f04a42bc53e5a..4219f0f3b47fc9 100644
--- a/llvm/test/CodeGen/NVPTX/unreachable.ll
+++ b/llvm/test/CodeGen/NVPTX/unreachable.ll
@@ -3,7 +3,9 @@
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs \
; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-NOTRAP
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -mattr=+ptx75 \
-; RUN: | FileCheck %s --check-prefixes=CHECK-PTX75
+; RUN: | FileCheck %s --check-prefixes=CHECK-BUG-FIXED
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -verify-machineinstrs \
+; RUN: | FileCheck %s --check-prefixes=CHECK-BUG-FIXED
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable \
; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-TRAP
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable \
@@ -23,7 +25,7 @@ define void @kernel_func() {
; CHECK-TRAP: trap;
; CHECK-NOTRAP-NOT: trap;
; CHECK: exit;
-; CHECK-PTX75-NOT: exit;
+; CHECK-BUG-FIXED-NOT: exit;
unreachable
}
More information about the llvm-commits
mailing list