[llvm] [NVPTX] Only run LowerUnreachable when necessary (PR #109868)
Justin Fargnoli via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 9 19:20:42 PDT 2024
https://github.com/justinfargnoli updated https://github.com/llvm/llvm-project/pull/109868
>From 58ffde6db94d40c8db18db52d9ae2a8d938d3095 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Tue, 24 Sep 2024 12:39:36 -0700
Subject: [PATCH 1/7] [NVPTX] Only run LowerUnreachable on PTX version <= 7.4
---
llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 12 +++++++++---
llvm/test/CodeGen/NVPTX/unreachable.ll | 6 ++++--
2 files changed, 13 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 57b7fa783c14a7..a8aa2be499607a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -368,9 +368,15 @@ void NVPTXPassConfig::addIRPasses() {
addPass(createSROAPass());
}
- const auto &Options = getNVPTXTargetMachine().Options;
- addPass(createNVPTXLowerUnreachablePass(Options.TrapUnreachable,
- Options.NoTrapAfterNoreturn));
+ if (ST.getPTXVersion() <= 74) {
+ // This pass is a WAR for a bug that's present in `ptxas` binaries that are
+ // shipped in or prior to CUDA Toolkit 11.4. The highest version that's
+ // supported by `ptxas` in CUDA 11.4 is 7.4. Limit this pass to only run
+ // when targeting PTX 7.4 or lower.
+ const auto &Options = getNVPTXTargetMachine().Options;
+ addPass(createNVPTXLowerUnreachablePass(Options.TrapUnreachable,
+ Options.NoTrapAfterNoreturn));
+ }
}
bool NVPTXPassConfig::addInstSelector() {
diff --git a/llvm/test/CodeGen/NVPTX/unreachable.ll b/llvm/test/CodeGen/NVPTX/unreachable.ll
index f9118900cb7372..e6cb54fa16e1fe 100644
--- a/llvm/test/CodeGen/NVPTX/unreachable.ll
+++ b/llvm/test/CodeGen/NVPTX/unreachable.ll
@@ -10,6 +10,8 @@
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-TRAP
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn=false \
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-TRAP
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -mattr=+ptx75 \
+; RUN: | FileCheck %s --check-prefixes=CHECK-BUG-FIXED
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
@@ -26,12 +28,14 @@ define void @kernel_func() {
; CHECK-TRAP: trap;
; CHECK-NOTRAP-NOT: trap;
; CHECK: exit;
+; CHECK-BUG-FIXED-NOT: exit;
unreachable
}
; CHECK-LABEL: kernel_func_2
define void @kernel_func_2() {
; CHECK: trap; exit;
+; CHECK-BUG-FIXED-NOT: exit;
call void @llvm.trap()
;; Make sure we avoid emitting two trap instructions.
@@ -42,7 +46,5 @@ define void @kernel_func_2() {
attributes #0 = { noreturn }
-
!nvvm.annotations = !{!1}
-
!1 = !{ptr @kernel_func, !"kernel", i32 1}
>From 5424a9dcc85cdec1d5ebebcc76d1e41e28d65a2a Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Tue, 24 Sep 2024 13:21:52 -0700
Subject: [PATCH 2/7] Also check whether we're targeting Pascal or prior
---
llvm/lib/Target/NVPTX/NVPTXSubtarget.h | 3 +++
llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 8 +++-----
2 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index 8b9059bd60cbd4..e2ce088cacdf53 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -95,6 +95,9 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
bool hasDotInstructions() const {
return SmVersion >= 61 && PTXVersion >= 50;
}
+ bool hasPTXASUnreachableBug() const {
+ return SmVersion < 70 && PTXVersion <= 74;
+ }
bool hasCvtaParam() const { return SmVersion >= 70 && PTXVersion >= 77; }
unsigned int getFullSmVersion() const { return FullSmVersion; }
unsigned int getSmVersion() const { return getFullSmVersion() / 10; }
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index a8aa2be499607a..b79b4ff93efe49 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -368,11 +368,9 @@ void NVPTXPassConfig::addIRPasses() {
addPass(createSROAPass());
}
- if (ST.getPTXVersion() <= 74) {
- // This pass is a WAR for a bug that's present in `ptxas` binaries that are
- // shipped in or prior to CUDA Toolkit 11.4. The highest version that's
- // supported by `ptxas` in CUDA 11.4 is 7.4. Limit this pass to only run
- // when targeting PTX 7.4 or lower.
+ if (ST.hasPTXASUnreachableBug()) {
+ // Run LowerUnreachable to WAR a ptxas bug. See the commit description of
+ // 1ee4d880e8760256c606fe55b7af85a4f70d006d for more details.
const auto &Options = getNVPTXTargetMachine().Options;
addPass(createNVPTXLowerUnreachablePass(Options.TrapUnreachable,
Options.NoTrapAfterNoreturn));
>From 529062c32b270e6c665e04b71859b3393cebc6a0 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Tue, 8 Oct 2024 12:35:44 -0700
Subject: [PATCH 3/7] PTXAS only has the bug prior to CUDA 12.8 / PTX v8.2
---
llvm/lib/Target/NVPTX/NVPTXSubtarget.h | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index e2ce088cacdf53..2e6beeceb98614 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -95,9 +95,7 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
bool hasDotInstructions() const {
return SmVersion >= 61 && PTXVersion >= 50;
}
- bool hasPTXASUnreachableBug() const {
- return SmVersion < 70 && PTXVersion <= 74;
- }
+ bool hasPTXASUnreachableBug() const { return PTXVersion <= 83; }
bool hasCvtaParam() const { return SmVersion >= 70 && PTXVersion >= 77; }
unsigned int getFullSmVersion() const { return FullSmVersion; }
unsigned int getSmVersion() const { return getFullSmVersion() / 10; }
>From b88976bea0f69ea6490c9bd141af878a00031b14 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Tue, 8 Oct 2024 15:52:01 -0700
Subject: [PATCH 4/7] Only emit exit for PTX v8.3+
---
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 5 ++++-
llvm/lib/Target/NVPTX/NVPTXSubtarget.h | 2 +-
llvm/test/CodeGen/NVPTX/unreachable.ll | 23 ++++++++++++++---------
3 files changed, 19 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 8f4eddb5142740..86eceed235ea55 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -139,6 +139,8 @@ def hasVote : Predicate<"Subtarget->hasVote()">;
def hasDouble : Predicate<"Subtarget->hasDouble()">;
def hasLDG : Predicate<"Subtarget->hasLDG()">;
def hasLDU : Predicate<"Subtarget->hasLDU()">;
+def hasPTXASUnreachableBug : Predicate<"Subtarget->hasPTXASUnreachableBug()">;
+def noPTXASUnreachableBug : Predicate<"!Subtarget->hasPTXASUnreachableBug()">;
def doF32FTZ : Predicate<"useF32FTZ()">;
def doNoF32FTZ : Predicate<"!useF32FTZ()">;
@@ -3736,9 +3738,10 @@ def Callseq_End :
[(callseq_end timm:$amt1, timm:$amt2)]>;
// trap instruction
+def trapinst : NVPTXInst<(outs), (ins), "trap;", [(trap)]>, Requires<[noPTXASUnreachableBug]>;
// Emit an `exit` as well to convey to ptxas that `trap` exits the CFG.
// This won't be necessary in a future version of ptxas.
-def trapinst : NVPTXInst<(outs), (ins), "trap; exit;", [(trap)]>;
+def trapinstexit : NVPTXInst<(outs), (ins), "trap; exit;", [(trap)]>, Requires<[hasPTXASUnreachableBug]>;
// brkpt instruction
def debugtrapinst : NVPTXInst<(outs), (ins), "brkpt;", [(debugtrap)]>;
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index 2e6beeceb98614..49be13b2962c59 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -95,7 +95,7 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
bool hasDotInstructions() const {
return SmVersion >= 61 && PTXVersion >= 50;
}
- bool hasPTXASUnreachableBug() const { return PTXVersion <= 83; }
+ bool hasPTXASUnreachableBug() const { return PTXVersion < 83; }
bool hasCvtaParam() const { return SmVersion >= 70 && PTXVersion >= 77; }
unsigned int getFullSmVersion() const { return FullSmVersion; }
unsigned int getSmVersion() const { return getFullSmVersion() / 10; }
diff --git a/llvm/test/CodeGen/NVPTX/unreachable.ll b/llvm/test/CodeGen/NVPTX/unreachable.ll
index e6cb54fa16e1fe..e2d1079e4dc6ea 100644
--- a/llvm/test/CodeGen/NVPTX/unreachable.ll
+++ b/llvm/test/CodeGen/NVPTX/unreachable.ll
@@ -1,17 +1,17 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable=false \
-; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-NOTRAP
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable=false \
-; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-NOTRAP
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn \
-; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-NOTRAP
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn \
-; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-NOTRAP
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn=false \
-; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-TRAP
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-TRAP
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn=false \
-; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-TRAP
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -mattr=+ptx75 \
-; RUN: | FileCheck %s --check-prefixes=CHECK-BUG-FIXED
+; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-TRAP
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -mattr=+ptx83 \
+; RUN: | FileCheck %s --check-prefix=CHECK-BUG-FIXED
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
@@ -26,21 +26,26 @@ define void @kernel_func() {
call void @throw()
; CHECK-TRAP-NOT: exit;
; CHECK-TRAP: trap;
+
; CHECK-NOTRAP-NOT: trap;
; CHECK: exit;
+
; CHECK-BUG-FIXED-NOT: exit;
+; CHECK-BUG-FIXED: trap;
unreachable
}
; CHECK-LABEL: kernel_func_2
define void @kernel_func_2() {
; CHECK: trap; exit;
-; CHECK-BUG-FIXED-NOT: exit;
+; CHECK-BUG-FIXED: trap;
+; CHECK-BUG-FIXED-NOT: trap; exit;
call void @llvm.trap()
;; Make sure we avoid emitting two trap instructions.
; CHECK-NOT: trap;
; CHECK-NOT: exit;
+; CHECK-BUG-FIXED-NOT: trap;
unreachable
}
>From 29acb3c163bb93194360cdfd1d090cbe43fa2909 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Tue, 8 Oct 2024 16:04:38 -0700
Subject: [PATCH 5/7] Rename tablegen def
---
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 86eceed235ea55..8b34ce4f1001c1 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -3741,7 +3741,7 @@ def Callseq_End :
def trapinst : NVPTXInst<(outs), (ins), "trap;", [(trap)]>, Requires<[noPTXASUnreachableBug]>;
// Emit an `exit` as well to convey to ptxas that `trap` exits the CFG.
// This won't be necessary in a future version of ptxas.
-def trapinstexit : NVPTXInst<(outs), (ins), "trap; exit;", [(trap)]>, Requires<[hasPTXASUnreachableBug]>;
+def trapexitinst : NVPTXInst<(outs), (ins), "trap; exit;", [(trap)]>, Requires<[hasPTXASUnreachableBug]>;
// brkpt instruction
def debugtrapinst : NVPTXInst<(outs), (ins), "brkpt;", [(debugtrap)]>;
>From 328e12c94e186dc8474646d96355ce4e928e8943 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Wed, 9 Oct 2024 19:08:35 -0700
Subject: [PATCH 6/7] Auto-generate checks
---
llvm/test/CodeGen/NVPTX/unreachable.ll | 108 ++++++++++++++++++-------
1 file changed, 80 insertions(+), 28 deletions(-)
diff --git a/llvm/test/CodeGen/NVPTX/unreachable.ll b/llvm/test/CodeGen/NVPTX/unreachable.ll
index e2d1079e4dc6ea..6bd583c8d50d8a 100644
--- a/llvm/test/CodeGen/NVPTX/unreachable.ll
+++ b/llvm/test/CodeGen/NVPTX/unreachable.ll
@@ -1,51 +1,103 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable=false \
-; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-NOTRAP
+; RUN: | FileCheck %s --check-prefixes=CHECK,NO-TRAP-UNREACHABLE
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable=false \
-; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-NOTRAP
+; RUN: | FileCheck %s --check-prefixes=CHECK,NO-TRAP-UNREACHABLE
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn \
-; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-NOTRAP
+; RUN: | FileCheck %s --check-prefixes=CHECK,NO-TRAP-AFTER-NORETURN
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn \
-; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-NOTRAP
+; RUN: | FileCheck %s --check-prefixes=CHECK,NO-TRAP-AFTER-NORETURN
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn=false \
-; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-TRAP
+; RUN: | FileCheck %s --check-prefixes=CHECK,TRAP
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn=false \
-; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-TRAP
+; RUN: | FileCheck %s --check-prefixes=CHECK,TRAP
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -mattr=+ptx83 \
-; RUN: | FileCheck %s --check-prefix=CHECK-BUG-FIXED
+; RUN: | FileCheck %s --check-prefixes=BUG-FIXED
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
-; CHECK: .extern .func throw
+target triple = "nvptx-unknown-cuda"
+
declare void @throw() #0
declare void @llvm.trap() #0
-; CHECK-LABEL: .entry kernel_func
define void @kernel_func() {
-; CHECK: call.uni
-; CHECK: throw,
+; NO-TRAP-UNREACHABLE-LABEL: kernel_func(
+; NO-TRAP-UNREACHABLE: {
+; NO-TRAP-UNREACHABLE-EMPTY:
+; NO-TRAP-UNREACHABLE-EMPTY:
+; NO-TRAP-UNREACHABLE-NEXT: // %bb.0:
+; NO-TRAP-UNREACHABLE-NEXT: { // callseq 0, 0
+; NO-TRAP-UNREACHABLE-NEXT: call.uni
+; NO-TRAP-UNREACHABLE-NEXT: throw,
+; NO-TRAP-UNREACHABLE-NEXT: (
+; NO-TRAP-UNREACHABLE-NEXT: );
+; NO-TRAP-UNREACHABLE-NEXT: } // callseq 0
+; NO-TRAP-UNREACHABLE-NEXT: // begin inline asm
+; NO-TRAP-UNREACHABLE-NEXT: exit;
+; NO-TRAP-UNREACHABLE-NEXT: // end inline asm
+;
+; NO-TRAP-AFTER-NORETURN-LABEL: kernel_func(
+; NO-TRAP-AFTER-NORETURN: {
+; NO-TRAP-AFTER-NORETURN-EMPTY:
+; NO-TRAP-AFTER-NORETURN-EMPTY:
+; NO-TRAP-AFTER-NORETURN-NEXT: // %bb.0:
+; NO-TRAP-AFTER-NORETURN-NEXT: { // callseq 0, 0
+; NO-TRAP-AFTER-NORETURN-NEXT: call.uni
+; NO-TRAP-AFTER-NORETURN-NEXT: throw,
+; NO-TRAP-AFTER-NORETURN-NEXT: (
+; NO-TRAP-AFTER-NORETURN-NEXT: );
+; NO-TRAP-AFTER-NORETURN-NEXT: } // callseq 0
+; NO-TRAP-AFTER-NORETURN-NEXT: // begin inline asm
+; NO-TRAP-AFTER-NORETURN-NEXT: exit;
+; NO-TRAP-AFTER-NORETURN-NEXT: // end inline asm
+; NO-TRAP-AFTER-NORETURN-NEXT: trap; exit;
+;
+; TRAP-LABEL: kernel_func(
+; TRAP: {
+; TRAP-EMPTY:
+; TRAP-EMPTY:
+; TRAP-NEXT: // %bb.0:
+; TRAP-NEXT: { // callseq 0, 0
+; TRAP-NEXT: call.uni
+; TRAP-NEXT: throw,
+; TRAP-NEXT: (
+; TRAP-NEXT: );
+; TRAP-NEXT: } // callseq 0
+; TRAP-NEXT: trap; exit;
+;
+; BUG-FIXED-LABEL: kernel_func(
+; BUG-FIXED: {
+; BUG-FIXED-EMPTY:
+; BUG-FIXED-EMPTY:
+; BUG-FIXED-NEXT: // %bb.0:
+; BUG-FIXED-NEXT: { // callseq 0, 0
+; BUG-FIXED-NEXT: call.uni
+; BUG-FIXED-NEXT: throw,
+; BUG-FIXED-NEXT: (
+; BUG-FIXED-NEXT: );
+; BUG-FIXED-NEXT: } // callseq 0
+; BUG-FIXED-NEXT: trap;
call void @throw()
-; CHECK-TRAP-NOT: exit;
-; CHECK-TRAP: trap;
-
-; CHECK-NOTRAP-NOT: trap;
-; CHECK: exit;
-
-; CHECK-BUG-FIXED-NOT: exit;
-; CHECK-BUG-FIXED: trap;
unreachable
}
-; CHECK-LABEL: kernel_func_2
define void @kernel_func_2() {
-; CHECK: trap; exit;
-; CHECK-BUG-FIXED: trap;
-; CHECK-BUG-FIXED-NOT: trap; exit;
+; CHECK-LABEL: kernel_func_2(
+; CHECK: {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: trap; exit;
+;
+; BUG-FIXED-LABEL: kernel_func_2(
+; BUG-FIXED: {
+; BUG-FIXED-EMPTY:
+; BUG-FIXED-EMPTY:
+; BUG-FIXED-NEXT: // %bb.0:
+; BUG-FIXED-NEXT: trap;
call void @llvm.trap()
-
-;; Make sure we avoid emitting two trap instructions.
-; CHECK-NOT: trap;
-; CHECK-NOT: exit;
-; CHECK-BUG-FIXED-NOT: trap;
+; Make sure we avoid emitting two trap instructions.
unreachable
}
>From 5d2902dde6a611b6dc531a0c747c596415326e30 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Wed, 9 Oct 2024 19:19:00 -0700
Subject: [PATCH 7/7] Add comment
---
llvm/lib/Target/NVPTX/NVPTXSubtarget.h | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index 49be13b2962c59..e785bbf830da62 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -95,6 +95,13 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
bool hasDotInstructions() const {
return SmVersion >= 61 && PTXVersion >= 50;
}
+ // Prior to CUDA 12.3 ptxas did not recognize that the trap instruction
+ // terminates a basic block. Instead, it would assume that control flow
+ // continued to the next instruction. The next instruction could be in the
+ // block that's lexically below it. This would lead to phantom CFG edges
+ // being created within ptxas. This issue was fixed in CUDA 12.3. Thus, when
+ // targeting PTX ISA versions 8.3+ we can confidently say that the bug will
+ // not be present.
bool hasPTXASUnreachableBug() const { return PTXVersion < 83; }
bool hasCvtaParam() const { return SmVersion >= 70 && PTXVersion >= 77; }
unsigned int getFullSmVersion() const { return FullSmVersion; }
More information about the llvm-commits
mailing list