[llvm] [NVPTX] Only run LowerUnreachable when necessary (PR #109868)

Justin Fargnoli via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 9 19:20:42 PDT 2024


https://github.com/justinfargnoli updated https://github.com/llvm/llvm-project/pull/109868

>From 58ffde6db94d40c8db18db52d9ae2a8d938d3095 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Tue, 24 Sep 2024 12:39:36 -0700
Subject: [PATCH 1/7] [NVPTX] Only run LowerUnreachable on PTX version <= 7.4

---
 llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 12 +++++++++---
 llvm/test/CodeGen/NVPTX/unreachable.ll       |  6 ++++--
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 57b7fa783c14a7..a8aa2be499607a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -368,9 +368,15 @@ void NVPTXPassConfig::addIRPasses() {
     addPass(createSROAPass());
   }
 
-  const auto &Options = getNVPTXTargetMachine().Options;
-  addPass(createNVPTXLowerUnreachablePass(Options.TrapUnreachable,
-                                          Options.NoTrapAfterNoreturn));
+  if (ST.getPTXVersion() <= 74) {
+    // This pass is a WAR for a bug that's present in `ptxas` binaries that are
+    // shipped in or prior to CUDA Toolkit 11.4. The highest version that's
+    // supported by `ptxas` in CUDA 11.4 is 7.4. Limit this pass to only run
+    // when targeting PTX 7.4 or lower.
+    const auto &Options = getNVPTXTargetMachine().Options;
+    addPass(createNVPTXLowerUnreachablePass(Options.TrapUnreachable,
+                                            Options.NoTrapAfterNoreturn));
+  }
 }
 
 bool NVPTXPassConfig::addInstSelector() {
diff --git a/llvm/test/CodeGen/NVPTX/unreachable.ll b/llvm/test/CodeGen/NVPTX/unreachable.ll
index f9118900cb7372..e6cb54fa16e1fe 100644
--- a/llvm/test/CodeGen/NVPTX/unreachable.ll
+++ b/llvm/test/CodeGen/NVPTX/unreachable.ll
@@ -10,6 +10,8 @@
 ; RUN:     | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-TRAP
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn=false \
 ; RUN:     | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-TRAP
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -mattr=+ptx75 \
+; RUN:     | FileCheck %s  --check-prefixes=CHECK-BUG-FIXED
 ; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
 
@@ -26,12 +28,14 @@ define void @kernel_func() {
 ; CHECK-TRAP: trap;
 ; CHECK-NOTRAP-NOT: trap;
 ; CHECK: exit;
+; CHECK-BUG-FIXED-NOT: exit;
   unreachable
 }
 
 ; CHECK-LABEL: kernel_func_2
 define void @kernel_func_2() {
 ; CHECK: trap; exit;
+; CHECK-BUG-FIXED-NOT: exit;
   call void @llvm.trap()
 
 ;; Make sure we avoid emitting two trap instructions.
@@ -42,7 +46,5 @@ define void @kernel_func_2() {
 
 attributes #0 = { noreturn }
 
-
 !nvvm.annotations = !{!1}
-
 !1 = !{ptr @kernel_func, !"kernel", i32 1}

>From 5424a9dcc85cdec1d5ebebcc76d1e41e28d65a2a Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Tue, 24 Sep 2024 13:21:52 -0700
Subject: [PATCH 2/7] Also check whether we're targeting Pascal or prior

---
 llvm/lib/Target/NVPTX/NVPTXSubtarget.h       | 3 +++
 llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 8 +++-----
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index 8b9059bd60cbd4..e2ce088cacdf53 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -95,6 +95,9 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
   bool hasDotInstructions() const {
     return SmVersion >= 61 && PTXVersion >= 50;
   }
+  bool hasPTXASUnreachableBug() const {
+    return SmVersion < 70 && PTXVersion <= 74;
+  }
   bool hasCvtaParam() const { return SmVersion >= 70 && PTXVersion >= 77; }
   unsigned int getFullSmVersion() const { return FullSmVersion; }
   unsigned int getSmVersion() const { return getFullSmVersion() / 10; }
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index a8aa2be499607a..b79b4ff93efe49 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -368,11 +368,9 @@ void NVPTXPassConfig::addIRPasses() {
     addPass(createSROAPass());
   }
 
-  if (ST.getPTXVersion() <= 74) {
-    // This pass is a WAR for a bug that's present in `ptxas` binaries that are
-    // shipped in or prior to CUDA Toolkit 11.4. The highest version that's
-    // supported by `ptxas` in CUDA 11.4 is 7.4. Limit this pass to only run
-    // when targeting PTX 7.4 or lower.
+  if (ST.hasPTXASUnreachableBug()) {
+    // Run LowerUnreachable to WAR a ptxas bug. See the commit description of
+    // 1ee4d880e8760256c606fe55b7af85a4f70d006d for more details.
     const auto &Options = getNVPTXTargetMachine().Options;
     addPass(createNVPTXLowerUnreachablePass(Options.TrapUnreachable,
                                             Options.NoTrapAfterNoreturn));

>From 529062c32b270e6c665e04b71859b3393cebc6a0 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Tue, 8 Oct 2024 12:35:44 -0700
Subject: [PATCH 3/7] PTXAS only has the bug prior to CUDA 12.8 / PTX v8.2

---
 llvm/lib/Target/NVPTX/NVPTXSubtarget.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index e2ce088cacdf53..2e6beeceb98614 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -95,9 +95,7 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
   bool hasDotInstructions() const {
     return SmVersion >= 61 && PTXVersion >= 50;
   }
-  bool hasPTXASUnreachableBug() const {
-    return SmVersion < 70 && PTXVersion <= 74;
-  }
+  bool hasPTXASUnreachableBug() const { return PTXVersion <= 83; }
   bool hasCvtaParam() const { return SmVersion >= 70 && PTXVersion >= 77; }
   unsigned int getFullSmVersion() const { return FullSmVersion; }
   unsigned int getSmVersion() const { return getFullSmVersion() / 10; }

>From b88976bea0f69ea6490c9bd141af878a00031b14 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Tue, 8 Oct 2024 15:52:01 -0700
Subject: [PATCH 4/7] Only emit exit for PTX versions prior to v8.3

---
 llvm/lib/Target/NVPTX/NVPTXInstrInfo.td |  5 ++++-
 llvm/lib/Target/NVPTX/NVPTXSubtarget.h  |  2 +-
 llvm/test/CodeGen/NVPTX/unreachable.ll  | 23 ++++++++++++++---------
 3 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 8f4eddb5142740..86eceed235ea55 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -139,6 +139,8 @@ def hasVote : Predicate<"Subtarget->hasVote()">;
 def hasDouble : Predicate<"Subtarget->hasDouble()">;
 def hasLDG : Predicate<"Subtarget->hasLDG()">;
 def hasLDU : Predicate<"Subtarget->hasLDU()">;
+def hasPTXASUnreachableBug : Predicate<"Subtarget->hasPTXASUnreachableBug()">;
+def noPTXASUnreachableBug : Predicate<"!Subtarget->hasPTXASUnreachableBug()">;
 
 def doF32FTZ : Predicate<"useF32FTZ()">;
 def doNoF32FTZ : Predicate<"!useF32FTZ()">;
@@ -3736,9 +3738,10 @@ def Callseq_End :
             [(callseq_end timm:$amt1, timm:$amt2)]>;
 
 // trap instruction
+def trapinst : NVPTXInst<(outs), (ins), "trap;", [(trap)]>, Requires<[noPTXASUnreachableBug]>;
 // Emit an `exit` as well to convey to ptxas that `trap` exits the CFG.
 // This won't be necessary in a future version of ptxas.
-def trapinst : NVPTXInst<(outs), (ins), "trap; exit;", [(trap)]>;
+def trapinstexit : NVPTXInst<(outs), (ins), "trap; exit;", [(trap)]>, Requires<[hasPTXASUnreachableBug]>;
 // brkpt instruction
 def debugtrapinst : NVPTXInst<(outs), (ins), "brkpt;", [(debugtrap)]>;
 
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index 2e6beeceb98614..49be13b2962c59 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -95,7 +95,7 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
   bool hasDotInstructions() const {
     return SmVersion >= 61 && PTXVersion >= 50;
   }
-  bool hasPTXASUnreachableBug() const { return PTXVersion <= 83; }
+  bool hasPTXASUnreachableBug() const { return PTXVersion < 83; }
   bool hasCvtaParam() const { return SmVersion >= 70 && PTXVersion >= 77; }
   unsigned int getFullSmVersion() const { return FullSmVersion; }
   unsigned int getSmVersion() const { return getFullSmVersion() / 10; }
diff --git a/llvm/test/CodeGen/NVPTX/unreachable.ll b/llvm/test/CodeGen/NVPTX/unreachable.ll
index e6cb54fa16e1fe..e2d1079e4dc6ea 100644
--- a/llvm/test/CodeGen/NVPTX/unreachable.ll
+++ b/llvm/test/CodeGen/NVPTX/unreachable.ll
@@ -1,17 +1,17 @@
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable=false \
-; RUN:     | FileCheck %s  --check-prefix=CHECK --check-prefix=CHECK-NOTRAP
+; RUN:     | FileCheck %s  --check-prefixes=CHECK,CHECK-NOTRAP
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable=false \
-; RUN:     | FileCheck %s  --check-prefix=CHECK --check-prefix=CHECK-NOTRAP
+; RUN:     | FileCheck %s  --check-prefixes=CHECK,CHECK-NOTRAP
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn \
-; RUN:     | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP
+; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-NOTRAP
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn \
-; RUN:     | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP
+; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-NOTRAP
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn=false \
-; RUN:     | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-TRAP
+; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-TRAP
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn=false \
-; RUN:     | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-TRAP
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -mattr=+ptx75 \
-; RUN:     | FileCheck %s  --check-prefixes=CHECK-BUG-FIXED
+; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-TRAP
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -mattr=+ptx83 \
+; RUN:     | FileCheck %s  --check-prefix=CHECK-BUG-FIXED
 ; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
 
@@ -26,21 +26,26 @@ define void @kernel_func() {
   call void @throw()
 ; CHECK-TRAP-NOT: exit;
 ; CHECK-TRAP: trap;
+
 ; CHECK-NOTRAP-NOT: trap;
 ; CHECK: exit;
+
 ; CHECK-BUG-FIXED-NOT: exit;
+; CHECK-BUG-FIXED: trap;
   unreachable
 }
 
 ; CHECK-LABEL: kernel_func_2
 define void @kernel_func_2() {
 ; CHECK: trap; exit;
-; CHECK-BUG-FIXED-NOT: exit;
+; CHECK-BUG-FIXED: trap;
+; CHECK-BUG-FIXED-NOT: trap; exit;
   call void @llvm.trap()
 
 ;; Make sure we avoid emitting two trap instructions.
 ; CHECK-NOT: trap;
 ; CHECK-NOT: exit;
+; CHECK-BUG-FIXED-NOT: trap; 
   unreachable
 }
 

>From 29acb3c163bb93194360cdfd1d090cbe43fa2909 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Tue, 8 Oct 2024 16:04:38 -0700
Subject: [PATCH 5/7] Rename tablegen def

---
 llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 86eceed235ea55..8b34ce4f1001c1 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -3741,7 +3741,7 @@ def Callseq_End :
 def trapinst : NVPTXInst<(outs), (ins), "trap;", [(trap)]>, Requires<[noPTXASUnreachableBug]>;
 // Emit an `exit` as well to convey to ptxas that `trap` exits the CFG.
 // This won't be necessary in a future version of ptxas.
-def trapinstexit : NVPTXInst<(outs), (ins), "trap; exit;", [(trap)]>, Requires<[hasPTXASUnreachableBug]>;
+def trapexitinst : NVPTXInst<(outs), (ins), "trap; exit;", [(trap)]>, Requires<[hasPTXASUnreachableBug]>;
 // brkpt instruction
 def debugtrapinst : NVPTXInst<(outs), (ins), "brkpt;", [(debugtrap)]>;
 

>From 328e12c94e186dc8474646d96355ce4e928e8943 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Wed, 9 Oct 2024 19:08:35 -0700
Subject: [PATCH 6/7] Auto-generate checks

---
 llvm/test/CodeGen/NVPTX/unreachable.ll | 108 ++++++++++++++++++-------
 1 file changed, 80 insertions(+), 28 deletions(-)

diff --git a/llvm/test/CodeGen/NVPTX/unreachable.ll b/llvm/test/CodeGen/NVPTX/unreachable.ll
index e2d1079e4dc6ea..6bd583c8d50d8a 100644
--- a/llvm/test/CodeGen/NVPTX/unreachable.ll
+++ b/llvm/test/CodeGen/NVPTX/unreachable.ll
@@ -1,51 +1,103 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable=false \
-; RUN:     | FileCheck %s  --check-prefixes=CHECK,CHECK-NOTRAP
+; RUN:     | FileCheck %s --check-prefixes=CHECK,NO-TRAP-UNREACHABLE
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable=false \
-; RUN:     | FileCheck %s  --check-prefixes=CHECK,CHECK-NOTRAP
+; RUN:     | FileCheck %s --check-prefixes=CHECK,NO-TRAP-UNREACHABLE
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn \
-; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-NOTRAP
+; RUN:     | FileCheck %s --check-prefixes=CHECK,NO-TRAP-AFTER-NORETURN
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn \
-; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-NOTRAP
+; RUN:     | FileCheck %s --check-prefixes=CHECK,NO-TRAP-AFTER-NORETURN
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn=false \
-; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-TRAP
+; RUN:     | FileCheck %s --check-prefixes=CHECK,TRAP
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn=false \
-; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-TRAP
+; RUN:     | FileCheck %s --check-prefixes=CHECK,TRAP
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -mattr=+ptx83 \
-; RUN:     | FileCheck %s  --check-prefix=CHECK-BUG-FIXED
+; RUN:     | FileCheck %s --check-prefixes=BUG-FIXED
 ; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
 
-; CHECK: .extern .func throw
+target triple = "nvptx-unknown-cuda"
+
 declare void @throw() #0
 declare void @llvm.trap() #0
 
-; CHECK-LABEL: .entry kernel_func
 define void @kernel_func() {
-; CHECK: call.uni
-; CHECK: throw,
+; NO-TRAP-UNREACHABLE-LABEL: kernel_func(
+; NO-TRAP-UNREACHABLE:       {
+; NO-TRAP-UNREACHABLE-EMPTY:
+; NO-TRAP-UNREACHABLE-EMPTY:
+; NO-TRAP-UNREACHABLE-NEXT:  // %bb.0:
+; NO-TRAP-UNREACHABLE-NEXT:    { // callseq 0, 0
+; NO-TRAP-UNREACHABLE-NEXT:    call.uni
+; NO-TRAP-UNREACHABLE-NEXT:    throw,
+; NO-TRAP-UNREACHABLE-NEXT:    (
+; NO-TRAP-UNREACHABLE-NEXT:    );
+; NO-TRAP-UNREACHABLE-NEXT:    } // callseq 0
+; NO-TRAP-UNREACHABLE-NEXT:    // begin inline asm
+; NO-TRAP-UNREACHABLE-NEXT:    exit;
+; NO-TRAP-UNREACHABLE-NEXT:    // end inline asm
+;
+; NO-TRAP-AFTER-NORETURN-LABEL: kernel_func(
+; NO-TRAP-AFTER-NORETURN:       {
+; NO-TRAP-AFTER-NORETURN-EMPTY:
+; NO-TRAP-AFTER-NORETURN-EMPTY:
+; NO-TRAP-AFTER-NORETURN-NEXT:  // %bb.0:
+; NO-TRAP-AFTER-NORETURN-NEXT:    { // callseq 0, 0
+; NO-TRAP-AFTER-NORETURN-NEXT:    call.uni
+; NO-TRAP-AFTER-NORETURN-NEXT:    throw,
+; NO-TRAP-AFTER-NORETURN-NEXT:    (
+; NO-TRAP-AFTER-NORETURN-NEXT:    );
+; NO-TRAP-AFTER-NORETURN-NEXT:    } // callseq 0
+; NO-TRAP-AFTER-NORETURN-NEXT:    // begin inline asm
+; NO-TRAP-AFTER-NORETURN-NEXT:    exit;
+; NO-TRAP-AFTER-NORETURN-NEXT:    // end inline asm
+; NO-TRAP-AFTER-NORETURN-NEXT:    trap; exit;
+;
+; TRAP-LABEL: kernel_func(
+; TRAP:       {
+; TRAP-EMPTY:
+; TRAP-EMPTY:
+; TRAP-NEXT:  // %bb.0:
+; TRAP-NEXT:    { // callseq 0, 0
+; TRAP-NEXT:    call.uni
+; TRAP-NEXT:    throw,
+; TRAP-NEXT:    (
+; TRAP-NEXT:    );
+; TRAP-NEXT:    } // callseq 0
+; TRAP-NEXT:    trap; exit;
+;
+; BUG-FIXED-LABEL: kernel_func(
+; BUG-FIXED:       {
+; BUG-FIXED-EMPTY:
+; BUG-FIXED-EMPTY:
+; BUG-FIXED-NEXT:  // %bb.0:
+; BUG-FIXED-NEXT:    { // callseq 0, 0
+; BUG-FIXED-NEXT:    call.uni
+; BUG-FIXED-NEXT:    throw,
+; BUG-FIXED-NEXT:    (
+; BUG-FIXED-NEXT:    );
+; BUG-FIXED-NEXT:    } // callseq 0
+; BUG-FIXED-NEXT:    trap;
   call void @throw()
-; CHECK-TRAP-NOT: exit;
-; CHECK-TRAP: trap;
-
-; CHECK-NOTRAP-NOT: trap;
-; CHECK: exit;
-
-; CHECK-BUG-FIXED-NOT: exit;
-; CHECK-BUG-FIXED: trap;
   unreachable
 }
 
-; CHECK-LABEL: kernel_func_2
 define void @kernel_func_2() {
-; CHECK: trap; exit;
-; CHECK-BUG-FIXED: trap;
-; CHECK-BUG-FIXED-NOT: trap; exit;
+; CHECK-LABEL: kernel_func_2(
+; CHECK:       {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    trap; exit;
+;
+; BUG-FIXED-LABEL: kernel_func_2(
+; BUG-FIXED:       {
+; BUG-FIXED-EMPTY:
+; BUG-FIXED-EMPTY:
+; BUG-FIXED-NEXT:  // %bb.0:
+; BUG-FIXED-NEXT:    trap;
   call void @llvm.trap()
-
-;; Make sure we avoid emitting two trap instructions.
-; CHECK-NOT: trap;
-; CHECK-NOT: exit;
-; CHECK-BUG-FIXED-NOT: trap; 
+; Make sure we avoid emitting two trap instructions.
   unreachable
 }
 

>From 5d2902dde6a611b6dc531a0c747c596415326e30 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Wed, 9 Oct 2024 19:19:00 -0700
Subject: [PATCH 7/7] Add comment

---
 llvm/lib/Target/NVPTX/NVPTXSubtarget.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index 49be13b2962c59..e785bbf830da62 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -95,6 +95,13 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
   bool hasDotInstructions() const {
     return SmVersion >= 61 && PTXVersion >= 50;
   }
+  // Prior to CUDA 12.3 ptxas did not recognize that the trap instruction
+  // terminates a basic block. Instead, it would assume that control flow
+  // continued to the next instruction. The next instruction could be in the
+  // block that's lexically below it. This would lead to phantom CFG edges
+  // being created within ptxas. This issue was fixed in CUDA 12.3. Thus, when
+  // targeting PTX ISA versions 8.3+ we can confidently say that the bug will
+  // not be present.
   bool hasPTXASUnreachableBug() const { return PTXVersion < 83; }
   bool hasCvtaParam() const { return SmVersion >= 70 && PTXVersion >= 77; }
   unsigned int getFullSmVersion() const { return FullSmVersion; }



More information about the llvm-commits mailing list