[llvm] [NVPTX] Only run LowerUnreachable when necessary (PR #109868)

Justin Fargnoli via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 8 15:52:44 PDT 2024


https://github.com/justinfargnoli updated https://github.com/llvm/llvm-project/pull/109868

>From 58ffde6db94d40c8db18db52d9ae2a8d938d3095 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Tue, 24 Sep 2024 12:39:36 -0700
Subject: [PATCH 1/4] [NVPTX] Only run LowerUnreachable on PTX version <= 7.4

---
 llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 12 +++++++++---
 llvm/test/CodeGen/NVPTX/unreachable.ll       |  6 ++++--
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 57b7fa783c14a7..a8aa2be499607a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -368,9 +368,15 @@ void NVPTXPassConfig::addIRPasses() {
     addPass(createSROAPass());
   }
 
-  const auto &Options = getNVPTXTargetMachine().Options;
-  addPass(createNVPTXLowerUnreachablePass(Options.TrapUnreachable,
-                                          Options.NoTrapAfterNoreturn));
+  if (ST.getPTXVersion() <= 74) {
+    // This pass is a WAR for a bug that's present in `ptxas` binaries that are
+    // shipped in or prior to CUDA Toolkit 11.4. The highest version that's
+    // supported by `ptxas` in CUDA 11.4 is 7.4. Limit this pass to only run
+    // when targeting PTX 7.4 or lower.
+    const auto &Options = getNVPTXTargetMachine().Options;
+    addPass(createNVPTXLowerUnreachablePass(Options.TrapUnreachable,
+                                            Options.NoTrapAfterNoreturn));
+  }
 }
 
 bool NVPTXPassConfig::addInstSelector() {
diff --git a/llvm/test/CodeGen/NVPTX/unreachable.ll b/llvm/test/CodeGen/NVPTX/unreachable.ll
index f9118900cb7372..e6cb54fa16e1fe 100644
--- a/llvm/test/CodeGen/NVPTX/unreachable.ll
+++ b/llvm/test/CodeGen/NVPTX/unreachable.ll
@@ -10,6 +10,8 @@
 ; RUN:     | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-TRAP
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn=false \
 ; RUN:     | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-TRAP
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -mattr=+ptx75 \
+; RUN:     | FileCheck %s  --check-prefixes=CHECK-BUG-FIXED
 ; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
 
@@ -26,12 +28,14 @@ define void @kernel_func() {
 ; CHECK-TRAP: trap;
 ; CHECK-NOTRAP-NOT: trap;
 ; CHECK: exit;
+; CHECK-BUG-FIXED-NOT: exit;
   unreachable
 }
 
 ; CHECK-LABEL: kernel_func_2
 define void @kernel_func_2() {
 ; CHECK: trap; exit;
+; CHECK-BUG-FIXED-NOT: exit;
   call void @llvm.trap()
 
 ;; Make sure we avoid emitting two trap instructions.
@@ -42,7 +46,5 @@ define void @kernel_func_2() {
 
 attributes #0 = { noreturn }
 
-
 !nvvm.annotations = !{!1}
-
 !1 = !{ptr @kernel_func, !"kernel", i32 1}

>From 5424a9dcc85cdec1d5ebebcc76d1e41e28d65a2a Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Tue, 24 Sep 2024 13:21:52 -0700
Subject: [PATCH 2/4] Also check whether we're targeting Pascal or prior

---
 llvm/lib/Target/NVPTX/NVPTXSubtarget.h       | 3 +++
 llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 8 +++-----
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index 8b9059bd60cbd4..e2ce088cacdf53 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -95,6 +95,9 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
   bool hasDotInstructions() const {
     return SmVersion >= 61 && PTXVersion >= 50;
   }
+  bool hasPTXASUnreachableBug() const {
+    return SmVersion < 70 && PTXVersion <= 74;
+  }
   bool hasCvtaParam() const { return SmVersion >= 70 && PTXVersion >= 77; }
   unsigned int getFullSmVersion() const { return FullSmVersion; }
   unsigned int getSmVersion() const { return getFullSmVersion() / 10; }
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index a8aa2be499607a..b79b4ff93efe49 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -368,11 +368,9 @@ void NVPTXPassConfig::addIRPasses() {
     addPass(createSROAPass());
   }
 
-  if (ST.getPTXVersion() <= 74) {
-    // This pass is a WAR for a bug that's present in `ptxas` binaries that are
-    // shipped in or prior to CUDA Toolkit 11.4. The highest version that's
-    // supported by `ptxas` in CUDA 11.4 is 7.4. Limit this pass to only run
-    // when targeting PTX 7.4 or lower.
+  if (ST.hasPTXASUnreachableBug()) {
+    // Run LowerUnreachable to WAR a ptxas bug. See the commit description of
+    // 1ee4d880e8760256c606fe55b7af85a4f70d006d for more details.
     const auto &Options = getNVPTXTargetMachine().Options;
     addPass(createNVPTXLowerUnreachablePass(Options.TrapUnreachable,
                                             Options.NoTrapAfterNoreturn));

>From 529062c32b270e6c665e04b71859b3393cebc6a0 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Tue, 8 Oct 2024 12:35:44 -0700
Subject: [PATCH 3/4] PTXAS only has the bug prior to CUDA 12.8 / PTX v8.2

---
 llvm/lib/Target/NVPTX/NVPTXSubtarget.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index e2ce088cacdf53..2e6beeceb98614 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -95,9 +95,7 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
   bool hasDotInstructions() const {
     return SmVersion >= 61 && PTXVersion >= 50;
   }
-  bool hasPTXASUnreachableBug() const {
-    return SmVersion < 70 && PTXVersion <= 74;
-  }
+  bool hasPTXASUnreachableBug() const { return PTXVersion <= 83; }
   bool hasCvtaParam() const { return SmVersion >= 70 && PTXVersion >= 77; }
   unsigned int getFullSmVersion() const { return FullSmVersion; }
   unsigned int getSmVersion() const { return getFullSmVersion() / 10; }

>From b88976bea0f69ea6490c9bd141af878a00031b14 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Tue, 8 Oct 2024 15:52:01 -0700
Subject: [PATCH 4/4] Only emit exit for PTX versions prior to v8.3

---
 llvm/lib/Target/NVPTX/NVPTXInstrInfo.td |  5 ++++-
 llvm/lib/Target/NVPTX/NVPTXSubtarget.h  |  2 +-
 llvm/test/CodeGen/NVPTX/unreachable.ll  | 23 ++++++++++++++---------
 3 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 8f4eddb5142740..86eceed235ea55 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -139,6 +139,8 @@ def hasVote : Predicate<"Subtarget->hasVote()">;
 def hasDouble : Predicate<"Subtarget->hasDouble()">;
 def hasLDG : Predicate<"Subtarget->hasLDG()">;
 def hasLDU : Predicate<"Subtarget->hasLDU()">;
+def hasPTXASUnreachableBug : Predicate<"Subtarget->hasPTXASUnreachableBug()">;
+def noPTXASUnreachableBug : Predicate<"!Subtarget->hasPTXASUnreachableBug()">;
 
 def doF32FTZ : Predicate<"useF32FTZ()">;
 def doNoF32FTZ : Predicate<"!useF32FTZ()">;
@@ -3736,9 +3738,10 @@ def Callseq_End :
             [(callseq_end timm:$amt1, timm:$amt2)]>;
 
 // trap instruction
+def trapinst : NVPTXInst<(outs), (ins), "trap;", [(trap)]>, Requires<[noPTXASUnreachableBug]>;
 // Emit an `exit` as well to convey to ptxas that `trap` exits the CFG.
 // This won't be necessary in a future version of ptxas.
-def trapinst : NVPTXInst<(outs), (ins), "trap; exit;", [(trap)]>;
+def trapinstexit : NVPTXInst<(outs), (ins), "trap; exit;", [(trap)]>, Requires<[hasPTXASUnreachableBug]>;
 // brkpt instruction
 def debugtrapinst : NVPTXInst<(outs), (ins), "brkpt;", [(debugtrap)]>;
 
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index 2e6beeceb98614..49be13b2962c59 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -95,7 +95,7 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
   bool hasDotInstructions() const {
     return SmVersion >= 61 && PTXVersion >= 50;
   }
-  bool hasPTXASUnreachableBug() const { return PTXVersion <= 83; }
+  bool hasPTXASUnreachableBug() const { return PTXVersion < 83; }
   bool hasCvtaParam() const { return SmVersion >= 70 && PTXVersion >= 77; }
   unsigned int getFullSmVersion() const { return FullSmVersion; }
   unsigned int getSmVersion() const { return getFullSmVersion() / 10; }
diff --git a/llvm/test/CodeGen/NVPTX/unreachable.ll b/llvm/test/CodeGen/NVPTX/unreachable.ll
index e6cb54fa16e1fe..e2d1079e4dc6ea 100644
--- a/llvm/test/CodeGen/NVPTX/unreachable.ll
+++ b/llvm/test/CodeGen/NVPTX/unreachable.ll
@@ -1,17 +1,17 @@
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable=false \
-; RUN:     | FileCheck %s  --check-prefix=CHECK --check-prefix=CHECK-NOTRAP
+; RUN:     | FileCheck %s  --check-prefixes=CHECK,CHECK-NOTRAP
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable=false \
-; RUN:     | FileCheck %s  --check-prefix=CHECK --check-prefix=CHECK-NOTRAP
+; RUN:     | FileCheck %s  --check-prefixes=CHECK,CHECK-NOTRAP
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn \
-; RUN:     | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP
+; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-NOTRAP
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn \
-; RUN:     | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP
+; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-NOTRAP
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn=false \
-; RUN:     | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-TRAP
+; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-TRAP
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn=false \
-; RUN:     | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-TRAP
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -mattr=+ptx75 \
-; RUN:     | FileCheck %s  --check-prefixes=CHECK-BUG-FIXED
+; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-TRAP
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -mattr=+ptx83 \
+; RUN:     | FileCheck %s  --check-prefix=CHECK-BUG-FIXED
 ; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
 
@@ -26,21 +26,26 @@ define void @kernel_func() {
   call void @throw()
 ; CHECK-TRAP-NOT: exit;
 ; CHECK-TRAP: trap;
+
 ; CHECK-NOTRAP-NOT: trap;
 ; CHECK: exit;
+
 ; CHECK-BUG-FIXED-NOT: exit;
+; CHECK-BUG-FIXED: trap;
   unreachable
 }
 
 ; CHECK-LABEL: kernel_func_2
 define void @kernel_func_2() {
 ; CHECK: trap; exit;
-; CHECK-BUG-FIXED-NOT: exit;
+; CHECK-BUG-FIXED: trap;
+; CHECK-BUG-FIXED-NOT: trap; exit;
   call void @llvm.trap()
 
 ;; Make sure we avoid emitting two trap instructions.
 ; CHECK-NOT: trap;
 ; CHECK-NOT: exit;
+; CHECK-BUG-FIXED-NOT: trap; 
   unreachable
 }
 



More information about the llvm-commits mailing list