[llvm] d832a1c - [NVPTX] Only run LowerUnreachable when necessary (#109868)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 10 12:57:46 PDT 2024
Author: Justin Fargnoli
Date: 2024-10-10T12:57:43-07:00
New Revision: d832a1c744fddad93ec4d8d2739c2a49a3623e02
URL: https://github.com/llvm/llvm-project/commit/d832a1c744fddad93ec4d8d2739c2a49a3623e02
DIFF: https://github.com/llvm/llvm-project/commit/d832a1c744fddad93ec4d8d2739c2a49a3623e02.diff
LOG: [NVPTX] Only run LowerUnreachable when necessary (#109868)
Before CUDA 12.3 `ptxas` did not recognize that the trap instruction
terminates a basic block. Instead, it would assume that control flow
continued to the next instruction. The next instruction could be in the
block that's lexically below it. This would lead to phantom CFG edges
being created within ptxas.
[NVPTX: Lower unreachable to exit to allow ptxas to accurately
reconstruct the
CFG.](https://github.com/llvm/llvm-project/commit/1ee4d880e8760256c606fe55b7af85a4f70d006d)
added the LowerUnreachable pass to NVPTX to work around this. Several
other workaround (WAR) patches followed.
This bug in `ptxas` was fixed in CUDA 12.3 and is thus impossible to
encounter when targeting PTX ISA v8.3+.
This commit reverts the WARs for the `ptxas` bug when targeting PTX ISA
v8.3+.
CC @maleadt
Added:
Modified:
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
llvm/lib/Target/NVPTX/NVPTXSubtarget.h
llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
llvm/test/CodeGen/NVPTX/unreachable.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 8f4eddb5142740..8b34ce4f1001c1 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -139,6 +139,8 @@ def hasVote : Predicate<"Subtarget->hasVote()">;
def hasDouble : Predicate<"Subtarget->hasDouble()">;
def hasLDG : Predicate<"Subtarget->hasLDG()">;
def hasLDU : Predicate<"Subtarget->hasLDU()">;
+def hasPTXASUnreachableBug : Predicate<"Subtarget->hasPTXASUnreachableBug()">;
+def noPTXASUnreachableBug : Predicate<"!Subtarget->hasPTXASUnreachableBug()">;
def doF32FTZ : Predicate<"useF32FTZ()">;
def doNoF32FTZ : Predicate<"!useF32FTZ()">;
@@ -3736,9 +3738,10 @@ def Callseq_End :
[(callseq_end timm:$amt1, timm:$amt2)]>;
// trap instruction
+def trapinst : NVPTXInst<(outs), (ins), "trap;", [(trap)]>, Requires<[noPTXASUnreachableBug]>;
// Emit an `exit` as well to convey to ptxas that `trap` exits the CFG.
// This won't be necessary in a future version of ptxas.
-def trapinst : NVPTXInst<(outs), (ins), "trap; exit;", [(trap)]>;
+def trapexitinst : NVPTXInst<(outs), (ins), "trap; exit;", [(trap)]>, Requires<[hasPTXASUnreachableBug]>;
// brkpt instruction
def debugtrapinst : NVPTXInst<(outs), (ins), "brkpt;", [(debugtrap)]>;
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index 8b9059bd60cbd4..e785bbf830da62 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -95,6 +95,14 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
bool hasDotInstructions() const {
return SmVersion >= 61 && PTXVersion >= 50;
}
+ // Prior to CUDA 12.3 ptxas did not recognize that the trap instruction
+ // terminates a basic block. Instead, it would assume that control flow
+ // continued to the next instruction. The next instruction could be in the
+ // block that's lexically below it. This would lead to phantom CFG edges
+ // being created within ptxas. This issue was fixed in CUDA 12.3. Thus, when
+ // targeting PTX ISA versions 8.3+ we can confidently say that the bug will
+ // not be present.
+ bool hasPTXASUnreachableBug() const { return PTXVersion < 83; }
bool hasCvtaParam() const { return SmVersion >= 70 && PTXVersion >= 77; }
unsigned int getFullSmVersion() const { return FullSmVersion; }
unsigned int getSmVersion() const { return getFullSmVersion() / 10; }
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 8e6e4395efb559..2eb8b17f1b0f40 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -367,9 +367,13 @@ void NVPTXPassConfig::addIRPasses() {
addPass(createSROAPass());
}
- const auto &Options = getNVPTXTargetMachine().Options;
- addPass(createNVPTXLowerUnreachablePass(Options.TrapUnreachable,
- Options.NoTrapAfterNoreturn));
+ if (ST.hasPTXASUnreachableBug()) {
+ // Run LowerUnreachable to WAR a ptxas bug. See the commit description of
+ // 1ee4d880e8760256c606fe55b7af85a4f70d006d for more details.
+ const auto &Options = getNVPTXTargetMachine().Options;
+ addPass(createNVPTXLowerUnreachablePass(Options.TrapUnreachable,
+ Options.NoTrapAfterNoreturn));
+ }
}
bool NVPTXPassConfig::addInstSelector() {
diff --git a/llvm/test/CodeGen/NVPTX/unreachable.ll b/llvm/test/CodeGen/NVPTX/unreachable.ll
index f9118900cb7372..6bd583c8d50d8a 100644
--- a/llvm/test/CodeGen/NVPTX/unreachable.ll
+++ b/llvm/test/CodeGen/NVPTX/unreachable.ll
@@ -1,48 +1,107 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable=false \
-; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP
+; RUN: | FileCheck %s --check-prefixes=CHECK,NO-TRAP-UNREACHABLE
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable=false \
-; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP
+; RUN: | FileCheck %s --check-prefixes=CHECK,NO-TRAP-UNREACHABLE
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn \
-; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP
+; RUN: | FileCheck %s --check-prefixes=CHECK,NO-TRAP-AFTER-NORETURN
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn \
-; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP
+; RUN: | FileCheck %s --check-prefixes=CHECK,NO-TRAP-AFTER-NORETURN
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn=false \
-; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-TRAP
+; RUN: | FileCheck %s --check-prefixes=CHECK,TRAP
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn=false \
-; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-TRAP
+; RUN: | FileCheck %s --check-prefixes=CHECK,TRAP
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -mattr=+ptx83 \
+; RUN: | FileCheck %s --check-prefixes=BUG-FIXED
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
-; CHECK: .extern .func throw
+target triple = "nvptx-unknown-cuda"
+
declare void @throw() #0
declare void @llvm.trap() #0
-; CHECK-LABEL: .entry kernel_func
define void @kernel_func() {
-; CHECK: call.uni
-; CHECK: throw,
+; NO-TRAP-UNREACHABLE-LABEL: kernel_func(
+; NO-TRAP-UNREACHABLE: {
+; NO-TRAP-UNREACHABLE-EMPTY:
+; NO-TRAP-UNREACHABLE-EMPTY:
+; NO-TRAP-UNREACHABLE-NEXT: // %bb.0:
+; NO-TRAP-UNREACHABLE-NEXT: { // callseq 0, 0
+; NO-TRAP-UNREACHABLE-NEXT: call.uni
+; NO-TRAP-UNREACHABLE-NEXT: throw,
+; NO-TRAP-UNREACHABLE-NEXT: (
+; NO-TRAP-UNREACHABLE-NEXT: );
+; NO-TRAP-UNREACHABLE-NEXT: } // callseq 0
+; NO-TRAP-UNREACHABLE-NEXT: // begin inline asm
+; NO-TRAP-UNREACHABLE-NEXT: exit;
+; NO-TRAP-UNREACHABLE-NEXT: // end inline asm
+;
+; NO-TRAP-AFTER-NORETURN-LABEL: kernel_func(
+; NO-TRAP-AFTER-NORETURN: {
+; NO-TRAP-AFTER-NORETURN-EMPTY:
+; NO-TRAP-AFTER-NORETURN-EMPTY:
+; NO-TRAP-AFTER-NORETURN-NEXT: // %bb.0:
+; NO-TRAP-AFTER-NORETURN-NEXT: { // callseq 0, 0
+; NO-TRAP-AFTER-NORETURN-NEXT: call.uni
+; NO-TRAP-AFTER-NORETURN-NEXT: throw,
+; NO-TRAP-AFTER-NORETURN-NEXT: (
+; NO-TRAP-AFTER-NORETURN-NEXT: );
+; NO-TRAP-AFTER-NORETURN-NEXT: } // callseq 0
+; NO-TRAP-AFTER-NORETURN-NEXT: // begin inline asm
+; NO-TRAP-AFTER-NORETURN-NEXT: exit;
+; NO-TRAP-AFTER-NORETURN-NEXT: // end inline asm
+; NO-TRAP-AFTER-NORETURN-NEXT: trap; exit;
+;
+; TRAP-LABEL: kernel_func(
+; TRAP: {
+; TRAP-EMPTY:
+; TRAP-EMPTY:
+; TRAP-NEXT: // %bb.0:
+; TRAP-NEXT: { // callseq 0, 0
+; TRAP-NEXT: call.uni
+; TRAP-NEXT: throw,
+; TRAP-NEXT: (
+; TRAP-NEXT: );
+; TRAP-NEXT: } // callseq 0
+; TRAP-NEXT: trap; exit;
+;
+; BUG-FIXED-LABEL: kernel_func(
+; BUG-FIXED: {
+; BUG-FIXED-EMPTY:
+; BUG-FIXED-EMPTY:
+; BUG-FIXED-NEXT: // %bb.0:
+; BUG-FIXED-NEXT: { // callseq 0, 0
+; BUG-FIXED-NEXT: call.uni
+; BUG-FIXED-NEXT: throw,
+; BUG-FIXED-NEXT: (
+; BUG-FIXED-NEXT: );
+; BUG-FIXED-NEXT: } // callseq 0
+; BUG-FIXED-NEXT: trap;
call void @throw()
-; CHECK-TRAP-NOT: exit;
-; CHECK-TRAP: trap;
-; CHECK-NOTRAP-NOT: trap;
-; CHECK: exit;
unreachable
}
-; CHECK-LABEL: kernel_func_2
define void @kernel_func_2() {
-; CHECK: trap; exit;
+; CHECK-LABEL: kernel_func_2(
+; CHECK: {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: trap; exit;
+;
+; BUG-FIXED-LABEL: kernel_func_2(
+; BUG-FIXED: {
+; BUG-FIXED-EMPTY:
+; BUG-FIXED-EMPTY:
+; BUG-FIXED-NEXT: // %bb.0:
+; BUG-FIXED-NEXT: trap;
call void @llvm.trap()
-
-;; Make sure we avoid emitting two trap instructions.
-; CHECK-NOT: trap;
-; CHECK-NOT: exit;
+; Make sure we avoid emitting two trap instructions.
unreachable
}
attributes #0 = { noreturn }
-
!nvvm.annotations = !{!1}
-
!1 = !{ptr @kernel_func, !"kernel", i32 1}
More information about the llvm-commits
mailing list