[llvm] [NVPTX] Fixup and refactor brx.idx support (PR #171933)
Alex MacLean via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 11 15:56:47 PST 2025
https://github.com/AlexMaclean created https://github.com/llvm/llvm-project/pull/171933
None
>From eb1adaa88c12f2fe5be1c1385939148cacd67b70 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Thu, 11 Dec 2025 23:54:42 +0000
Subject: [PATCH] [NVPTX] Fixup and refactor brx.idx support
---
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 38 +++
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 1 +
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 34 +--
llvm/lib/Target/NVPTX/NVPTXISelLowering.h | 2 -
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 25 +-
llvm/lib/Target/NVPTX/NVPTXSubtarget.h | 1 +
llvm/test/CodeGen/NVPTX/jump-table.ll | 278 ++++++++++++++------
llvm/test/CodeGen/NVPTX/switch.ll | 2 +-
8 files changed, 238 insertions(+), 143 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 0e1125ab8d8b3..99982ff3181b3 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/GlobalValue.h"
@@ -190,6 +191,8 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) {
if (tryBF16ArithToFMA(N))
return;
break;
+ case ISD::BR_JT:
+ return selectBR_JT(N);
default:
break;
}
@@ -2273,3 +2276,38 @@ void NVPTXDAGToDAGISel::selectAtomicSwap128(SDNode *N) {
ReplaceNode(N, ATOM);
}
+
+void NVPTXDAGToDAGISel::selectBR_JT(SDNode *N) {
+ assert(Subtarget->hasBrx() &&
+ "BR_JT should be expanded during legalization on unsupported targets");
+
+ SDLoc DL(N);
+ const SDValue InChain = N->getOperand(0);
+ const auto *JT = cast<JumpTableSDNode>(N->getOperand(1));
+ const SDValue Index = N->getOperand(2);
+
+ unsigned JId = JT->getIndex();
+ MachineJumpTableInfo *MJTI = CurDAG->getMachineFunction().getJumpTableInfo();
+ ArrayRef<MachineBasicBlock *> MBBs = MJTI->getJumpTables()[JId].MBBs;
+
+ SDValue IdV = getI32Imm(JId, DL);
+
+ // Emit BRX_START, which opens the .branchtargets list for this jump table.
+ MachineSDNode *Chain = CurDAG->getMachineNode(
+ NVPTX::BRX_START, DL, {MVT::Other, MVT::Glue}, {IdV, InChain});
+
+ // Emit a BRX_ITEM for every target but the last, linked by chain and glue.
+ assert(!MBBs.empty());
+ for (MachineBasicBlock *MBB : MBBs.drop_back())
+ Chain = CurDAG->getMachineNode(
+ NVPTX::BRX_ITEM, DL, {MVT::Other, MVT::Glue},
+ {CurDAG->getBasicBlock(MBB), SDValue(Chain, 0), SDValue(Chain, 1)});
+
+ // Emit BRX_END with the last target; it also prints the brx.idx on Index.
+ MachineSDNode *BrxEnd =
+ CurDAG->getMachineNode(NVPTX::BRX_END, DL, MVT::Other,
+ {CurDAG->getBasicBlock(MBBs.back()), Index, IdV,
+ SDValue(Chain, 0), SDValue(Chain, 1)});
+
+ ReplaceNode(N, BrxEnd);
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 055f1ff47306d..fcb5700dcb6d4 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -93,6 +93,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
void SelectTcgen05Ld(SDNode *N, bool hasOffset = false);
void SelectTcgen05St(SDNode *N, bool hasOffset = false);
void selectAtomicSwap128(SDNode *N);
+ void selectBR_JT(SDNode *N);
inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) {
return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index b88978a50ac16..92f3865818530 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -713,7 +713,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
Custom);
}
- setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+ setOperationAction(ISD::BR_JT, MVT::Other, STI.hasBrx() ? Legal : Expand);
setOperationAction(ISD::BRIND, MVT::Other, Expand);
// We want to legalize constant related memmove and memcopy
@@ -3281,8 +3281,6 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerFP_ROUND(Op, DAG);
case ISD::FP_EXTEND:
return LowerFP_EXTEND(Op, DAG);
- case ISD::BR_JT:
- return LowerBR_JT(Op, DAG);
case ISD::VAARG:
return LowerVAARG(Op, DAG);
case ISD::VASTART:
@@ -3330,36 +3328,6 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
}
}
-SDValue NVPTXTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
- SDLoc DL(Op);
- SDValue Chain = Op.getOperand(0);
- const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
- SDValue Index = Op.getOperand(2);
-
- unsigned JId = JT->getIndex();
- MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
- ArrayRef<MachineBasicBlock *> MBBs = MJTI->getJumpTables()[JId].MBBs;
-
- SDValue IdV = DAG.getConstant(JId, DL, MVT::i32);
-
- // Generate BrxStart node
- SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
- Chain = DAG.getNode(NVPTXISD::BrxStart, DL, VTs, Chain, IdV);
-
- // Generate BrxItem nodes
- assert(!MBBs.empty());
- for (MachineBasicBlock *MBB : MBBs.drop_back())
- Chain = DAG.getNode(NVPTXISD::BrxItem, DL, VTs, Chain.getValue(0),
- DAG.getBasicBlock(MBB), Chain.getValue(1));
-
- // Generate BrxEnd nodes
- SDValue EndOps[] = {Chain.getValue(0), DAG.getBasicBlock(MBBs.back()), Index,
- IdV, Chain.getValue(1)};
- SDValue BrxEnd = DAG.getNode(NVPTXISD::BrxEnd, DL, MVT::Other, EndOps);
-
- return BrxEnd;
-}
-
// This will prevent AsmPrinter from trying to print the jump tables itself.
unsigned NVPTXTargetLowering::getJumpTableEncoding() const {
return MachineJumpTableInfo::EK_Inline;
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index cb0a1aa5dc892..33d62c28882a5 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -242,8 +242,6 @@ class NVPTXTargetLowering : public TargetLowering {
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
-
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index feefaf9a21e5b..9c1ec38fb1f3a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -2392,36 +2392,17 @@ foreach t = [I32RT, I64RT] in {
// BRX
//
-def SDTBrxStartProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
-def SDTBrxItemProfile : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
-def SDTBrxEndProfile : SDTypeProfile<0, 3, [SDTCisVT<0, OtherVT>, SDTCisInt<1>, SDTCisInt<2>]>;
-
-def brx_start :
- SDNode<"NVPTXISD::BrxStart", SDTBrxStartProfile,
- [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
-def brx_item :
- SDNode<"NVPTXISD::BrxItem", SDTBrxItemProfile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def brx_end :
- SDNode<"NVPTXISD::BrxEnd", SDTBrxEndProfile,
- [SDNPHasChain, SDNPInGlue, SDNPSideEffect]>;
-
let isTerminator = 1, isBranch = 1, isIndirectBranch = 1, isNotDuplicable = 1 in {
def BRX_START :
- NVPTXInst<(outs), (ins i32imm:$id),
- "$$L_brx_$id: .branchtargets",
- [(brx_start (i32 imm:$id))]>;
+ NVPTXInst<(outs), (ins i32imm:$id), "$$L_brx_$id: .branchtargets">;
def BRX_ITEM :
- NVPTXInst<(outs), (ins brtarget:$target),
- "\t$target,",
- [(brx_item bb:$target)]>;
+ NVPTXInst<(outs), (ins brtarget:$target), "\t$target,">;
def BRX_END :
NVPTXInst<(outs), (ins brtarget:$target, B32:$val, i32imm:$id),
- "\t$target;\n\tbrx.idx \t$val, $$L_brx_$id;",
- [(brx_end bb:$target, i32:$val, (i32 imm:$id))]> {
+ "\t$target;\n\tbrx.idx \t$val, $$L_brx_$id;"> {
let isBarrier = 1;
}
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index 9b9f871549047..6f6057b3689e6 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -101,6 +101,7 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
bool hasClusters() const { return SmVersion >= 90 && PTXVersion >= 78; }
bool hasLDG() const { return SmVersion >= 32; }
bool hasHWROT32() const { return SmVersion >= 32; }
+ bool hasBrx() const { return SmVersion >= 30 && PTXVersion >= 60; }
bool hasFP16Math() const { return SmVersion >= 53; }
bool hasBF16Math() const { return SmVersion >= 80; }
bool allowFP16Math() const;
diff --git a/llvm/test/CodeGen/NVPTX/jump-table.ll b/llvm/test/CodeGen/NVPTX/jump-table.ll
index 4620c5e01008c..4d391f85e978a 100644
--- a/llvm/test/CodeGen/NVPTX/jump-table.ll
+++ b/llvm/test/CodeGen/NVPTX/jump-table.ll
@@ -1,41 +1,80 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s | FileCheck %s
-; RUN: %if ptxas %{ llc < %s | %ptxas-verify %}
+; RUN: llc < %s -mcpu=sm_30 -mattr=+ptx60 | FileCheck %s --check-prefix=PTX60
+; RUN: llc < %s -mcpu=sm_30 -mattr=+ptx50 | FileCheck %s --check-prefix=PTX50
+; RUN: %if ptxas-isa-6.0 %{ llc < %s -mcpu=sm_30 -mattr=+ptx60 | %ptxas-verify %}
+; RUN: %if ptxas-isa-5.0 %{ llc < %s -mcpu=sm_30 -mattr=+ptx50 | %ptxas-verify %}
target triple = "nvptx64-nvidia-cuda"
@out = addrspace(1) global i32 0, align 4
define void @foo(i32 %i) {
-; CHECK-LABEL: foo(
-; CHECK: {
-; CHECK-NEXT: .reg .pred %p<2>;
-; CHECK-NEXT: .reg .b32 %r<2>;
-; CHECK-EMPTY:
-; CHECK-NEXT: // %bb.0: // %entry
-; CHECK-NEXT: ld.param.b32 %r1, [foo_param_0];
-; CHECK-NEXT: setp.gt.u32 %p1, %r1, 3;
-; CHECK-NEXT: @%p1 bra $L__BB0_6;
-; CHECK-NEXT: // %bb.1: // %entry
-; CHECK-NEXT: $L_brx_0: .branchtargets
-; CHECK-NEXT: $L__BB0_2,
-; CHECK-NEXT: $L__BB0_3,
-; CHECK-NEXT: $L__BB0_4,
-; CHECK-NEXT: $L__BB0_5;
-; CHECK-NEXT: brx.idx %r1, $L_brx_0;
-; CHECK-NEXT: $L__BB0_2: // %case0
-; CHECK-NEXT: st.global.b32 [out], 0;
-; CHECK-NEXT: bra.uni $L__BB0_6;
-; CHECK-NEXT: $L__BB0_4: // %case2
-; CHECK-NEXT: st.global.b32 [out], 2;
-; CHECK-NEXT: bra.uni $L__BB0_6;
-; CHECK-NEXT: $L__BB0_5: // %case3
-; CHECK-NEXT: st.global.b32 [out], 3;
-; CHECK-NEXT: bra.uni $L__BB0_6;
-; CHECK-NEXT: $L__BB0_3: // %case1
-; CHECK-NEXT: st.global.b32 [out], 1;
-; CHECK-NEXT: $L__BB0_6: // %end
-; CHECK-NEXT: ret;
+; PTX60-LABEL: foo(
+; PTX60: {
+; PTX60-NEXT: .reg .pred %p<2>;
+; PTX60-NEXT: .reg .b32 %r<2>;
+; PTX60-EMPTY:
+; PTX60-NEXT: // %bb.0: // %entry
+; PTX60-NEXT: ld.param.b32 %r1, [foo_param_0];
+; PTX60-NEXT: setp.gt.u32 %p1, %r1, 3;
+; PTX60-NEXT: @%p1 bra $L__BB0_6;
+; PTX60-NEXT: // %bb.1: // %entry
+; PTX60-NEXT: $L_brx_0: .branchtargets
+; PTX60-NEXT: $L__BB0_2,
+; PTX60-NEXT: $L__BB0_3,
+; PTX60-NEXT: $L__BB0_4,
+; PTX60-NEXT: $L__BB0_5;
+; PTX60-NEXT: brx.idx %r1, $L_brx_0;
+; PTX60-NEXT: $L__BB0_2: // %case0
+; PTX60-NEXT: st.global.b32 [out], 0;
+; PTX60-NEXT: bra.uni $L__BB0_6;
+; PTX60-NEXT: $L__BB0_4: // %case2
+; PTX60-NEXT: st.global.b32 [out], 2;
+; PTX60-NEXT: bra.uni $L__BB0_6;
+; PTX60-NEXT: $L__BB0_5: // %case3
+; PTX60-NEXT: st.global.b32 [out], 3;
+; PTX60-NEXT: bra.uni $L__BB0_6;
+; PTX60-NEXT: $L__BB0_3: // %case1
+; PTX60-NEXT: st.global.b32 [out], 1;
+; PTX60-NEXT: $L__BB0_6: // %end
+; PTX60-NEXT: ret;
+;
+; PTX50-LABEL: foo(
+; PTX50: {
+; PTX50-NEXT: .reg .pred %p<6>;
+; PTX50-NEXT: .reg .b32 %r<2>;
+; PTX50-EMPTY:
+; PTX50-NEXT: // %bb.0: // %entry
+; PTX50-NEXT: ld.param.b32 %r1, [foo_param_0];
+; PTX50-NEXT: setp.gt.s32 %p1, %r1, 1;
+; PTX50-NEXT: @%p1 bra $L__BB0_4;
+; PTX50-NEXT: // %bb.1: // %entry
+; PTX50-NEXT: setp.eq.b32 %p4, %r1, 0;
+; PTX50-NEXT: @%p4 bra $L__BB0_7;
+; PTX50-NEXT: // %bb.2: // %entry
+; PTX50-NEXT: setp.eq.b32 %p5, %r1, 1;
+; PTX50-NEXT: @%p5 bra $L__BB0_3;
+; PTX50-NEXT: bra.uni $L__BB0_9;
+; PTX50-NEXT: $L__BB0_3: // %case1
+; PTX50-NEXT: st.global.b32 [out], 1;
+; PTX50-NEXT: bra.uni $L__BB0_9;
+; PTX50-NEXT: $L__BB0_4: // %entry
+; PTX50-NEXT: setp.eq.b32 %p2, %r1, 2;
+; PTX50-NEXT: @%p2 bra $L__BB0_8;
+; PTX50-NEXT: // %bb.5: // %entry
+; PTX50-NEXT: setp.eq.b32 %p3, %r1, 3;
+; PTX50-NEXT: @%p3 bra $L__BB0_6;
+; PTX50-NEXT: bra.uni $L__BB0_9;
+; PTX50-NEXT: $L__BB0_6: // %case3
+; PTX50-NEXT: st.global.b32 [out], 3;
+; PTX50-NEXT: bra.uni $L__BB0_9;
+; PTX50-NEXT: $L__BB0_7: // %case0
+; PTX50-NEXT: st.global.b32 [out], 0;
+; PTX50-NEXT: bra.uni $L__BB0_9;
+; PTX50-NEXT: $L__BB0_8: // %case2
+; PTX50-NEXT: st.global.b32 [out], 2;
+; PTX50-NEXT: $L__BB0_9: // %end
+; PTX50-NEXT: ret;
entry:
switch i32 %i, label %end [
i32 0, label %case0
@@ -66,60 +105,129 @@ end:
define i32 @test2(i32 %tmp158) {
-; CHECK-LABEL: test2(
-; CHECK: {
-; CHECK-NEXT: .reg .pred %p<6>;
-; CHECK-NEXT: .reg .b32 %r<3>;
-; CHECK-EMPTY:
-; CHECK-NEXT: // %bb.0: // %entry
-; CHECK-NEXT: ld.param.b32 %r1, [test2_param_0];
-; CHECK-NEXT: setp.gt.s32 %p1, %r1, 119;
-; CHECK-NEXT: @%p1 bra $L__BB1_4;
-; CHECK-NEXT: // %bb.1: // %entry
-; CHECK-NEXT: setp.lt.u32 %p4, %r1, 6;
-; CHECK-NEXT: @%p4 bra $L__BB1_3;
-; CHECK-NEXT: // %bb.2: // %entry
-; CHECK-NEXT: setp.lt.s32 %p5, %r1, -2147483645;
-; CHECK-NEXT: @%p5 bra $L__BB1_3;
-; CHECK-NEXT: bra.uni $L__BB1_6;
-; CHECK-NEXT: $L__BB1_4: // %entry
-; CHECK-NEXT: add.s32 %r2, %r1, -120;
-; CHECK-NEXT: setp.gt.u32 %p2, %r2, 5;
-; CHECK-NEXT: @%p2 bra $L__BB1_5;
-; CHECK-NEXT: // %bb.12: // %entry
-; CHECK-NEXT: $L_brx_0: .branchtargets
-; CHECK-NEXT: $L__BB1_3,
-; CHECK-NEXT: $L__BB1_7,
-; CHECK-NEXT: $L__BB1_8,
-; CHECK-NEXT: $L__BB1_9,
-; CHECK-NEXT: $L__BB1_10,
-; CHECK-NEXT: $L__BB1_11;
-; CHECK-NEXT: brx.idx %r2, $L_brx_0;
-; CHECK-NEXT: $L__BB1_7: // %bb339
-; CHECK-NEXT: st.param.b32 [func_retval0], 12;
-; CHECK-NEXT: ret;
-; CHECK-NEXT: $L__BB1_5: // %entry
-; CHECK-NEXT: setp.eq.b32 %p3, %r1, 1024;
-; CHECK-NEXT: @%p3 bra $L__BB1_3;
-; CHECK-NEXT: bra.uni $L__BB1_6;
-; CHECK-NEXT: $L__BB1_3: // %bb338
-; CHECK-NEXT: st.param.b32 [func_retval0], 11;
-; CHECK-NEXT: ret;
-; CHECK-NEXT: $L__BB1_10: // %bb342
-; CHECK-NEXT: st.param.b32 [func_retval0], 15;
-; CHECK-NEXT: ret;
-; CHECK-NEXT: $L__BB1_6: // %bb336
-; CHECK-NEXT: st.param.b32 [func_retval0], 10;
-; CHECK-NEXT: ret;
-; CHECK-NEXT: $L__BB1_8: // %bb340
-; CHECK-NEXT: st.param.b32 [func_retval0], 13;
-; CHECK-NEXT: ret;
-; CHECK-NEXT: $L__BB1_9: // %bb341
-; CHECK-NEXT: st.param.b32 [func_retval0], 14;
-; CHECK-NEXT: ret;
-; CHECK-NEXT: $L__BB1_11: // %bb343
-; CHECK-NEXT: st.param.b32 [func_retval0], 18;
-; CHECK-NEXT: ret;
+; PTX60-LABEL: test2(
+; PTX60: {
+; PTX60-NEXT: .reg .pred %p<6>;
+; PTX60-NEXT: .reg .b32 %r<3>;
+; PTX60-EMPTY:
+; PTX60-NEXT: // %bb.0: // %entry
+; PTX60-NEXT: ld.param.b32 %r1, [test2_param_0];
+; PTX60-NEXT: setp.gt.s32 %p1, %r1, 119;
+; PTX60-NEXT: @%p1 bra $L__BB1_4;
+; PTX60-NEXT: // %bb.1: // %entry
+; PTX60-NEXT: setp.lt.u32 %p4, %r1, 6;
+; PTX60-NEXT: @%p4 bra $L__BB1_3;
+; PTX60-NEXT: // %bb.2: // %entry
+; PTX60-NEXT: setp.lt.s32 %p5, %r1, -2147483645;
+; PTX60-NEXT: @%p5 bra $L__BB1_3;
+; PTX60-NEXT: bra.uni $L__BB1_6;
+; PTX60-NEXT: $L__BB1_4: // %entry
+; PTX60-NEXT: add.s32 %r2, %r1, -120;
+; PTX60-NEXT: setp.gt.u32 %p2, %r2, 5;
+; PTX60-NEXT: @%p2 bra $L__BB1_5;
+; PTX60-NEXT: // %bb.12: // %entry
+; PTX60-NEXT: $L_brx_0: .branchtargets
+; PTX60-NEXT: $L__BB1_3,
+; PTX60-NEXT: $L__BB1_7,
+; PTX60-NEXT: $L__BB1_8,
+; PTX60-NEXT: $L__BB1_9,
+; PTX60-NEXT: $L__BB1_10,
+; PTX60-NEXT: $L__BB1_11;
+; PTX60-NEXT: brx.idx %r2, $L_brx_0;
+; PTX60-NEXT: $L__BB1_7: // %bb339
+; PTX60-NEXT: st.param.b32 [func_retval0], 12;
+; PTX60-NEXT: ret;
+; PTX60-NEXT: $L__BB1_5: // %entry
+; PTX60-NEXT: setp.eq.b32 %p3, %r1, 1024;
+; PTX60-NEXT: @%p3 bra $L__BB1_3;
+; PTX60-NEXT: bra.uni $L__BB1_6;
+; PTX60-NEXT: $L__BB1_3: // %bb338
+; PTX60-NEXT: st.param.b32 [func_retval0], 11;
+; PTX60-NEXT: ret;
+; PTX60-NEXT: $L__BB1_10: // %bb342
+; PTX60-NEXT: st.param.b32 [func_retval0], 15;
+; PTX60-NEXT: ret;
+; PTX60-NEXT: $L__BB1_6: // %bb336
+; PTX60-NEXT: st.param.b32 [func_retval0], 10;
+; PTX60-NEXT: ret;
+; PTX60-NEXT: $L__BB1_8: // %bb340
+; PTX60-NEXT: st.param.b32 [func_retval0], 13;
+; PTX60-NEXT: ret;
+; PTX60-NEXT: $L__BB1_9: // %bb341
+; PTX60-NEXT: st.param.b32 [func_retval0], 14;
+; PTX60-NEXT: ret;
+; PTX60-NEXT: $L__BB1_11: // %bb343
+; PTX60-NEXT: st.param.b32 [func_retval0], 18;
+; PTX60-NEXT: ret;
+;
+; PTX50-LABEL: test2(
+; PTX50: {
+; PTX50-NEXT: .reg .pred %p<13>;
+; PTX50-NEXT: .reg .b32 %r<2>;
+; PTX50-EMPTY:
+; PTX50-NEXT: // %bb.0: // %entry
+; PTX50-NEXT: ld.param.b32 %r1, [test2_param_0];
+; PTX50-NEXT: setp.gt.s32 %p1, %r1, 119;
+; PTX50-NEXT: @%p1 bra $L__BB1_4;
+; PTX50-NEXT: // %bb.1: // %entry
+; PTX50-NEXT: setp.lt.u32 %p11, %r1, 6;
+; PTX50-NEXT: @%p11 bra $L__BB1_3;
+; PTX50-NEXT: // %bb.2: // %entry
+; PTX50-NEXT: setp.lt.s32 %p12, %r1, -2147483645;
+; PTX50-NEXT: @%p12 bra $L__BB1_3;
+; PTX50-NEXT: bra.uni $L__BB1_15;
+; PTX50-NEXT: $L__BB1_4: // %entry
+; PTX50-NEXT: setp.gt.s32 %p2, %r1, 122;
+; PTX50-NEXT: @%p2 bra $L__BB1_9;
+; PTX50-NEXT: bra.uni $L__BB1_5;
+; PTX50-NEXT: $L__BB1_9: // %entry
+; PTX50-NEXT: setp.gt.s32 %p3, %r1, 124;
+; PTX50-NEXT: @%p3 bra $L__BB1_13;
+; PTX50-NEXT: // %bb.10: // %entry
+; PTX50-NEXT: setp.eq.b32 %p6, %r1, 123;
+; PTX50-NEXT: @%p6 bra $L__BB1_17;
+; PTX50-NEXT: // %bb.11: // %entry
+; PTX50-NEXT: setp.eq.b32 %p7, %r1, 124;
+; PTX50-NEXT: @%p7 bra $L__BB1_12;
+; PTX50-NEXT: bra.uni $L__BB1_15;
+; PTX50-NEXT: $L__BB1_12: // %bb342
+; PTX50-NEXT: st.param.b32 [func_retval0], 15;
+; PTX50-NEXT: ret;
+; PTX50-NEXT: $L__BB1_5: // %entry
+; PTX50-NEXT: setp.eq.b32 %p8, %r1, 120;
+; PTX50-NEXT: @%p8 bra $L__BB1_3;
+; PTX50-NEXT: // %bb.6: // %entry
+; PTX50-NEXT: setp.eq.b32 %p9, %r1, 121;
+; PTX50-NEXT: @%p9 bra $L__BB1_16;
+; PTX50-NEXT: // %bb.7: // %entry
+; PTX50-NEXT: setp.eq.b32 %p10, %r1, 122;
+; PTX50-NEXT: @%p10 bra $L__BB1_8;
+; PTX50-NEXT: bra.uni $L__BB1_15;
+; PTX50-NEXT: $L__BB1_8: // %bb340
+; PTX50-NEXT: st.param.b32 [func_retval0], 13;
+; PTX50-NEXT: ret;
+; PTX50-NEXT: $L__BB1_13: // %entry
+; PTX50-NEXT: setp.eq.b32 %p4, %r1, 125;
+; PTX50-NEXT: @%p4 bra $L__BB1_18;
+; PTX50-NEXT: // %bb.14: // %entry
+; PTX50-NEXT: setp.eq.b32 %p5, %r1, 1024;
+; PTX50-NEXT: @%p5 bra $L__BB1_3;
+; PTX50-NEXT: bra.uni $L__BB1_15;
+; PTX50-NEXT: $L__BB1_3: // %bb338
+; PTX50-NEXT: st.param.b32 [func_retval0], 11;
+; PTX50-NEXT: ret;
+; PTX50-NEXT: $L__BB1_17: // %bb341
+; PTX50-NEXT: st.param.b32 [func_retval0], 14;
+; PTX50-NEXT: ret;
+; PTX50-NEXT: $L__BB1_18: // %bb343
+; PTX50-NEXT: st.param.b32 [func_retval0], 18;
+; PTX50-NEXT: ret;
+; PTX50-NEXT: $L__BB1_15: // %bb336
+; PTX50-NEXT: st.param.b32 [func_retval0], 10;
+; PTX50-NEXT: ret;
+; PTX50-NEXT: $L__BB1_16: // %bb339
+; PTX50-NEXT: st.param.b32 [func_retval0], 12;
+; PTX50-NEXT: ret;
entry:
switch i32 %tmp158, label %bb336 [
i32 -2147483648, label %bb338
diff --git a/llvm/test/CodeGen/NVPTX/switch.ll b/llvm/test/CodeGen/NVPTX/switch.ll
index 7fcfcfbb85d00..328f69c59a800 100644
--- a/llvm/test/CodeGen/NVPTX/switch.ll
+++ b/llvm/test/CodeGen/NVPTX/switch.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc < %s -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mcpu=sm_30 -mattr=+ptx60 -verify-machineinstrs | FileCheck %s
target triple = "nvptx64-unknown-nvidiacl"
More information about the llvm-commits
mailing list