[llvm] [NVPTX] Fixup and refactor brx.idx support (PR #171933)

via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 11 15:57:20 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-nvptx

Author: Alex MacLean (AlexMaclean)

<details>
<summary>Changes</summary>



---
Full diff: https://github.com/llvm/llvm-project/pull/171933.diff


8 Files Affected:

- (modified) llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp (+38) 
- (modified) llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h (+1) 
- (modified) llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp (+1-33) 
- (modified) llvm/lib/Target/NVPTX/NVPTXISelLowering.h (-2) 
- (modified) llvm/lib/Target/NVPTX/NVPTXInstrInfo.td (+3-22) 
- (modified) llvm/lib/Target/NVPTX/NVPTXSubtarget.h (+1) 
- (modified) llvm/test/CodeGen/NVPTX/jump-table.ll (+193-85) 
- (modified) llvm/test/CodeGen/NVPTX/switch.ll (+1-1) 


``````````diff
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 0e1125ab8d8b3..99982ff3181b3 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -16,6 +16,7 @@
 #include "llvm/ADT/APInt.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/IR/GlobalValue.h"
@@ -190,6 +191,8 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) {
     if (tryBF16ArithToFMA(N))
       return;
     break;
+  case ISD::BR_JT:
+    return selectBR_JT(N);
   default:
     break;
   }
@@ -2273,3 +2276,38 @@ void NVPTXDAGToDAGISel::selectAtomicSwap128(SDNode *N) {
 
   ReplaceNode(N, ATOM);
 }
+
+void NVPTXDAGToDAGISel::selectBR_JT(SDNode *N) {
+  assert(Subtarget->hasBrx() &&
+         "BR_JT should be expanded during legalization on unsupported targets");
+
+  SDLoc DL(N);
+  const SDValue InChain = N->getOperand(0);
+  const auto *JT = cast<JumpTableSDNode>(N->getOperand(1));
+  const SDValue Index = N->getOperand(2);
+
+  unsigned JId = JT->getIndex();
+  MachineJumpTableInfo *MJTI = CurDAG->getMachineFunction().getJumpTableInfo();
+  ArrayRef<MachineBasicBlock *> MBBs = MJTI->getJumpTables()[JId].MBBs;
+
+  SDValue IdV = getI32Imm(JId, DL);
+
+  // Generate BrxStart node
+  MachineSDNode *Chain = CurDAG->getMachineNode(
+      NVPTX::BRX_START, DL, {MVT::Other, MVT::Glue}, {IdV, InChain});
+
+  // Generate BrxItem nodes
+  assert(!MBBs.empty());
+  for (MachineBasicBlock *MBB : MBBs.drop_back())
+    Chain = CurDAG->getMachineNode(
+        NVPTX::BRX_ITEM, DL, {MVT::Other, MVT::Glue},
+        {CurDAG->getBasicBlock(MBB), SDValue(Chain, 0), SDValue(Chain, 1)});
+
+  // Generate the BrxEnd node
+  MachineSDNode *BrxEnd =
+      CurDAG->getMachineNode(NVPTX::BRX_END, DL, MVT::Other,
+                             {CurDAG->getBasicBlock(MBBs.back()), Index, IdV,
+                              SDValue(Chain, 0), SDValue(Chain, 1)});
+
+  ReplaceNode(N, BrxEnd);
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 055f1ff47306d..fcb5700dcb6d4 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -93,6 +93,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
   void SelectTcgen05Ld(SDNode *N, bool hasOffset = false);
   void SelectTcgen05St(SDNode *N, bool hasOffset = false);
   void selectAtomicSwap128(SDNode *N);
+  void selectBR_JT(SDNode *N);
 
   inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) {
     return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index b88978a50ac16..92f3865818530 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -713,7 +713,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
                        Custom);
   }
 
-  setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+  setOperationAction(ISD::BR_JT, MVT::Other, STI.hasBrx() ? Legal : Expand);
   setOperationAction(ISD::BRIND, MVT::Other, Expand);
 
   // We want to legalize constant related memmove and memcopy
@@ -3281,8 +3281,6 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     return LowerFP_ROUND(Op, DAG);
   case ISD::FP_EXTEND:
     return LowerFP_EXTEND(Op, DAG);
-  case ISD::BR_JT:
-    return LowerBR_JT(Op, DAG);
   case ISD::VAARG:
     return LowerVAARG(Op, DAG);
   case ISD::VASTART:
@@ -3330,36 +3328,6 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   }
 }
 
-SDValue NVPTXTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
-  SDLoc DL(Op);
-  SDValue Chain = Op.getOperand(0);
-  const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
-  SDValue Index = Op.getOperand(2);
-
-  unsigned JId = JT->getIndex();
-  MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
-  ArrayRef<MachineBasicBlock *> MBBs = MJTI->getJumpTables()[JId].MBBs;
-
-  SDValue IdV = DAG.getConstant(JId, DL, MVT::i32);
-
-  // Generate BrxStart node
-  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
-  Chain = DAG.getNode(NVPTXISD::BrxStart, DL, VTs, Chain, IdV);
-
-  // Generate BrxItem nodes
-  assert(!MBBs.empty());
-  for (MachineBasicBlock *MBB : MBBs.drop_back())
-    Chain = DAG.getNode(NVPTXISD::BrxItem, DL, VTs, Chain.getValue(0),
-                        DAG.getBasicBlock(MBB), Chain.getValue(1));
-
-  // Generate BrxEnd nodes
-  SDValue EndOps[] = {Chain.getValue(0), DAG.getBasicBlock(MBBs.back()), Index,
-                      IdV, Chain.getValue(1)};
-  SDValue BrxEnd = DAG.getNode(NVPTXISD::BrxEnd, DL, MVT::Other, EndOps);
-
-  return BrxEnd;
-}
-
 // This will prevent AsmPrinter from trying to print the jump tables itself.
 unsigned NVPTXTargetLowering::getJumpTableEncoding() const {
   return MachineJumpTableInfo::EK_Inline;
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index cb0a1aa5dc892..33d62c28882a5 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -242,8 +242,6 @@ class NVPTXTargetLowering : public TargetLowering {
   SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
 
-  SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
-
   SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
 
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index feefaf9a21e5b..9c1ec38fb1f3a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -2392,36 +2392,17 @@ foreach t = [I32RT, I64RT] in {
 // BRX
 //
 
-def SDTBrxStartProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
-def SDTBrxItemProfile : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
-def SDTBrxEndProfile : SDTypeProfile<0, 3, [SDTCisVT<0, OtherVT>, SDTCisInt<1>, SDTCisInt<2>]>;
-
-def brx_start :
-  SDNode<"NVPTXISD::BrxStart", SDTBrxStartProfile,
-         [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
-def brx_item :
-  SDNode<"NVPTXISD::BrxItem", SDTBrxItemProfile,
-         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def brx_end :
-  SDNode<"NVPTXISD::BrxEnd", SDTBrxEndProfile,
-         [SDNPHasChain, SDNPInGlue, SDNPSideEffect]>;
-
 let isTerminator = 1, isBranch = 1, isIndirectBranch = 1, isNotDuplicable = 1 in {
 
   def BRX_START :
-    NVPTXInst<(outs), (ins i32imm:$id),
-              "$$L_brx_$id: .branchtargets",
-              [(brx_start (i32 imm:$id))]>;
+    NVPTXInst<(outs), (ins i32imm:$id), "$$L_brx_$id: .branchtargets">;
 
   def BRX_ITEM :
-    NVPTXInst<(outs), (ins brtarget:$target),
-              "\t$target,",
-              [(brx_item bb:$target)]>;
+    NVPTXInst<(outs), (ins brtarget:$target), "\t$target,">;
 
   def BRX_END :
     NVPTXInst<(outs), (ins brtarget:$target, B32:$val, i32imm:$id),
-              "\t$target;\n\tbrx.idx \t$val, $$L_brx_$id;",
-              [(brx_end bb:$target, i32:$val, (i32 imm:$id))]> {
+              "\t$target;\n\tbrx.idx \t$val, $$L_brx_$id;"> {
       let isBarrier = 1;
     }
 }
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index 9b9f871549047..6f6057b3689e6 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -101,6 +101,7 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
   bool hasClusters() const { return SmVersion >= 90 && PTXVersion >= 78; }
   bool hasLDG() const { return SmVersion >= 32; }
   bool hasHWROT32() const { return SmVersion >= 32; }
+  bool hasBrx() const { return SmVersion >= 30 && PTXVersion >= 60; }
   bool hasFP16Math() const { return SmVersion >= 53; }
   bool hasBF16Math() const { return SmVersion >= 80; }
   bool allowFP16Math() const;
diff --git a/llvm/test/CodeGen/NVPTX/jump-table.ll b/llvm/test/CodeGen/NVPTX/jump-table.ll
index 4620c5e01008c..4d391f85e978a 100644
--- a/llvm/test/CodeGen/NVPTX/jump-table.ll
+++ b/llvm/test/CodeGen/NVPTX/jump-table.ll
@@ -1,41 +1,80 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s | FileCheck %s
-; RUN: %if ptxas %{ llc < %s | %ptxas-verify %}
+; RUN: llc < %s -mcpu=sm_30 -mattr=+ptx60 | FileCheck %s --check-prefix=PTX60
+; RUN: llc < %s -mcpu=sm_30 -mattr=+ptx50 | FileCheck %s --check-prefix=PTX50
+; RUN: %if ptxas-isa-6.0 %{ llc < %s -mcpu=sm_30 -mattr=+ptx60 | %ptxas-verify %}
+; RUN: %if ptxas-isa-5.0 %{ llc < %s -mcpu=sm_30 -mattr=+ptx50 | %ptxas-verify %}
 
 target triple = "nvptx64-nvidia-cuda"
 
 @out = addrspace(1) global i32 0, align 4
 
 define void @foo(i32 %i) {
-; CHECK-LABEL: foo(
-; CHECK:       {
-; CHECK-NEXT:    .reg .pred %p<2>;
-; CHECK-NEXT:    .reg .b32 %r<2>;
-; CHECK-EMPTY:
-; CHECK-NEXT:  // %bb.0: // %entry
-; CHECK-NEXT:    ld.param.b32 %r1, [foo_param_0];
-; CHECK-NEXT:    setp.gt.u32 %p1, %r1, 3;
-; CHECK-NEXT:    @%p1 bra $L__BB0_6;
-; CHECK-NEXT:  // %bb.1: // %entry
-; CHECK-NEXT:    $L_brx_0: .branchtargets
-; CHECK-NEXT:     $L__BB0_2,
-; CHECK-NEXT:     $L__BB0_3,
-; CHECK-NEXT:     $L__BB0_4,
-; CHECK-NEXT:     $L__BB0_5;
-; CHECK-NEXT:    brx.idx %r1, $L_brx_0;
-; CHECK-NEXT:  $L__BB0_2: // %case0
-; CHECK-NEXT:    st.global.b32 [out], 0;
-; CHECK-NEXT:    bra.uni $L__BB0_6;
-; CHECK-NEXT:  $L__BB0_4: // %case2
-; CHECK-NEXT:    st.global.b32 [out], 2;
-; CHECK-NEXT:    bra.uni $L__BB0_6;
-; CHECK-NEXT:  $L__BB0_5: // %case3
-; CHECK-NEXT:    st.global.b32 [out], 3;
-; CHECK-NEXT:    bra.uni $L__BB0_6;
-; CHECK-NEXT:  $L__BB0_3: // %case1
-; CHECK-NEXT:    st.global.b32 [out], 1;
-; CHECK-NEXT:  $L__BB0_6: // %end
-; CHECK-NEXT:    ret;
+; PTX60-LABEL: foo(
+; PTX60:       {
+; PTX60-NEXT:    .reg .pred %p<2>;
+; PTX60-NEXT:    .reg .b32 %r<2>;
+; PTX60-EMPTY:
+; PTX60-NEXT:  // %bb.0: // %entry
+; PTX60-NEXT:    ld.param.b32 %r1, [foo_param_0];
+; PTX60-NEXT:    setp.gt.u32 %p1, %r1, 3;
+; PTX60-NEXT:    @%p1 bra $L__BB0_6;
+; PTX60-NEXT:  // %bb.1: // %entry
+; PTX60-NEXT:    $L_brx_0: .branchtargets
+; PTX60-NEXT:     $L__BB0_2,
+; PTX60-NEXT:     $L__BB0_3,
+; PTX60-NEXT:     $L__BB0_4,
+; PTX60-NEXT:     $L__BB0_5;
+; PTX60-NEXT:    brx.idx %r1, $L_brx_0;
+; PTX60-NEXT:  $L__BB0_2: // %case0
+; PTX60-NEXT:    st.global.b32 [out], 0;
+; PTX60-NEXT:    bra.uni $L__BB0_6;
+; PTX60-NEXT:  $L__BB0_4: // %case2
+; PTX60-NEXT:    st.global.b32 [out], 2;
+; PTX60-NEXT:    bra.uni $L__BB0_6;
+; PTX60-NEXT:  $L__BB0_5: // %case3
+; PTX60-NEXT:    st.global.b32 [out], 3;
+; PTX60-NEXT:    bra.uni $L__BB0_6;
+; PTX60-NEXT:  $L__BB0_3: // %case1
+; PTX60-NEXT:    st.global.b32 [out], 1;
+; PTX60-NEXT:  $L__BB0_6: // %end
+; PTX60-NEXT:    ret;
+;
+; PTX50-LABEL: foo(
+; PTX50:       {
+; PTX50-NEXT:    .reg .pred %p<6>;
+; PTX50-NEXT:    .reg .b32 %r<2>;
+; PTX50-EMPTY:
+; PTX50-NEXT:  // %bb.0: // %entry
+; PTX50-NEXT:    ld.param.b32 %r1, [foo_param_0];
+; PTX50-NEXT:    setp.gt.s32 %p1, %r1, 1;
+; PTX50-NEXT:    @%p1 bra $L__BB0_4;
+; PTX50-NEXT:  // %bb.1: // %entry
+; PTX50-NEXT:    setp.eq.b32 %p4, %r1, 0;
+; PTX50-NEXT:    @%p4 bra $L__BB0_7;
+; PTX50-NEXT:  // %bb.2: // %entry
+; PTX50-NEXT:    setp.eq.b32 %p5, %r1, 1;
+; PTX50-NEXT:    @%p5 bra $L__BB0_3;
+; PTX50-NEXT:    bra.uni $L__BB0_9;
+; PTX50-NEXT:  $L__BB0_3: // %case1
+; PTX50-NEXT:    st.global.b32 [out], 1;
+; PTX50-NEXT:    bra.uni $L__BB0_9;
+; PTX50-NEXT:  $L__BB0_4: // %entry
+; PTX50-NEXT:    setp.eq.b32 %p2, %r1, 2;
+; PTX50-NEXT:    @%p2 bra $L__BB0_8;
+; PTX50-NEXT:  // %bb.5: // %entry
+; PTX50-NEXT:    setp.eq.b32 %p3, %r1, 3;
+; PTX50-NEXT:    @%p3 bra $L__BB0_6;
+; PTX50-NEXT:    bra.uni $L__BB0_9;
+; PTX50-NEXT:  $L__BB0_6: // %case3
+; PTX50-NEXT:    st.global.b32 [out], 3;
+; PTX50-NEXT:    bra.uni $L__BB0_9;
+; PTX50-NEXT:  $L__BB0_7: // %case0
+; PTX50-NEXT:    st.global.b32 [out], 0;
+; PTX50-NEXT:    bra.uni $L__BB0_9;
+; PTX50-NEXT:  $L__BB0_8: // %case2
+; PTX50-NEXT:    st.global.b32 [out], 2;
+; PTX50-NEXT:  $L__BB0_9: // %end
+; PTX50-NEXT:    ret;
 entry:
   switch i32 %i, label %end [
     i32 0, label %case0
@@ -66,60 +105,129 @@ end:
 
 
 define i32 @test2(i32 %tmp158) {
-; CHECK-LABEL: test2(
-; CHECK:       {
-; CHECK-NEXT:    .reg .pred %p<6>;
-; CHECK-NEXT:    .reg .b32 %r<3>;
-; CHECK-EMPTY:
-; CHECK-NEXT:  // %bb.0: // %entry
-; CHECK-NEXT:    ld.param.b32 %r1, [test2_param_0];
-; CHECK-NEXT:    setp.gt.s32 %p1, %r1, 119;
-; CHECK-NEXT:    @%p1 bra $L__BB1_4;
-; CHECK-NEXT:  // %bb.1: // %entry
-; CHECK-NEXT:    setp.lt.u32 %p4, %r1, 6;
-; CHECK-NEXT:    @%p4 bra $L__BB1_3;
-; CHECK-NEXT:  // %bb.2: // %entry
-; CHECK-NEXT:    setp.lt.s32 %p5, %r1, -2147483645;
-; CHECK-NEXT:    @%p5 bra $L__BB1_3;
-; CHECK-NEXT:    bra.uni $L__BB1_6;
-; CHECK-NEXT:  $L__BB1_4: // %entry
-; CHECK-NEXT:    add.s32 %r2, %r1, -120;
-; CHECK-NEXT:    setp.gt.u32 %p2, %r2, 5;
-; CHECK-NEXT:    @%p2 bra $L__BB1_5;
-; CHECK-NEXT:  // %bb.12: // %entry
-; CHECK-NEXT:    $L_brx_0: .branchtargets
-; CHECK-NEXT:     $L__BB1_3,
-; CHECK-NEXT:     $L__BB1_7,
-; CHECK-NEXT:     $L__BB1_8,
-; CHECK-NEXT:     $L__BB1_9,
-; CHECK-NEXT:     $L__BB1_10,
-; CHECK-NEXT:     $L__BB1_11;
-; CHECK-NEXT:    brx.idx %r2, $L_brx_0;
-; CHECK-NEXT:  $L__BB1_7: // %bb339
-; CHECK-NEXT:    st.param.b32 [func_retval0], 12;
-; CHECK-NEXT:    ret;
-; CHECK-NEXT:  $L__BB1_5: // %entry
-; CHECK-NEXT:    setp.eq.b32 %p3, %r1, 1024;
-; CHECK-NEXT:    @%p3 bra $L__BB1_3;
-; CHECK-NEXT:    bra.uni $L__BB1_6;
-; CHECK-NEXT:  $L__BB1_3: // %bb338
-; CHECK-NEXT:    st.param.b32 [func_retval0], 11;
-; CHECK-NEXT:    ret;
-; CHECK-NEXT:  $L__BB1_10: // %bb342
-; CHECK-NEXT:    st.param.b32 [func_retval0], 15;
-; CHECK-NEXT:    ret;
-; CHECK-NEXT:  $L__BB1_6: // %bb336
-; CHECK-NEXT:    st.param.b32 [func_retval0], 10;
-; CHECK-NEXT:    ret;
-; CHECK-NEXT:  $L__BB1_8: // %bb340
-; CHECK-NEXT:    st.param.b32 [func_retval0], 13;
-; CHECK-NEXT:    ret;
-; CHECK-NEXT:  $L__BB1_9: // %bb341
-; CHECK-NEXT:    st.param.b32 [func_retval0], 14;
-; CHECK-NEXT:    ret;
-; CHECK-NEXT:  $L__BB1_11: // %bb343
-; CHECK-NEXT:    st.param.b32 [func_retval0], 18;
-; CHECK-NEXT:    ret;
+; PTX60-LABEL: test2(
+; PTX60:       {
+; PTX60-NEXT:    .reg .pred %p<6>;
+; PTX60-NEXT:    .reg .b32 %r<3>;
+; PTX60-EMPTY:
+; PTX60-NEXT:  // %bb.0: // %entry
+; PTX60-NEXT:    ld.param.b32 %r1, [test2_param_0];
+; PTX60-NEXT:    setp.gt.s32 %p1, %r1, 119;
+; PTX60-NEXT:    @%p1 bra $L__BB1_4;
+; PTX60-NEXT:  // %bb.1: // %entry
+; PTX60-NEXT:    setp.lt.u32 %p4, %r1, 6;
+; PTX60-NEXT:    @%p4 bra $L__BB1_3;
+; PTX60-NEXT:  // %bb.2: // %entry
+; PTX60-NEXT:    setp.lt.s32 %p5, %r1, -2147483645;
+; PTX60-NEXT:    @%p5 bra $L__BB1_3;
+; PTX60-NEXT:    bra.uni $L__BB1_6;
+; PTX60-NEXT:  $L__BB1_4: // %entry
+; PTX60-NEXT:    add.s32 %r2, %r1, -120;
+; PTX60-NEXT:    setp.gt.u32 %p2, %r2, 5;
+; PTX60-NEXT:    @%p2 bra $L__BB1_5;
+; PTX60-NEXT:  // %bb.12: // %entry
+; PTX60-NEXT:    $L_brx_0: .branchtargets
+; PTX60-NEXT:     $L__BB1_3,
+; PTX60-NEXT:     $L__BB1_7,
+; PTX60-NEXT:     $L__BB1_8,
+; PTX60-NEXT:     $L__BB1_9,
+; PTX60-NEXT:     $L__BB1_10,
+; PTX60-NEXT:     $L__BB1_11;
+; PTX60-NEXT:    brx.idx %r2, $L_brx_0;
+; PTX60-NEXT:  $L__BB1_7: // %bb339
+; PTX60-NEXT:    st.param.b32 [func_retval0], 12;
+; PTX60-NEXT:    ret;
+; PTX60-NEXT:  $L__BB1_5: // %entry
+; PTX60-NEXT:    setp.eq.b32 %p3, %r1, 1024;
+; PTX60-NEXT:    @%p3 bra $L__BB1_3;
+; PTX60-NEXT:    bra.uni $L__BB1_6;
+; PTX60-NEXT:  $L__BB1_3: // %bb338
+; PTX60-NEXT:    st.param.b32 [func_retval0], 11;
+; PTX60-NEXT:    ret;
+; PTX60-NEXT:  $L__BB1_10: // %bb342
+; PTX60-NEXT:    st.param.b32 [func_retval0], 15;
+; PTX60-NEXT:    ret;
+; PTX60-NEXT:  $L__BB1_6: // %bb336
+; PTX60-NEXT:    st.param.b32 [func_retval0], 10;
+; PTX60-NEXT:    ret;
+; PTX60-NEXT:  $L__BB1_8: // %bb340
+; PTX60-NEXT:    st.param.b32 [func_retval0], 13;
+; PTX60-NEXT:    ret;
+; PTX60-NEXT:  $L__BB1_9: // %bb341
+; PTX60-NEXT:    st.param.b32 [func_retval0], 14;
+; PTX60-NEXT:    ret;
+; PTX60-NEXT:  $L__BB1_11: // %bb343
+; PTX60-NEXT:    st.param.b32 [func_retval0], 18;
+; PTX60-NEXT:    ret;
+;
+; PTX50-LABEL: test2(
+; PTX50:       {
+; PTX50-NEXT:    .reg .pred %p<13>;
+; PTX50-NEXT:    .reg .b32 %r<2>;
+; PTX50-EMPTY:
+; PTX50-NEXT:  // %bb.0: // %entry
+; PTX50-NEXT:    ld.param.b32 %r1, [test2_param_0];
+; PTX50-NEXT:    setp.gt.s32 %p1, %r1, 119;
+; PTX50-NEXT:    @%p1 bra $L__BB1_4;
+; PTX50-NEXT:  // %bb.1: // %entry
+; PTX50-NEXT:    setp.lt.u32 %p11, %r1, 6;
+; PTX50-NEXT:    @%p11 bra $L__BB1_3;
+; PTX50-NEXT:  // %bb.2: // %entry
+; PTX50-NEXT:    setp.lt.s32 %p12, %r1, -2147483645;
+; PTX50-NEXT:    @%p12 bra $L__BB1_3;
+; PTX50-NEXT:    bra.uni $L__BB1_15;
+; PTX50-NEXT:  $L__BB1_4: // %entry
+; PTX50-NEXT:    setp.gt.s32 %p2, %r1, 122;
+; PTX50-NEXT:    @%p2 bra $L__BB1_9;
+; PTX50-NEXT:    bra.uni $L__BB1_5;
+; PTX50-NEXT:  $L__BB1_9: // %entry
+; PTX50-NEXT:    setp.gt.s32 %p3, %r1, 124;
+; PTX50-NEXT:    @%p3 bra $L__BB1_13;
+; PTX50-NEXT:  // %bb.10: // %entry
+; PTX50-NEXT:    setp.eq.b32 %p6, %r1, 123;
+; PTX50-NEXT:    @%p6 bra $L__BB1_17;
+; PTX50-NEXT:  // %bb.11: // %entry
+; PTX50-NEXT:    setp.eq.b32 %p7, %r1, 124;
+; PTX50-NEXT:    @%p7 bra $L__BB1_12;
+; PTX50-NEXT:    bra.uni $L__BB1_15;
+; PTX50-NEXT:  $L__BB1_12: // %bb342
+; PTX50-NEXT:    st.param.b32 [func_retval0], 15;
+; PTX50-NEXT:    ret;
+; PTX50-NEXT:  $L__BB1_5: // %entry
+; PTX50-NEXT:    setp.eq.b32 %p8, %r1, 120;
+; PTX50-NEXT:    @%p8 bra $L__BB1_3;
+; PTX50-NEXT:  // %bb.6: // %entry
+; PTX50-NEXT:    setp.eq.b32 %p9, %r1, 121;
+; PTX50-NEXT:    @%p9 bra $L__BB1_16;
+; PTX50-NEXT:  // %bb.7: // %entry
+; PTX50-NEXT:    setp.eq.b32 %p10, %r1, 122;
+; PTX50-NEXT:    @%p10 bra $L__BB1_8;
+; PTX50-NEXT:    bra.uni $L__BB1_15;
+; PTX50-NEXT:  $L__BB1_8: // %bb340
+; PTX50-NEXT:    st.param.b32 [func_retval0], 13;
+; PTX50-NEXT:    ret;
+; PTX50-NEXT:  $L__BB1_13: // %entry
+; PTX50-NEXT:    setp.eq.b32 %p4, %r1, 125;
+; PTX50-NEXT:    @%p4 bra $L__BB1_18;
+; PTX50-NEXT:  // %bb.14: // %entry
+; PTX50-NEXT:    setp.eq.b32 %p5, %r1, 1024;
+; PTX50-NEXT:    @%p5 bra $L__BB1_3;
+; PTX50-NEXT:    bra.uni $L__BB1_15;
+; PTX50-NEXT:  $L__BB1_3: // %bb338
+; PTX50-NEXT:    st.param.b32 [func_retval0], 11;
+; PTX50-NEXT:    ret;
+; PTX50-NEXT:  $L__BB1_17: // %bb341
+; PTX50-NEXT:    st.param.b32 [func_retval0], 14;
+; PTX50-NEXT:    ret;
+; PTX50-NEXT:  $L__BB1_18: // %bb343
+; PTX50-NEXT:    st.param.b32 [func_retval0], 18;
+; PTX50-NEXT:    ret;
+; PTX50-NEXT:  $L__BB1_15: // %bb336
+; PTX50-NEXT:    st.param.b32 [func_retval0], 10;
+; PTX50-NEXT:    ret;
+; PTX50-NEXT:  $L__BB1_16: // %bb339
+; PTX50-NEXT:    st.param.b32 [func_retval0], 12;
+; PTX50-NEXT:    ret;
 entry:
   switch i32 %tmp158, label %bb336 [
     i32 -2147483648, label %bb338
diff --git a/llvm/test/CodeGen/NVPTX/switch.ll b/llvm/test/CodeGen/NVPTX/switch.ll
index 7fcfcfbb85d00..328f69c59a800 100644
--- a/llvm/test/CodeGen/NVPTX/switch.ll
+++ b/llvm/test/CodeGen/NVPTX/switch.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc < %s -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mcpu=sm_30 -mattr=+ptx60 -verify-machineinstrs | FileCheck %s
 
 target triple = "nvptx64-unknown-nvidiacl"
 

``````````

</details>


https://github.com/llvm/llvm-project/pull/171933


More information about the llvm-commits mailing list