[llvm] [NVPTX] Fix lowering of i1 SETCC (PR #115035)

Alex MacLean via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 5 10:55:00 PST 2024


https://github.com/AlexMaclean updated https://github.com/llvm/llvm-project/pull/115035

>From beedad91841e21786e3912076d39ddd17abd9a7b Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Thu, 7 Nov 2024 19:37:27 +0000
Subject: [PATCH 1/5] [NVPTX] Fix lowering of i1 SETCC - address comments

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |   5 +-
 llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp   |  47 +++++
 llvm/lib/Target/NVPTX/NVPTXISelLowering.h     |   2 +
 llvm/test/CodeGen/NVPTX/i1-icmp.ll            | 193 ++++++++++++++++++
 4 files changed, 245 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/NVPTX/i1-icmp.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 48018ac29bd089..85209fe706ad74 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18764,8 +18764,9 @@ SDValue DAGCombiner::rebuildSetCC(SDValue N) {
       if (LegalTypes)
         SetCCVT = getSetCCResultType(SetCCVT);
       // Replace the uses of XOR with SETCC
-      return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
-                          Equal ? ISD::SETEQ : ISD::SETNE);
+      const ISD::CondCode CC = Equal ? ISD::SETEQ : ISD::SETNE;
+      if (!LegalOperations || TLI.isCondCodeLegal(CC, Op0.getSimpleValueType()))
+        return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1, CC);
     }
   }
 
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 62647b31285188..e341ebb2daf930 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -667,6 +667,11 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
     setTruncStoreAction(VT, MVT::i1, Expand);
   }
 
+  setCondCodeAction({ISD::SETNE, ISD::SETEQ, ISD::SETUGE, ISD::SETULE,
+                     ISD::SETUGT, ISD::SETULT, ISD::SETGT, ISD::SETLT,
+                     ISD::SETGE, ISD::SETLE},
+                    MVT::i1, Custom);
+
   // expand extload of vector of integers.
   setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v2i16,
                    MVT::v2i8, Expand);
@@ -2668,6 +2673,46 @@ SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
   }
 }
 
+// Lowers SETCC nodes that aren't directly supported by our arch.
+SDValue NVPTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+  SDValue L = Op->getOperand(0);
+  SDValue R = Op->getOperand(1);
+
+  if (L.getValueType() != MVT::i1)
+    return SDValue();
+
+  SDLoc DL(Op);
+  SDValue Ret;
+  switch (cast<CondCodeSDNode>(Op->getOperand(2))->get()) {
+  default:
+    llvm_unreachable("Unknown integer setcc!");
+  case ISD::SETEQ: // X == Y  -> ~(X^Y)
+    Ret = DAG.getNOT(DL, DAG.getNode(ISD::XOR, DL, MVT::i1, L, R), MVT::i1);
+    break;
+  case ISD::SETNE: // X != Y   -->  (X^Y)
+    Ret = DAG.getNode(ISD::XOR, DL, MVT::i1, L, R);
+    break;
+  case ISD::SETGT:  // X >s Y   -->  X == 0 & Y == 1  -->  ~X & Y
+  case ISD::SETULT: // X <u Y   -->  X == 0 & Y == 1  -->  ~X & Y
+    Ret = DAG.getNode(ISD::AND, DL, MVT::i1, R, DAG.getNOT(DL, L, MVT::i1));
+    break;
+  case ISD::SETLT:  // X <s Y   --> X == 1 & Y == 0  -->  ~Y & X
+  case ISD::SETUGT: // X >u Y   --> X == 1 & Y == 0  -->  ~Y & X
+    Ret = DAG.getNode(ISD::AND, DL, MVT::i1, L, DAG.getNOT(DL, R, MVT::i1));
+    break;
+  case ISD::SETULE: // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y
+  case ISD::SETGE:  // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y
+    Ret = DAG.getNode(ISD::OR, DL, MVT::i1, R, DAG.getNOT(DL, L, MVT::i1));
+    break;
+  case ISD::SETUGE: // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X
+  case ISD::SETLE:  // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X
+    Ret = DAG.getNode(ISD::OR, DL, MVT::i1, L, DAG.getNOT(DL, R, MVT::i1));
+    break;
+  }
+
+  return DAG.getZExtOrTrunc(Ret, DL, Op.getValueType());
+}
+
 /// If the types match, convert the generic copysign to the NVPTXISD version,
 /// otherwise bail ensuring that mismatched cases are properly expaned.
 SDValue NVPTXTargetLowering::LowerFCOPYSIGN(SDValue Op,
@@ -2921,6 +2966,8 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     return LowerSTORE(Op, DAG);
   case ISD::LOAD:
     return LowerLOAD(Op, DAG);
+  case ISD::SETCC:
+    return LowerSETCC(Op, DAG);
   case ISD::SHL_PARTS:
     return LowerShiftLeftParts(Op, DAG);
   case ISD::SRA_PARTS:
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index c8b589ae39413e..b1bb9090464ac4 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -628,6 +628,8 @@ class NVPTXTargetLowering : public TargetLowering {
   SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
 
+  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+
   SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
 
   SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/NVPTX/i1-icmp.ll b/llvm/test/CodeGen/NVPTX/i1-icmp.ll
new file mode 100644
index 00000000000000..db9ae6541b87ae
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/i1-icmp.ll
@@ -0,0 +1,193 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
+
+target triple = "nvptx-nvidia-cuda"
+
+define i32 @icmp_i1_eq(i32 %a, i32 %b) {
+; CHECK-LABEL: icmp_i1_eq(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<4>;
+; CHECK-NEXT:    .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [icmp_i1_eq_param_0];
+; CHECK-NEXT:    setp.gt.s32 %p1, %r1, 1;
+; CHECK-NEXT:    ld.param.u32 %r2, [icmp_i1_eq_param_1];
+; CHECK-NEXT:    setp.gt.s32 %p2, %r2, 1;
+; CHECK-NEXT:    xor.pred %p3, %p1, %p2;
+; CHECK-NEXT:    @%p3 bra $L__BB0_2;
+; CHECK-NEXT:  // %bb.1: // %bb1
+; CHECK-NEXT:    mov.b32 %r4, 1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT:    ret;
+; CHECK-NEXT:  $L__BB0_2: // %bb2
+; CHECK-NEXT:    mov.b32 %r3, 127;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %p1 = icmp sgt i32 %a, 1
+  %p2 = icmp sgt i32 %b, 1
+  %c = icmp eq i1 %p1, %p2
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  ret i32 1
+bb2:
+  ret i32 127
+}
+
+define i32 @icmp_i1_ne(i32 %a, i32 %b) {
+; CHECK-LABEL: icmp_i1_ne(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<5>;
+; CHECK-NEXT:    .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [icmp_i1_ne_param_0];
+; CHECK-NEXT:    setp.gt.s32 %p1, %r1, 1;
+; CHECK-NEXT:    ld.param.u32 %r2, [icmp_i1_ne_param_1];
+; CHECK-NEXT:    setp.gt.s32 %p2, %r2, 1;
+; CHECK-NEXT:    xor.pred %p3, %p1, %p2;
+; CHECK-NEXT:    not.pred %p4, %p3;
+; CHECK-NEXT:    @%p4 bra $L__BB1_2;
+; CHECK-NEXT:  // %bb.1: // %bb1
+; CHECK-NEXT:    mov.b32 %r4, 1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT:    ret;
+; CHECK-NEXT:  $L__BB1_2: // %bb2
+; CHECK-NEXT:    mov.b32 %r3, 127;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %p1 = icmp sgt i32 %a, 1
+  %p2 = icmp sgt i32 %b, 1
+  %c = icmp ne i1 %p1, %p2
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  ret i32 1
+bb2:
+  ret i32 127
+}
+
+define i32 @icmp_i1_sgt(i32 %a, i32 %b) {
+; CHECK-LABEL: icmp_i1_sgt(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<4>;
+; CHECK-NEXT:    .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [icmp_i1_sgt_param_0];
+; CHECK-NEXT:    setp.gt.s32 %p1, %r1, 1;
+; CHECK-NEXT:    ld.param.u32 %r2, [icmp_i1_sgt_param_1];
+; CHECK-NEXT:    setp.lt.s32 %p2, %r2, 2;
+; CHECK-NEXT:    or.pred %p3, %p1, %p2;
+; CHECK-NEXT:    @%p3 bra $L__BB2_2;
+; CHECK-NEXT:  // %bb.1: // %bb1
+; CHECK-NEXT:    mov.b32 %r4, 1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT:    ret;
+; CHECK-NEXT:  $L__BB2_2: // %bb2
+; CHECK-NEXT:    mov.b32 %r3, 127;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %p1 = icmp sgt i32 %a, 1
+  %p2 = icmp sgt i32 %b, 1
+  %c = icmp sgt i1 %p1, %p2
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  ret i32 1
+bb2:
+  ret i32 127
+}
+
+define i32 @icmp_i1_slt(i32 %a, i32 %b) {
+; CHECK-LABEL: icmp_i1_slt(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<4>;
+; CHECK-NEXT:    .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [icmp_i1_slt_param_0];
+; CHECK-NEXT:    setp.lt.s32 %p1, %r1, 2;
+; CHECK-NEXT:    ld.param.u32 %r2, [icmp_i1_slt_param_1];
+; CHECK-NEXT:    setp.gt.s32 %p2, %r2, 1;
+; CHECK-NEXT:    or.pred %p3, %p2, %p1;
+; CHECK-NEXT:    @%p3 bra $L__BB3_2;
+; CHECK-NEXT:  // %bb.1: // %bb1
+; CHECK-NEXT:    mov.b32 %r4, 1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT:    ret;
+; CHECK-NEXT:  $L__BB3_2: // %bb2
+; CHECK-NEXT:    mov.b32 %r3, 127;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %p1 = icmp sgt i32 %a, 1
+  %p2 = icmp sgt i32 %b, 1
+  %c = icmp slt i1 %p1, %p2
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  ret i32 1
+bb2:
+  ret i32 127
+}
+
+define i32 @icmp_i1_sge(i32 %a, i32 %b) {
+; CHECK-LABEL: icmp_i1_sge(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<4>;
+; CHECK-NEXT:    .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [icmp_i1_sge_param_0];
+; CHECK-NEXT:    setp.gt.s32 %p1, %r1, 1;
+; CHECK-NEXT:    ld.param.u32 %r2, [icmp_i1_sge_param_1];
+; CHECK-NEXT:    setp.lt.s32 %p2, %r2, 2;
+; CHECK-NEXT:    and.pred %p3, %p1, %p2;
+; CHECK-NEXT:    @%p3 bra $L__BB4_2;
+; CHECK-NEXT:  // %bb.1: // %bb1
+; CHECK-NEXT:    mov.b32 %r4, 1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT:    ret;
+; CHECK-NEXT:  $L__BB4_2: // %bb2
+; CHECK-NEXT:    mov.b32 %r3, 127;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %p1 = icmp sgt i32 %a, 1
+  %p2 = icmp sgt i32 %b, 1
+  %c = icmp sge i1 %p1, %p2
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  ret i32 1
+bb2:
+  ret i32 127
+}
+
+define i32 @icmp_i1_sle(i32 %a, i32 %b) {
+; CHECK-LABEL: icmp_i1_sle(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<4>;
+; CHECK-NEXT:    .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [icmp_i1_sle_param_0];
+; CHECK-NEXT:    setp.lt.s32 %p1, %r1, 2;
+; CHECK-NEXT:    ld.param.u32 %r2, [icmp_i1_sle_param_1];
+; CHECK-NEXT:    setp.gt.s32 %p2, %r2, 1;
+; CHECK-NEXT:    and.pred %p3, %p2, %p1;
+; CHECK-NEXT:    @%p3 bra $L__BB5_2;
+; CHECK-NEXT:  // %bb.1: // %bb1
+; CHECK-NEXT:    mov.b32 %r4, 1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT:    ret;
+; CHECK-NEXT:  $L__BB5_2: // %bb2
+; CHECK-NEXT:    mov.b32 %r3, 127;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %p1 = icmp sgt i32 %a, 1
+  %p2 = icmp sgt i32 %b, 1
+  %c = icmp sle i1 %p1, %p2
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  ret i32 1
+bb2:
+  ret i32 127
+}
+

>From 13a383fa52cb88c392e8e56a827024e605d25028 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Thu, 14 Nov 2024 16:57:48 +0000
Subject: [PATCH 2/5] address comments

---
 .../CodeGen/SelectionDAG/TargetLowering.cpp   | 41 +++++++++++++++++
 llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp   | 44 +------------------
 llvm/lib/Target/NVPTX/NVPTXISelLowering.h     |  2 -
 3 files changed, 42 insertions(+), 45 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index eeba4b7d20f9c1..e87d809f88eb97 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -11889,6 +11889,47 @@ bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
       return true;
     }
 
+    // Special case: expand i1 comparisons using logical operations.
+    if (OpVT == MVT::i1) {
+      SDValue Ret;
+      switch (CCCode) {
+      default:
+        llvm_unreachable("Unknown integer setcc!");
+      case ISD::SETEQ: // X == Y  -->  ~(X ^ Y)
+        Ret = DAG.getNOT(dl, DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS),
+                         MVT::i1);
+        break;
+      case ISD::SETNE: // X != Y  -->  (X ^ Y)
+        Ret = DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS);
+        break;
+      case ISD::SETGT:  // X >s Y  -->  X == 0 & Y == 1  -->  ~X & Y
+      case ISD::SETULT: // X <u Y  -->  X == 0 & Y == 1  -->  ~X & Y
+        Ret = DAG.getNode(ISD::AND, dl, MVT::i1, RHS,
+                          DAG.getNOT(dl, LHS, MVT::i1));
+        break;
+      case ISD::SETLT:  // X <s Y  -->  X == 1 & Y == 0  -->  ~Y & X
+      case ISD::SETUGT: // X >u Y  -->  X == 1 & Y == 0  -->  ~Y & X
+        Ret = DAG.getNode(ISD::AND, dl, MVT::i1, LHS,
+                          DAG.getNOT(dl, RHS, MVT::i1));
+        break;
+      case ISD::SETULE: // X <=u Y  -->  X == 0 | Y == 1  -->  ~X | Y
+      case ISD::SETGE:  // X >=s Y  -->  X == 0 | Y == 1  -->  ~X | Y
+        Ret = DAG.getNode(ISD::OR, dl, MVT::i1, RHS,
+                          DAG.getNOT(dl, LHS, MVT::i1));
+        break;
+      case ISD::SETUGE: // X >=u Y  -->  X == 1 | Y == 0  -->  ~Y | X
+      case ISD::SETLE:  // X <=s Y  -->  X == 1 | Y == 0  -->  ~Y | X
+        Ret = DAG.getNode(ISD::OR, dl, MVT::i1, LHS,
+                          DAG.getNOT(dl, RHS, MVT::i1));
+        break;
+      }
+
+      LHS = DAG.getZExtOrTrunc(Ret, dl, VT);
+      RHS = SDValue();
+      CC = SDValue();
+      return true;
+    }
+
     ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
     unsigned Opc = 0;
     switch (CCCode) {
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index e341ebb2daf930..ce94dded815b8f 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -670,7 +670,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
   setCondCodeAction({ISD::SETNE, ISD::SETEQ, ISD::SETUGE, ISD::SETULE,
                      ISD::SETUGT, ISD::SETULT, ISD::SETGT, ISD::SETLT,
                      ISD::SETGE, ISD::SETLE},
-                    MVT::i1, Custom);
+                    MVT::i1, Expand);
 
   // expand extload of vector of integers.
   setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v2i16,
@@ -2673,46 +2673,6 @@ SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
   }
 }
 
-// Lowers SETCC nodes that aren't directly supported by our arch.
-SDValue NVPTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
-  SDValue L = Op->getOperand(0);
-  SDValue R = Op->getOperand(1);
-
-  if (L.getValueType() != MVT::i1)
-    return SDValue();
-
-  SDLoc DL(Op);
-  SDValue Ret;
-  switch (cast<CondCodeSDNode>(Op->getOperand(2))->get()) {
-  default:
-    llvm_unreachable("Unknown integer setcc!");
-  case ISD::SETEQ: // X == Y  -> ~(X^Y)
-    Ret = DAG.getNOT(DL, DAG.getNode(ISD::XOR, DL, MVT::i1, L, R), MVT::i1);
-    break;
-  case ISD::SETNE: // X != Y   -->  (X^Y)
-    Ret = DAG.getNode(ISD::XOR, DL, MVT::i1, L, R);
-    break;
-  case ISD::SETGT:  // X >s Y   -->  X == 0 & Y == 1  -->  ~X & Y
-  case ISD::SETULT: // X <u Y   -->  X == 0 & Y == 1  -->  ~X & Y
-    Ret = DAG.getNode(ISD::AND, DL, MVT::i1, R, DAG.getNOT(DL, L, MVT::i1));
-    break;
-  case ISD::SETLT:  // X <s Y   --> X == 1 & Y == 0  -->  ~Y & X
-  case ISD::SETUGT: // X >u Y   --> X == 1 & Y == 0  -->  ~Y & X
-    Ret = DAG.getNode(ISD::AND, DL, MVT::i1, L, DAG.getNOT(DL, R, MVT::i1));
-    break;
-  case ISD::SETULE: // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y
-  case ISD::SETGE:  // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y
-    Ret = DAG.getNode(ISD::OR, DL, MVT::i1, R, DAG.getNOT(DL, L, MVT::i1));
-    break;
-  case ISD::SETUGE: // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X
-  case ISD::SETLE:  // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X
-    Ret = DAG.getNode(ISD::OR, DL, MVT::i1, L, DAG.getNOT(DL, R, MVT::i1));
-    break;
-  }
-
-  return DAG.getZExtOrTrunc(Ret, DL, Op.getValueType());
-}
-
 /// If the types match, convert the generic copysign to the NVPTXISD version,
 /// otherwise bail ensuring that mismatched cases are properly expaned.
 SDValue NVPTXTargetLowering::LowerFCOPYSIGN(SDValue Op,
@@ -2966,8 +2926,6 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     return LowerSTORE(Op, DAG);
   case ISD::LOAD:
     return LowerLOAD(Op, DAG);
-  case ISD::SETCC:
-    return LowerSETCC(Op, DAG);
   case ISD::SHL_PARTS:
     return LowerShiftLeftParts(Op, DAG);
   case ISD::SRA_PARTS:
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index b1bb9090464ac4..c8b589ae39413e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -628,8 +628,6 @@ class NVPTXTargetLowering : public TargetLowering {
   SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
 
-  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
-
   SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
 
   SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;

>From 83a7e42b233e022dcf0ee705e4199d929ccf858f Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Thu, 14 Nov 2024 22:45:38 +0000
Subject: [PATCH 3/5] address comments

---
 llvm/test/CodeGen/NVPTX/i1-icmp.ll | 124 +++++++++++++++++++++++++++++
 1 file changed, 124 insertions(+)

diff --git a/llvm/test/CodeGen/NVPTX/i1-icmp.ll b/llvm/test/CodeGen/NVPTX/i1-icmp.ll
index db9ae6541b87ae..b189f5ff8bca82 100644
--- a/llvm/test/CodeGen/NVPTX/i1-icmp.ll
+++ b/llvm/test/CodeGen/NVPTX/i1-icmp.ll
@@ -191,3 +191,127 @@ bb2:
   ret i32 127
 }
 
+define i32 @icmp_i1_uge(i32 %a, i32 %b) {
+; CHECK-LABEL: icmp_i1_uge(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<4>;
+; CHECK-NEXT:    .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [icmp_i1_uge_param_0];
+; CHECK-NEXT:    setp.lt.s32 %p1, %r1, 2;
+; CHECK-NEXT:    ld.param.u32 %r2, [icmp_i1_uge_param_1];
+; CHECK-NEXT:    setp.gt.s32 %p2, %r2, 1;
+; CHECK-NEXT:    and.pred %p3, %p2, %p1;
+; CHECK-NEXT:    @%p3 bra $L__BB6_2;
+; CHECK-NEXT:  // %bb.1: // %bb1
+; CHECK-NEXT:    mov.b32 %r4, 1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT:    ret;
+; CHECK-NEXT:  $L__BB6_2: // %bb2
+; CHECK-NEXT:    mov.b32 %r3, 127;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %p1 = icmp sgt i32 %a, 1
+  %p2 = icmp sgt i32 %b, 1
+  %c = icmp uge i1 %p1, %p2
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  ret i32 1
+bb2:
+  ret i32 127
+}
+
+define i32 @icmp_i1_ugt(i32 %a, i32 %b) {
+; CHECK-LABEL: icmp_i1_ugt(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<4>;
+; CHECK-NEXT:    .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [icmp_i1_ugt_param_0];
+; CHECK-NEXT:    setp.lt.s32 %p1, %r1, 2;
+; CHECK-NEXT:    ld.param.u32 %r2, [icmp_i1_ugt_param_1];
+; CHECK-NEXT:    setp.gt.s32 %p2, %r2, 1;
+; CHECK-NEXT:    or.pred %p3, %p2, %p1;
+; CHECK-NEXT:    @%p3 bra $L__BB7_2;
+; CHECK-NEXT:  // %bb.1: // %bb1
+; CHECK-NEXT:    mov.b32 %r4, 1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT:    ret;
+; CHECK-NEXT:  $L__BB7_2: // %bb2
+; CHECK-NEXT:    mov.b32 %r3, 127;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %p1 = icmp sgt i32 %a, 1
+  %p2 = icmp sgt i32 %b, 1
+  %c = icmp ugt i1 %p1, %p2
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  ret i32 1
+bb2:
+  ret i32 127
+}
+
+define i32 @icmp_i1_ule(i32 %a, i32 %b) {
+; CHECK-LABEL: icmp_i1_ule(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<4>;
+; CHECK-NEXT:    .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [icmp_i1_ule_param_0];
+; CHECK-NEXT:    setp.gt.s32 %p1, %r1, 1;
+; CHECK-NEXT:    ld.param.u32 %r2, [icmp_i1_ule_param_1];
+; CHECK-NEXT:    setp.lt.s32 %p2, %r2, 2;
+; CHECK-NEXT:    and.pred %p3, %p1, %p2;
+; CHECK-NEXT:    @%p3 bra $L__BB8_2;
+; CHECK-NEXT:  // %bb.1: // %bb1
+; CHECK-NEXT:    mov.b32 %r4, 1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT:    ret;
+; CHECK-NEXT:  $L__BB8_2: // %bb2
+; CHECK-NEXT:    mov.b32 %r3, 127;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %p1 = icmp sgt i32 %a, 1
+  %p2 = icmp sgt i32 %b, 1
+  %c = icmp ule i1 %p1, %p2
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  ret i32 1
+bb2:
+  ret i32 127
+}
+
+define i32 @icmp_i1_ult(i32 %a, i32 %b) {
+; CHECK-LABEL: icmp_i1_ult(
+; CHECK:       {
+; CHECK-NEXT:    .reg .pred %p<4>;
+; CHECK-NEXT:    .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [icmp_i1_ult_param_0];
+; CHECK-NEXT:    setp.gt.s32 %p1, %r1, 1;
+; CHECK-NEXT:    ld.param.u32 %r2, [icmp_i1_ult_param_1];
+; CHECK-NEXT:    setp.lt.s32 %p2, %r2, 2;
+; CHECK-NEXT:    or.pred %p3, %p1, %p2;
+; CHECK-NEXT:    @%p3 bra $L__BB9_2;
+; CHECK-NEXT:  // %bb.1: // %bb1
+; CHECK-NEXT:    mov.b32 %r4, 1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT:    ret;
+; CHECK-NEXT:  $L__BB9_2: // %bb2
+; CHECK-NEXT:    mov.b32 %r3, 127;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %p1 = icmp sgt i32 %a, 1
+  %p2 = icmp sgt i32 %b, 1
+  %c = icmp ult i1 %p1, %p2
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  ret i32 1
+bb2:
+  ret i32 127
+}
+

>From 980ea86bacee6aba369e2a25947ef1c246e75dd5 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Tue, 19 Nov 2024 23:29:01 +0000
Subject: [PATCH 4/5] address comments

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  4 +++-
 llvm/lib/Target/NVPTX/NVPTXInstrInfo.td       | 11 -----------
 2 files changed, 3 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 85209fe706ad74..f8e7161722511c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18763,7 +18763,9 @@ SDValue DAGCombiner::rebuildSetCC(SDValue N) {
       EVT SetCCVT = N.getValueType();
       if (LegalTypes)
         SetCCVT = getSetCCResultType(SetCCVT);
-      // Replace the uses of XOR with SETCC
+      // Replace the uses of XOR with SETCC. Note, avoid this transformation if
+      // it would introduce illegal operations post-legalization as this can
+      // result in an infinite loop.
       const ISD::CondCode CC = Equal ? ISD::SETEQ : ISD::SETNE;
       if (!LegalOperations || TLI.isCondCodeLegal(CC, Op0.getSimpleValueType()))
         return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1, CC);
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 64f437fd4e4d2d..b8a55851e8d3e1 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -2120,17 +2120,6 @@ defm : ISET_FORMAT_UNSIGNED<setule, CmpLE>;
 defm : ISET_FORMAT_UNSIGNED<setueq, CmpEQ>;
 defm : ISET_FORMAT_UNSIGNED<setune, CmpNE>;
 
-// i1 compares
-def : Pat<(setne Int1Regs:$a, Int1Regs:$b),
-          (XORb1rr Int1Regs:$a, Int1Regs:$b)>;
-def : Pat<(setune Int1Regs:$a, Int1Regs:$b),
-          (XORb1rr Int1Regs:$a, Int1Regs:$b)>;
-
-def : Pat<(seteq Int1Regs:$a, Int1Regs:$b),
-          (NOT1 (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
-def : Pat<(setueq Int1Regs:$a, Int1Regs:$b),
-          (NOT1 (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
-
 // comparisons of i8 extracted with BFE as i32
 // It's faster to do comparison directly on i32 extracted by BFE,
 // instead of the long conversion and sign extending.

>From c8da53d34429c01cf342d4ca808e5d8d64adec1f Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Thu, 5 Dec 2024 18:53:37 +0000
Subject: [PATCH 5/5] address comments -- clarify comment

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f8e7161722511c..f5334e8c63964a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18765,7 +18765,8 @@ SDValue DAGCombiner::rebuildSetCC(SDValue N) {
         SetCCVT = getSetCCResultType(SetCCVT);
       // Replace the uses of XOR with SETCC. Note, avoid this transformation if
       // it would introduce illegal operations post-legalization as this can
-      // result in an infinite loop.
+      // result in infinite looping between converting xor->setcc here, and
+      // expanding setcc->xor in LegalizeSetCCCondCode if requested.
       const ISD::CondCode CC = Equal ? ISD::SETEQ : ISD::SETNE;
       if (!LegalOperations || TLI.isCondCodeLegal(CC, Op0.getSimpleValueType()))
         return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1, CC);



More information about the llvm-commits mailing list