[llvm] [NVPTX] Fix lowering of i1 SETCC (PR #115035)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 5 09:51:22 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-nvptx
Author: Alex MacLean (AlexMaclean)
<details>
<summary>Changes</summary>
Fixes #58428.
---
Full diff: https://github.com/llvm/llvm-project/pull/115035.diff
4 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+3)
- (modified) llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp (+43)
- (modified) llvm/lib/Target/NVPTX/NVPTXISelLowering.h (+2)
- (added) llvm/test/CodeGen/NVPTX/i1-icmp.ll (+193)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 42232bd195a651..a599328a0e5be2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18723,6 +18723,9 @@ SDValue DAGCombiner::rebuildSetCC(SDValue N) {
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
+ if (!TLI.isOperationLegal(ISD::SETCC, Op0.getValueType()))
+ return SDValue();
+
if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
bool Equal = false;
// (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index d3bf0ecfe2cc92..a74e9dcccdd770 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -667,6 +667,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
setTruncStoreAction(VT, MVT::i1, Expand);
}
+ setOperationAction(ISD::SETCC, MVT::i1, Custom);
// expand extload of vector of integers.
setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v2i16,
@@ -2666,6 +2667,46 @@ SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
}
}
+// Lowers SETCC nodes that aren't directly supported by our arch.
+SDValue NVPTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue L = Op->getOperand(0);
+ SDValue R = Op->getOperand(1);
+
+ if (L.getValueType() != MVT::i1)
+ return SDValue();
+
+ SDLoc DL(Op);
+ SDValue Ret;
+ switch (cast<CondCodeSDNode>(Op->getOperand(2))->get()) {
+ default:
+ llvm_unreachable("Unknown integer setcc!");
+ case ISD::SETEQ: // X == Y -> ~(X^Y)
+ Ret = DAG.getNOT(DL, DAG.getNode(ISD::XOR, DL, MVT::i1, L, R), MVT::i1);
+ break;
+ case ISD::SETNE: // X != Y --> (X^Y)
+ Ret = DAG.getNode(ISD::XOR, DL, MVT::i1, L, R);
+ break;
+ case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
+ case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
+ Ret = DAG.getNode(ISD::AND, DL, MVT::i1, R, DAG.getNOT(DL, L, MVT::i1));
+ break;
+ case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
+ case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
+ Ret = DAG.getNode(ISD::AND, DL, MVT::i1, L, DAG.getNOT(DL, R, MVT::i1));
+ break;
+ case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
+ case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
+ Ret = DAG.getNode(ISD::OR, DL, MVT::i1, R, DAG.getNOT(DL, L, MVT::i1));
+ break;
+ case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
+ case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
+ Ret = DAG.getNode(ISD::OR, DL, MVT::i1, L, DAG.getNOT(DL, R, MVT::i1));
+ break;
+ }
+
+ return DAG.getZExtOrTrunc(Ret, DL, Op.getValueType());
+}
+
/// If the types match, convert the generic copysign to the NVPTXISD version,
/// otherwise bail ensuring that mismatched cases are properly expaned.
SDValue NVPTXTargetLowering::LowerFCOPYSIGN(SDValue Op,
@@ -2919,6 +2960,8 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerSTORE(Op, DAG);
case ISD::LOAD:
return LowerLOAD(Op, DAG);
+ case ISD::SETCC:
+ return LowerSETCC(Op, DAG);
case ISD::SHL_PARTS:
return LowerShiftLeftParts(Op, DAG);
case ISD::SRA_PARTS:
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index c8b589ae39413e..b1bb9090464ac4 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -628,6 +628,8 @@ class NVPTXTargetLowering : public TargetLowering {
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/NVPTX/i1-icmp.ll b/llvm/test/CodeGen/NVPTX/i1-icmp.ll
new file mode 100644
index 00000000000000..db9ae6541b87ae
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/i1-icmp.ll
@@ -0,0 +1,193 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
+
+target triple = "nvptx-nvidia-cuda"
+
+define i32 @icmp_i1_eq(i32 %a, i32 %b) {
+; CHECK-LABEL: icmp_i1_eq(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<4>;
+; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u32 %r1, [icmp_i1_eq_param_0];
+; CHECK-NEXT: setp.gt.s32 %p1, %r1, 1;
+; CHECK-NEXT: ld.param.u32 %r2, [icmp_i1_eq_param_1];
+; CHECK-NEXT: setp.gt.s32 %p2, %r2, 1;
+; CHECK-NEXT: xor.pred %p3, %p1, %p2;
+; CHECK-NEXT: @%p3 bra $L__BB0_2;
+; CHECK-NEXT: // %bb.1: // %bb1
+; CHECK-NEXT: mov.b32 %r4, 1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT: ret;
+; CHECK-NEXT: $L__BB0_2: // %bb2
+; CHECK-NEXT: mov.b32 %r3, 127;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
+ %p1 = icmp sgt i32 %a, 1
+ %p2 = icmp sgt i32 %b, 1
+ %c = icmp eq i1 %p1, %p2
+ br i1 %c, label %bb1, label %bb2
+bb1:
+ ret i32 1
+bb2:
+ ret i32 127
+}
+
+define i32 @icmp_i1_ne(i32 %a, i32 %b) {
+; CHECK-LABEL: icmp_i1_ne(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<5>;
+; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u32 %r1, [icmp_i1_ne_param_0];
+; CHECK-NEXT: setp.gt.s32 %p1, %r1, 1;
+; CHECK-NEXT: ld.param.u32 %r2, [icmp_i1_ne_param_1];
+; CHECK-NEXT: setp.gt.s32 %p2, %r2, 1;
+; CHECK-NEXT: xor.pred %p3, %p1, %p2;
+; CHECK-NEXT: not.pred %p4, %p3;
+; CHECK-NEXT: @%p4 bra $L__BB1_2;
+; CHECK-NEXT: // %bb.1: // %bb1
+; CHECK-NEXT: mov.b32 %r4, 1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT: ret;
+; CHECK-NEXT: $L__BB1_2: // %bb2
+; CHECK-NEXT: mov.b32 %r3, 127;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
+ %p1 = icmp sgt i32 %a, 1
+ %p2 = icmp sgt i32 %b, 1
+ %c = icmp ne i1 %p1, %p2
+ br i1 %c, label %bb1, label %bb2
+bb1:
+ ret i32 1
+bb2:
+ ret i32 127
+}
+
+define i32 @icmp_i1_sgt(i32 %a, i32 %b) {
+; CHECK-LABEL: icmp_i1_sgt(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<4>;
+; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u32 %r1, [icmp_i1_sgt_param_0];
+; CHECK-NEXT: setp.gt.s32 %p1, %r1, 1;
+; CHECK-NEXT: ld.param.u32 %r2, [icmp_i1_sgt_param_1];
+; CHECK-NEXT: setp.lt.s32 %p2, %r2, 2;
+; CHECK-NEXT: or.pred %p3, %p1, %p2;
+; CHECK-NEXT: @%p3 bra $L__BB2_2;
+; CHECK-NEXT: // %bb.1: // %bb1
+; CHECK-NEXT: mov.b32 %r4, 1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT: ret;
+; CHECK-NEXT: $L__BB2_2: // %bb2
+; CHECK-NEXT: mov.b32 %r3, 127;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
+ %p1 = icmp sgt i32 %a, 1
+ %p2 = icmp sgt i32 %b, 1
+ %c = icmp sgt i1 %p1, %p2
+ br i1 %c, label %bb1, label %bb2
+bb1:
+ ret i32 1
+bb2:
+ ret i32 127
+}
+
+define i32 @icmp_i1_slt(i32 %a, i32 %b) {
+; CHECK-LABEL: icmp_i1_slt(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<4>;
+; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u32 %r1, [icmp_i1_slt_param_0];
+; CHECK-NEXT: setp.lt.s32 %p1, %r1, 2;
+; CHECK-NEXT: ld.param.u32 %r2, [icmp_i1_slt_param_1];
+; CHECK-NEXT: setp.gt.s32 %p2, %r2, 1;
+; CHECK-NEXT: or.pred %p3, %p2, %p1;
+; CHECK-NEXT: @%p3 bra $L__BB3_2;
+; CHECK-NEXT: // %bb.1: // %bb1
+; CHECK-NEXT: mov.b32 %r4, 1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT: ret;
+; CHECK-NEXT: $L__BB3_2: // %bb2
+; CHECK-NEXT: mov.b32 %r3, 127;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
+ %p1 = icmp sgt i32 %a, 1
+ %p2 = icmp sgt i32 %b, 1
+ %c = icmp slt i1 %p1, %p2
+ br i1 %c, label %bb1, label %bb2
+bb1:
+ ret i32 1
+bb2:
+ ret i32 127
+}
+
+define i32 @icmp_i1_sge(i32 %a, i32 %b) {
+; CHECK-LABEL: icmp_i1_sge(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<4>;
+; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u32 %r1, [icmp_i1_sge_param_0];
+; CHECK-NEXT: setp.gt.s32 %p1, %r1, 1;
+; CHECK-NEXT: ld.param.u32 %r2, [icmp_i1_sge_param_1];
+; CHECK-NEXT: setp.lt.s32 %p2, %r2, 2;
+; CHECK-NEXT: and.pred %p3, %p1, %p2;
+; CHECK-NEXT: @%p3 bra $L__BB4_2;
+; CHECK-NEXT: // %bb.1: // %bb1
+; CHECK-NEXT: mov.b32 %r4, 1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT: ret;
+; CHECK-NEXT: $L__BB4_2: // %bb2
+; CHECK-NEXT: mov.b32 %r3, 127;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
+ %p1 = icmp sgt i32 %a, 1
+ %p2 = icmp sgt i32 %b, 1
+ %c = icmp sge i1 %p1, %p2
+ br i1 %c, label %bb1, label %bb2
+bb1:
+ ret i32 1
+bb2:
+ ret i32 127
+}
+
+define i32 @icmp_i1_sle(i32 %a, i32 %b) {
+; CHECK-LABEL: icmp_i1_sle(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<4>;
+; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u32 %r1, [icmp_i1_sle_param_0];
+; CHECK-NEXT: setp.lt.s32 %p1, %r1, 2;
+; CHECK-NEXT: ld.param.u32 %r2, [icmp_i1_sle_param_1];
+; CHECK-NEXT: setp.gt.s32 %p2, %r2, 1;
+; CHECK-NEXT: and.pred %p3, %p2, %p1;
+; CHECK-NEXT: @%p3 bra $L__BB5_2;
+; CHECK-NEXT: // %bb.1: // %bb1
+; CHECK-NEXT: mov.b32 %r4, 1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT: ret;
+; CHECK-NEXT: $L__BB5_2: // %bb2
+; CHECK-NEXT: mov.b32 %r3, 127;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
+ %p1 = icmp sgt i32 %a, 1
+ %p2 = icmp sgt i32 %b, 1
+ %c = icmp sle i1 %p1, %p2
+ br i1 %c, label %bb1, label %bb2
+bb1:
+ ret i32 1
+bb2:
+ ret i32 127
+}
+
``````````
</details>
https://github.com/llvm/llvm-project/pull/115035
More information about the llvm-commits mailing list