[llvm] d27802a - [DAGCombiner] Fold setcc of trunc, generalizing some NVPTX isel logic (#150270)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 5 19:20:21 PDT 2025
Author: Alex MacLean
Date: 2025-08-05T19:20:17-07:00
New Revision: d27802a2173ab3d864d3bf1ac507a4acc656e457
URL: https://github.com/llvm/llvm-project/commit/d27802a2173ab3d864d3bf1ac507a4acc656e457
DIFF: https://github.com/llvm/llvm-project/commit/d27802a2173ab3d864d3bf1ac507a4acc656e457.diff
LOG: [DAGCombiner] Fold setcc of trunc, generalizing some NVPTX isel logic (#150270)
This change adds support for folding a SETCC when one or both of the
operands is a TRUNCATE with the appropriate no-wrap flags. This pattern
can occur when promoting i8 operations in NVPTX, and we currently have
some ISel rules to try to handle it.
Added:
llvm/test/CodeGen/NVPTX/trunc-setcc.ll
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
llvm/test/CodeGen/NVPTX/sext-setcc.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index a68f521ee59cd..e235d144e85ff 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -5118,6 +5118,20 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
}
+ // fold (setcc (trunc x) c) -> (setcc x c)
+ if (N0.getOpcode() == ISD::TRUNCATE &&
+ ((N0->getFlags().hasNoUnsignedWrap() && !ISD::isSignedIntSetCC(Cond)) ||
+ (N0->getFlags().hasNoSignedWrap() &&
+ !ISD::isUnsignedIntSetCC(Cond))) &&
+ isTypeDesirableForOp(ISD::SETCC, N0.getOperand(0).getValueType())) {
+ EVT NewVT = N0.getOperand(0).getValueType();
+ SDValue NewConst = DAG.getConstant(ISD::isSignedIntSetCC(Cond)
+ ? C1.sext(NewVT.getSizeInBits())
+ : C1.zext(NewVT.getSizeInBits()),
+ dl, NewVT);
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), NewConst, Cond);
+ }
+
if (SDValue V =
optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
return V;
@@ -5654,6 +5668,17 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return N0;
}
+ // Fold (setcc (trunc x) (trunc y)) -> (setcc x y)
+ if (N0.getOpcode() == ISD::TRUNCATE && N1.getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
+ ((!ISD::isSignedIntSetCC(Cond) && N0->getFlags().hasNoUnsignedWrap() &&
+ N1->getFlags().hasNoUnsignedWrap()) ||
+ (!ISD::isUnsignedIntSetCC(Cond) && N0->getFlags().hasNoSignedWrap() &&
+ N1->getFlags().hasNoSignedWrap())) &&
+ isTypeDesirableForOp(ISD::SETCC, N0.getOperand(0).getValueType())) {
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
+ }
+
// Could not fold it.
return SDValue();
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 6765ecb77da3a..aac611d4c903a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1560,18 +1560,6 @@ def : Pat<(setcc (i16 (sext_inreg (trunc (prmt i32:$a, 0, byte_extract_prmt:$sel
(PRMT_B32rii i32:$b, 0, (to_sign_extend_selector $sel_b), PrmtNONE),
(cond2cc $cc))>;
-// A 16-bit comparison of truncated byte extracts can be be converted to 32-bit
-// comparison because we know that the truncate is just trancating off zeros
-// and that the most-significant byte is also zeros so the meaning of signed and
-// unsigned comparisons will not be changed.
-def : Pat<(setcc (i16 (trunc (prmt i32:$a, 0, byte_extract_prmt:$sel_a, PrmtNONE))),
- (i16 (trunc (prmt i32:$b, 0, byte_extract_prmt:$sel_b, PrmtNONE))),
- cond:$cc),
- (SETP_i32rr (PRMT_B32rii i32:$a, 0, byte_extract_prmt:$sel_a, PrmtNONE),
- (PRMT_B32rii i32:$b, 0, byte_extract_prmt:$sel_b, PrmtNONE),
- (cond2cc $cc))>;
-
-
def SDTDeclareArrayParam :
SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
def SDTDeclareScalarParam :
diff --git a/llvm/test/CodeGen/NVPTX/sext-setcc.ll b/llvm/test/CodeGen/NVPTX/sext-setcc.ll
index 9a67bdfeb067b..97918a6f26cdf 100644
--- a/llvm/test/CodeGen/NVPTX/sext-setcc.ll
+++ b/llvm/test/CodeGen/NVPTX/sext-setcc.ll
@@ -29,7 +29,6 @@ define <4 x i8> @sext_setcc_v4i1_to_v4i8(ptr %p) {
; CHECK-LABEL: sext_setcc_v4i1_to_v4i8(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<5>;
-; CHECK-NEXT: .reg .b16 %rs<5>;
; CHECK-NEXT: .reg .b32 %r<13>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
@@ -37,17 +36,13 @@ define <4 x i8> @sext_setcc_v4i1_to_v4i8(ptr %p) {
; CHECK-NEXT: ld.param.b64 %rd1, [sext_setcc_v4i1_to_v4i8_param_0];
; CHECK-NEXT: ld.b32 %r1, [%rd1];
; CHECK-NEXT: prmt.b32 %r2, %r1, 0, 0x7770U;
-; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
-; CHECK-NEXT: setp.eq.b16 %p1, %rs1, 0;
+; CHECK-NEXT: setp.eq.b32 %p1, %r2, 0;
; CHECK-NEXT: prmt.b32 %r3, %r1, 0, 0x7771U;
-; CHECK-NEXT: cvt.u16.u32 %rs2, %r3;
-; CHECK-NEXT: setp.eq.b16 %p2, %rs2, 0;
+; CHECK-NEXT: setp.eq.b32 %p2, %r3, 0;
; CHECK-NEXT: prmt.b32 %r4, %r1, 0, 0x7772U;
-; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
-; CHECK-NEXT: setp.eq.b16 %p3, %rs3, 0;
+; CHECK-NEXT: setp.eq.b32 %p3, %r4, 0;
; CHECK-NEXT: prmt.b32 %r5, %r1, 0, 0x7773U;
-; CHECK-NEXT: cvt.u16.u32 %rs4, %r5;
-; CHECK-NEXT: setp.eq.b16 %p4, %rs4, 0;
+; CHECK-NEXT: setp.eq.b32 %p4, %r5, 0;
; CHECK-NEXT: selp.b32 %r6, -1, 0, %p4;
; CHECK-NEXT: selp.b32 %r7, -1, 0, %p3;
; CHECK-NEXT: prmt.b32 %r8, %r7, %r6, 0x3340U;
diff --git a/llvm/test/CodeGen/NVPTX/trunc-setcc.ll b/llvm/test/CodeGen/NVPTX/trunc-setcc.ll
new file mode 100644
index 0000000000000..f22e37e203966
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/trunc-setcc.ll
@@ -0,0 +1,269 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mcpu=sm_50 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -mcpu=sm_50 | %ptxas-verify -arch=sm_50 %}
+
+target triple = "nvptx64-nvidia-cuda"
+
+define i1 @trunc_nsw_singed_const(i32 %a) {
+; CHECK-LABEL: trunc_nsw_singed_const(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [trunc_nsw_singed_const_param_0];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: setp.gt.s32 %p1, %r2, -1;
+; CHECK-NEXT: selp.b32 %r3, -1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
+ %a2 = add i32 %a, 1
+ %b = trunc nsw i32 %a2 to i8
+ %c = icmp sgt i8 %b, -1
+ ret i1 %c
+}
+
+define i1 @trunc_nuw_singed_const(i32 %a) {
+; CHECK-LABEL: trunc_nuw_singed_const(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<4>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b8 %rs1, [trunc_nuw_singed_const_param_0];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: cvt.s16.s8 %rs3, %rs2;
+; CHECK-NEXT: setp.lt.s16 %p1, %rs3, 100;
+; CHECK-NEXT: selp.b32 %r1, -1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
+ %a2 = add i32 %a, 1
+ %b = trunc nuw i32 %a2 to i8
+ %c = icmp slt i8 %b, 100
+ ret i1 %c
+}
+
+define i1 @trunc_nsw_unsinged_const(i32 %a) {
+; CHECK-LABEL: trunc_nsw_unsinged_const(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<4>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b8 %rs1, [trunc_nsw_unsinged_const_param_0];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: and.b16 %rs3, %rs2, 255;
+; CHECK-NEXT: setp.lt.u16 %p1, %rs3, 236;
+; CHECK-NEXT: selp.b32 %r1, -1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
+ %a2 = add i32 %a, 1
+ %b = trunc nsw i32 %a2 to i8
+ %c = icmp ult i8 %b, -20
+ ret i1 %c
+}
+
+define i1 @trunc_nuw_unsinged_const(i32 %a) {
+; CHECK-LABEL: trunc_nuw_unsinged_const(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [trunc_nuw_unsinged_const_param_0];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: setp.gt.u32 %p1, %r2, 100;
+; CHECK-NEXT: selp.b32 %r3, -1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
+ %a2 = add i32 %a, 1
+ %b = trunc nuw i32 %a2 to i8
+ %c = icmp ugt i8 %b, 100
+ ret i1 %c
+}
+
+
+define i1 @trunc_nsw_eq_const(i32 %a) {
+; CHECK-LABEL: trunc_nsw_eq_const(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [trunc_nsw_eq_const_param_0];
+; CHECK-NEXT: setp.eq.b32 %p1, %r1, 99;
+; CHECK-NEXT: selp.b32 %r2, -1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
+; CHECK-NEXT: ret;
+ %a2 = add i32 %a, 1
+ %b = trunc nsw i32 %a2 to i8
+ %c = icmp eq i8 %b, 100
+ ret i1 %c
+}
+
+define i1 @trunc_nuw_eq_const(i32 %a) {
+; CHECK-LABEL: trunc_nuw_eq_const(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [trunc_nuw_eq_const_param_0];
+; CHECK-NEXT: setp.eq.b32 %p1, %r1, 99;
+; CHECK-NEXT: selp.b32 %r2, -1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
+; CHECK-NEXT: ret;
+ %a2 = add i32 %a, 1
+ %b = trunc nuw i32 %a2 to i8
+ %c = icmp eq i8 %b, 100
+ ret i1 %c
+}
+
+;;;
+
+define i1 @trunc_nsw_singed(i32 %a1, i32 %a2) {
+; CHECK-LABEL: trunc_nsw_singed(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<6>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [trunc_nsw_singed_param_0];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: ld.param.b32 %r3, [trunc_nsw_singed_param_1];
+; CHECK-NEXT: add.s32 %r4, %r3, 7;
+; CHECK-NEXT: setp.gt.s32 %p1, %r2, %r4;
+; CHECK-NEXT: selp.b32 %r5, -1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
+; CHECK-NEXT: ret;
+ %b1 = add i32 %a1, 1
+ %b2 = add i32 %a2, 7
+ %c1 = trunc nsw i32 %b1 to i8
+ %c2 = trunc nsw i32 %b2 to i8
+ %c = icmp sgt i8 %c1, %c2
+ ret i1 %c
+}
+
+define i1 @trunc_nuw_singed(i32 %a1, i32 %a2) {
+; CHECK-LABEL: trunc_nuw_singed(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<7>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b8 %rs1, [trunc_nuw_singed_param_0];
+; CHECK-NEXT: ld.param.b8 %rs2, [trunc_nuw_singed_param_1];
+; CHECK-NEXT: add.s16 %rs3, %rs1, 1;
+; CHECK-NEXT: cvt.s16.s8 %rs4, %rs3;
+; CHECK-NEXT: add.s16 %rs5, %rs2, 6;
+; CHECK-NEXT: cvt.s16.s8 %rs6, %rs5;
+; CHECK-NEXT: setp.lt.s16 %p1, %rs4, %rs6;
+; CHECK-NEXT: selp.b32 %r1, -1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
+ %b1 = add i32 %a1, 1
+ %b2 = add i32 %a2, 6
+ %c1 = trunc nuw i32 %b1 to i8
+ %c2 = trunc nuw i32 %b2 to i8
+ %c = icmp slt i8 %c1, %c2
+ ret i1 %c
+}
+
+define i1 @trunc_nsw_unsinged(i32 %a1, i32 %a2) {
+; CHECK-LABEL: trunc_nsw_unsinged(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<7>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b8 %rs1, [trunc_nsw_unsinged_param_0];
+; CHECK-NEXT: ld.param.b8 %rs2, [trunc_nsw_unsinged_param_1];
+; CHECK-NEXT: add.s16 %rs3, %rs1, 1;
+; CHECK-NEXT: and.b16 %rs4, %rs3, 255;
+; CHECK-NEXT: add.s16 %rs5, %rs2, 4;
+; CHECK-NEXT: and.b16 %rs6, %rs5, 255;
+; CHECK-NEXT: setp.lt.u16 %p1, %rs4, %rs6;
+; CHECK-NEXT: selp.b32 %r1, -1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
+ %b1 = add i32 %a1, 1
+ %b2 = add i32 %a2, 4
+ %c1 = trunc nsw i32 %b1 to i8
+ %c2 = trunc nsw i32 %b2 to i8
+ %c = icmp ult i8 %c1, %c2
+ ret i1 %c
+}
+
+define i1 @trunc_nuw_unsinged(i32 %a1, i32 %a2) {
+; CHECK-LABEL: trunc_nuw_unsinged(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<6>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [trunc_nuw_unsinged_param_0];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: ld.param.b32 %r3, [trunc_nuw_unsinged_param_1];
+; CHECK-NEXT: add.s32 %r4, %r3, 5;
+; CHECK-NEXT: setp.gt.u32 %p1, %r2, %r4;
+; CHECK-NEXT: selp.b32 %r5, -1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
+; CHECK-NEXT: ret;
+ %b1 = add i32 %a1, 1
+ %b2 = add i32 %a2, 5
+ %c1 = trunc nuw i32 %b1 to i8
+ %c2 = trunc nuw i32 %b2 to i8
+ %c = icmp ugt i8 %c1, %c2
+ ret i1 %c
+}
+
+
+define i1 @trunc_nsw_eq(i32 %a1, i32 %a2) {
+; CHECK-LABEL: trunc_nsw_eq(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<6>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [trunc_nsw_eq_param_0];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: ld.param.b32 %r3, [trunc_nsw_eq_param_1];
+; CHECK-NEXT: add.s32 %r4, %r3, 3;
+; CHECK-NEXT: setp.eq.b32 %p1, %r2, %r4;
+; CHECK-NEXT: selp.b32 %r5, -1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
+; CHECK-NEXT: ret;
+ %b1 = add i32 %a1, 1
+ %b2 = add i32 %a2, 3
+ %c1 = trunc nsw i32 %b1 to i8
+ %c2 = trunc nsw i32 %b2 to i8
+ %c = icmp eq i8 %c1, %c2
+ ret i1 %c
+}
+
+define i1 @trunc_nuw_eq(i32 %a1, i32 %a2) {
+; CHECK-LABEL: trunc_nuw_eq(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<6>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [trunc_nuw_eq_param_0];
+; CHECK-NEXT: add.s32 %r2, %r1, 2;
+; CHECK-NEXT: ld.param.b32 %r3, [trunc_nuw_eq_param_1];
+; CHECK-NEXT: add.s32 %r4, %r3, 1;
+; CHECK-NEXT: setp.eq.b32 %p1, %r2, %r4;
+; CHECK-NEXT: selp.b32 %r5, -1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
+; CHECK-NEXT: ret;
+ %b1 = add i32 %a1, 2
+ %b2 = add i32 %a2, 1
+ %c1 = trunc nuw i32 %b1 to i8
+ %c2 = trunc nuw i32 %b2 to i8
+ %c = icmp eq i8 %c1, %c2
+ ret i1 %c
+}
More information about the llvm-commits
mailing list