[llvm] [DAGCombiner] Fold setcc of trunc, generalizing some NVPTX isel logic (PR #150270)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 23 10:15:17 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-selectiondag
Author: Alex MacLean (AlexMaclean)
<details>
<summary>Changes</summary>
That change adds support for folding a SETCC when one or both of the operands is a TRUNCATE with the appropriate no-wrap flags. This pattern can occur when promoting i8 operations in NVPTX, and we currently have some ISel rules to try to handle it.
While working on the above I realized one of the ISel rules was incorrect, so fix that as well.
---
Patch is 24.82 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/150270.diff
5 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+26)
- (modified) llvm/lib/Target/NVPTX/NVPTXInstrInfo.td (+2-26)
- (modified) llvm/test/CodeGen/NVPTX/i8x4-instructions.ll (+100-68)
- (modified) llvm/test/CodeGen/NVPTX/sext-setcc.ll (+4-9)
- (added) llvm/test/CodeGen/NVPTX/trunc-setcc.ll (+269)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 1764910861df4..a453c17877430 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
@@ -5125,6 +5126,20 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
}
+ // fold (setcc (trunc x) c) -> (setcc x c)
+ if (N0.getOpcode() == ISD::TRUNCATE &&
+ ((N0->getFlags().hasNoUnsignedWrap() && !ISD::isSignedIntSetCC(Cond)) ||
+ (N0->getFlags().hasNoSignedWrap() &&
+ !ISD::isUnsignedIntSetCC(Cond))) &&
+ isTypeDesirableForOp(ISD::SETCC, N0.getOperand(0).getValueType())) {
+ EVT NewVT = N0.getOperand(0).getValueType();
+ SDValue NewConst = DAG.getConstant(ISD::isSignedIntSetCC(Cond)
+ ? C1.sext(NewVT.getSizeInBits())
+ : C1.zext(NewVT.getSizeInBits()),
+ dl, NewVT);
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), NewConst, Cond);
+ }
+
if (SDValue V =
optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
return V;
@@ -5646,6 +5661,17 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return N0;
}
+ // Fold (setcc (trunc x) (trunc y)) -> (setcc x y)
+ if (N0.getOpcode() == ISD::TRUNCATE && N1.getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
+ ((!ISD::isSignedIntSetCC(Cond) && N0->getFlags().hasNoUnsignedWrap() &&
+ N1->getFlags().hasNoUnsignedWrap()) ||
+ (!ISD::isUnsignedIntSetCC(Cond) && N0->getFlags().hasNoSignedWrap() &&
+ N1->getFlags().hasNoSignedWrap())) &&
+ isTypeDesirableForOp(ISD::SETCC, N0.getOperand(0).getValueType())) {
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
+ }
+
// Could not fold it.
return SDValue();
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index b5df4c6de7fd8..8043f678e0fcc 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1714,40 +1714,16 @@ def cond_signed : PatLeaf<(cond), [{
return isSignedIntSetCC(N->get());
}]>;
-def cond_not_signed : PatLeaf<(cond), [{
- return !isSignedIntSetCC(N->get());
-}]>;
-
// comparisons of i8 extracted with PRMT as i32
// It's faster to do comparison directly on i32 extracted by PRMT,
// instead of the long conversion and sign extending.
-def: Pat<(setcc (i16 (sext_inreg (i16 (trunc (prmt i32:$a, 0, byte_extract_prmt:$sel_a, PrmtNONE))), i8)),
- (i16 (sext_inreg (i16 (trunc (prmt i32:$b, 0, byte_extract_prmt:$sel_b, PrmtNONE))), i8)),
- cond_signed:$cc),
- (SETP_i32rr (PRMT_B32rii i32:$a, 0, byte_extract_prmt:$sel_a, PrmtNONE),
- (PRMT_B32rii i32:$b, 0, byte_extract_prmt:$sel_b, PrmtNONE),
- (cond2cc $cc))>;
-
def: Pat<(setcc (i16 (sext_inreg (trunc (prmt i32:$a, 0, byte_extract_prmt:$sel_a, PrmtNONE)), i8)),
(i16 (sext_inreg (trunc (prmt i32:$b, 0, byte_extract_prmt:$sel_b, PrmtNONE)), i8)),
cond_signed:$cc),
- (SETP_i32rr (PRMT_B32rii i32:$a, 0, byte_extract_prmt:$sel_a, PrmtNONE),
- (PRMT_B32rii i32:$b, 0, byte_extract_prmt:$sel_b, PrmtNONE),
+ (SETP_i32rr (PRMT_B32rii i32:$a, 0, (to_sign_extend_selector $sel_a), PrmtNONE),
+ (PRMT_B32rii i32:$b, 0, (to_sign_extend_selector $sel_b), PrmtNONE),
(cond2cc $cc))>;
-def: Pat<(setcc (i16 (trunc (prmt i32:$a, 0, byte_extract_prmt:$sel_a, PrmtNONE))),
- (i16 (trunc (prmt i32:$b, 0, byte_extract_prmt:$sel_b, PrmtNONE))),
- cond_signed:$cc),
- (SETP_i32rr (PRMT_B32rii i32:$a, 0, byte_extract_prmt:$sel_a, PrmtNONE),
- (PRMT_B32rii i32:$b, 0, byte_extract_prmt:$sel_b, PrmtNONE),
- (cond2cc $cc))>;
-
-def: Pat<(setcc (i16 (trunc (prmt i32:$a, 0, byte_extract_prmt:$sel_a, PrmtNONE))),
- (i16 (trunc (prmt i32:$b, 0, byte_extract_prmt:$sel_b, PrmtNONE))),
- cond_not_signed:$cc),
- (SETP_i32rr (PRMT_B32rii i32:$a, 0, byte_extract_prmt:$sel_a, PrmtNONE),
- (PRMT_B32rii i32:$b, 0, byte_extract_prmt:$sel_b, PrmtNONE),
- (cond2cc $cc))>;
def SDTDeclareArrayParam :
SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
diff --git a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
index da99cec0669ed..f2a2b171d5ca2 100644
--- a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
@@ -343,61 +343,77 @@ define <4 x i8> @test_smax(<4 x i8> %a, <4 x i8> %b) #0 {
; O0-LABEL: test_smax(
; O0: {
; O0-NEXT: .reg .pred %p<5>;
-; O0-NEXT: .reg .b32 %r<18>;
+; O0-NEXT: .reg .b32 %r<26>;
; O0-EMPTY:
; O0-NEXT: // %bb.0:
; O0-NEXT: ld.param.b32 %r2, [test_smax_param_1];
; O0-NEXT: ld.param.b32 %r1, [test_smax_param_0];
-; O0-NEXT: prmt.b32 %r3, %r2, 0, 0x7770U;
-; O0-NEXT: prmt.b32 %r4, %r1, 0, 0x7770U;
+; O0-NEXT: prmt.b32 %r3, %r2, 0, 0x8880U;
+; O0-NEXT: prmt.b32 %r4, %r1, 0, 0x8880U;
; O0-NEXT: setp.gt.s32 %p1, %r4, %r3;
-; O0-NEXT: prmt.b32 %r5, %r2, 0, 0x7771U;
-; O0-NEXT: prmt.b32 %r6, %r1, 0, 0x7771U;
+; O0-NEXT: prmt.b32 %r5, %r2, 0, 0x9991U;
+; O0-NEXT: prmt.b32 %r6, %r1, 0, 0x9991U;
; O0-NEXT: setp.gt.s32 %p2, %r6, %r5;
-; O0-NEXT: prmt.b32 %r7, %r2, 0, 0x7772U;
-; O0-NEXT: prmt.b32 %r8, %r1, 0, 0x7772U;
+; O0-NEXT: prmt.b32 %r7, %r2, 0, 0xaaa2U;
+; O0-NEXT: prmt.b32 %r8, %r1, 0, 0xaaa2U;
; O0-NEXT: setp.gt.s32 %p3, %r8, %r7;
-; O0-NEXT: prmt.b32 %r9, %r2, 0, 0x7773U;
-; O0-NEXT: prmt.b32 %r10, %r1, 0, 0x7773U;
+; O0-NEXT: prmt.b32 %r9, %r2, 0, 0xbbb3U;
+; O0-NEXT: prmt.b32 %r10, %r1, 0, 0xbbb3U;
; O0-NEXT: setp.gt.s32 %p4, %r10, %r9;
-; O0-NEXT: selp.b32 %r11, %r10, %r9, %p4;
-; O0-NEXT: selp.b32 %r12, %r8, %r7, %p3;
-; O0-NEXT: prmt.b32 %r13, %r12, %r11, 0x3340U;
-; O0-NEXT: selp.b32 %r14, %r6, %r5, %p2;
-; O0-NEXT: selp.b32 %r15, %r4, %r3, %p1;
-; O0-NEXT: prmt.b32 %r16, %r15, %r14, 0x3340U;
-; O0-NEXT: prmt.b32 %r17, %r16, %r13, 0x5410U;
-; O0-NEXT: st.param.b32 [func_retval0], %r17;
+; O0-NEXT: prmt.b32 %r11, %r2, 0, 0x7770U;
+; O0-NEXT: prmt.b32 %r12, %r2, 0, 0x7771U;
+; O0-NEXT: prmt.b32 %r13, %r2, 0, 0x7772U;
+; O0-NEXT: prmt.b32 %r14, %r2, 0, 0x7773U;
+; O0-NEXT: prmt.b32 %r15, %r1, 0, 0x7773U;
+; O0-NEXT: selp.b32 %r16, %r15, %r14, %p4;
+; O0-NEXT: prmt.b32 %r17, %r1, 0, 0x7772U;
+; O0-NEXT: selp.b32 %r18, %r17, %r13, %p3;
+; O0-NEXT: prmt.b32 %r19, %r18, %r16, 0x3340U;
+; O0-NEXT: prmt.b32 %r20, %r1, 0, 0x7771U;
+; O0-NEXT: selp.b32 %r21, %r20, %r12, %p2;
+; O0-NEXT: prmt.b32 %r22, %r1, 0, 0x7770U;
+; O0-NEXT: selp.b32 %r23, %r22, %r11, %p1;
+; O0-NEXT: prmt.b32 %r24, %r23, %r21, 0x3340U;
+; O0-NEXT: prmt.b32 %r25, %r24, %r19, 0x5410U;
+; O0-NEXT: st.param.b32 [func_retval0], %r25;
; O0-NEXT: ret;
;
; O3-LABEL: test_smax(
; O3: {
; O3-NEXT: .reg .pred %p<5>;
-; O3-NEXT: .reg .b32 %r<18>;
+; O3-NEXT: .reg .b32 %r<26>;
; O3-EMPTY:
; O3-NEXT: // %bb.0:
; O3-NEXT: ld.param.b32 %r1, [test_smax_param_0];
; O3-NEXT: ld.param.b32 %r2, [test_smax_param_1];
-; O3-NEXT: prmt.b32 %r3, %r2, 0, 0x7770U;
-; O3-NEXT: prmt.b32 %r4, %r1, 0, 0x7770U;
+; O3-NEXT: prmt.b32 %r3, %r2, 0, 0x8880U;
+; O3-NEXT: prmt.b32 %r4, %r1, 0, 0x8880U;
; O3-NEXT: setp.gt.s32 %p1, %r4, %r3;
-; O3-NEXT: prmt.b32 %r5, %r2, 0, 0x7771U;
-; O3-NEXT: prmt.b32 %r6, %r1, 0, 0x7771U;
+; O3-NEXT: prmt.b32 %r5, %r2, 0, 0x9991U;
+; O3-NEXT: prmt.b32 %r6, %r1, 0, 0x9991U;
; O3-NEXT: setp.gt.s32 %p2, %r6, %r5;
-; O3-NEXT: prmt.b32 %r7, %r2, 0, 0x7772U;
-; O3-NEXT: prmt.b32 %r8, %r1, 0, 0x7772U;
+; O3-NEXT: prmt.b32 %r7, %r2, 0, 0xaaa2U;
+; O3-NEXT: prmt.b32 %r8, %r1, 0, 0xaaa2U;
; O3-NEXT: setp.gt.s32 %p3, %r8, %r7;
-; O3-NEXT: prmt.b32 %r9, %r2, 0, 0x7773U;
-; O3-NEXT: prmt.b32 %r10, %r1, 0, 0x7773U;
+; O3-NEXT: prmt.b32 %r9, %r2, 0, 0xbbb3U;
+; O3-NEXT: prmt.b32 %r10, %r1, 0, 0xbbb3U;
; O3-NEXT: setp.gt.s32 %p4, %r10, %r9;
-; O3-NEXT: selp.b32 %r11, %r10, %r9, %p4;
-; O3-NEXT: selp.b32 %r12, %r8, %r7, %p3;
-; O3-NEXT: prmt.b32 %r13, %r12, %r11, 0x3340U;
-; O3-NEXT: selp.b32 %r14, %r6, %r5, %p2;
-; O3-NEXT: selp.b32 %r15, %r4, %r3, %p1;
-; O3-NEXT: prmt.b32 %r16, %r15, %r14, 0x3340U;
-; O3-NEXT: prmt.b32 %r17, %r16, %r13, 0x5410U;
-; O3-NEXT: st.param.b32 [func_retval0], %r17;
+; O3-NEXT: prmt.b32 %r11, %r2, 0, 0x7770U;
+; O3-NEXT: prmt.b32 %r12, %r2, 0, 0x7771U;
+; O3-NEXT: prmt.b32 %r13, %r2, 0, 0x7772U;
+; O3-NEXT: prmt.b32 %r14, %r2, 0, 0x7773U;
+; O3-NEXT: prmt.b32 %r15, %r1, 0, 0x7773U;
+; O3-NEXT: selp.b32 %r16, %r15, %r14, %p4;
+; O3-NEXT: prmt.b32 %r17, %r1, 0, 0x7772U;
+; O3-NEXT: selp.b32 %r18, %r17, %r13, %p3;
+; O3-NEXT: prmt.b32 %r19, %r18, %r16, 0x3340U;
+; O3-NEXT: prmt.b32 %r20, %r1, 0, 0x7771U;
+; O3-NEXT: selp.b32 %r21, %r20, %r12, %p2;
+; O3-NEXT: prmt.b32 %r22, %r1, 0, 0x7770U;
+; O3-NEXT: selp.b32 %r23, %r22, %r11, %p1;
+; O3-NEXT: prmt.b32 %r24, %r23, %r21, 0x3340U;
+; O3-NEXT: prmt.b32 %r25, %r24, %r19, 0x5410U;
+; O3-NEXT: st.param.b32 [func_retval0], %r25;
; O3-NEXT: ret;
%cmp = icmp sgt <4 x i8> %a, %b
%r = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %b
@@ -473,61 +489,77 @@ define <4 x i8> @test_smin(<4 x i8> %a, <4 x i8> %b) #0 {
; O0-LABEL: test_smin(
; O0: {
; O0-NEXT: .reg .pred %p<5>;
-; O0-NEXT: .reg .b32 %r<18>;
+; O0-NEXT: .reg .b32 %r<26>;
; O0-EMPTY:
; O0-NEXT: // %bb.0:
; O0-NEXT: ld.param.b32 %r2, [test_smin_param_1];
; O0-NEXT: ld.param.b32 %r1, [test_smin_param_0];
-; O0-NEXT: prmt.b32 %r3, %r2, 0, 0x7770U;
-; O0-NEXT: prmt.b32 %r4, %r1, 0, 0x7770U;
+; O0-NEXT: prmt.b32 %r3, %r2, 0, 0x8880U;
+; O0-NEXT: prmt.b32 %r4, %r1, 0, 0x8880U;
; O0-NEXT: setp.le.s32 %p1, %r4, %r3;
-; O0-NEXT: prmt.b32 %r5, %r2, 0, 0x7771U;
-; O0-NEXT: prmt.b32 %r6, %r1, 0, 0x7771U;
+; O0-NEXT: prmt.b32 %r5, %r2, 0, 0x9991U;
+; O0-NEXT: prmt.b32 %r6, %r1, 0, 0x9991U;
; O0-NEXT: setp.le.s32 %p2, %r6, %r5;
-; O0-NEXT: prmt.b32 %r7, %r2, 0, 0x7772U;
-; O0-NEXT: prmt.b32 %r8, %r1, 0, 0x7772U;
+; O0-NEXT: prmt.b32 %r7, %r2, 0, 0xaaa2U;
+; O0-NEXT: prmt.b32 %r8, %r1, 0, 0xaaa2U;
; O0-NEXT: setp.le.s32 %p3, %r8, %r7;
-; O0-NEXT: prmt.b32 %r9, %r2, 0, 0x7773U;
-; O0-NEXT: prmt.b32 %r10, %r1, 0, 0x7773U;
+; O0-NEXT: prmt.b32 %r9, %r2, 0, 0xbbb3U;
+; O0-NEXT: prmt.b32 %r10, %r1, 0, 0xbbb3U;
; O0-NEXT: setp.le.s32 %p4, %r10, %r9;
-; O0-NEXT: selp.b32 %r11, %r10, %r9, %p4;
-; O0-NEXT: selp.b32 %r12, %r8, %r7, %p3;
-; O0-NEXT: prmt.b32 %r13, %r12, %r11, 0x3340U;
-; O0-NEXT: selp.b32 %r14, %r6, %r5, %p2;
-; O0-NEXT: selp.b32 %r15, %r4, %r3, %p1;
-; O0-NEXT: prmt.b32 %r16, %r15, %r14, 0x3340U;
-; O0-NEXT: prmt.b32 %r17, %r16, %r13, 0x5410U;
-; O0-NEXT: st.param.b32 [func_retval0], %r17;
+; O0-NEXT: prmt.b32 %r11, %r2, 0, 0x7770U;
+; O0-NEXT: prmt.b32 %r12, %r2, 0, 0x7771U;
+; O0-NEXT: prmt.b32 %r13, %r2, 0, 0x7772U;
+; O0-NEXT: prmt.b32 %r14, %r2, 0, 0x7773U;
+; O0-NEXT: prmt.b32 %r15, %r1, 0, 0x7773U;
+; O0-NEXT: selp.b32 %r16, %r15, %r14, %p4;
+; O0-NEXT: prmt.b32 %r17, %r1, 0, 0x7772U;
+; O0-NEXT: selp.b32 %r18, %r17, %r13, %p3;
+; O0-NEXT: prmt.b32 %r19, %r18, %r16, 0x3340U;
+; O0-NEXT: prmt.b32 %r20, %r1, 0, 0x7771U;
+; O0-NEXT: selp.b32 %r21, %r20, %r12, %p2;
+; O0-NEXT: prmt.b32 %r22, %r1, 0, 0x7770U;
+; O0-NEXT: selp.b32 %r23, %r22, %r11, %p1;
+; O0-NEXT: prmt.b32 %r24, %r23, %r21, 0x3340U;
+; O0-NEXT: prmt.b32 %r25, %r24, %r19, 0x5410U;
+; O0-NEXT: st.param.b32 [func_retval0], %r25;
; O0-NEXT: ret;
;
; O3-LABEL: test_smin(
; O3: {
; O3-NEXT: .reg .pred %p<5>;
-; O3-NEXT: .reg .b32 %r<18>;
+; O3-NEXT: .reg .b32 %r<26>;
; O3-EMPTY:
; O3-NEXT: // %bb.0:
; O3-NEXT: ld.param.b32 %r1, [test_smin_param_0];
; O3-NEXT: ld.param.b32 %r2, [test_smin_param_1];
-; O3-NEXT: prmt.b32 %r3, %r2, 0, 0x7770U;
-; O3-NEXT: prmt.b32 %r4, %r1, 0, 0x7770U;
+; O3-NEXT: prmt.b32 %r3, %r2, 0, 0x8880U;
+; O3-NEXT: prmt.b32 %r4, %r1, 0, 0x8880U;
; O3-NEXT: setp.le.s32 %p1, %r4, %r3;
-; O3-NEXT: prmt.b32 %r5, %r2, 0, 0x7771U;
-; O3-NEXT: prmt.b32 %r6, %r1, 0, 0x7771U;
+; O3-NEXT: prmt.b32 %r5, %r2, 0, 0x9991U;
+; O3-NEXT: prmt.b32 %r6, %r1, 0, 0x9991U;
; O3-NEXT: setp.le.s32 %p2, %r6, %r5;
-; O3-NEXT: prmt.b32 %r7, %r2, 0, 0x7772U;
-; O3-NEXT: prmt.b32 %r8, %r1, 0, 0x7772U;
+; O3-NEXT: prmt.b32 %r7, %r2, 0, 0xaaa2U;
+; O3-NEXT: prmt.b32 %r8, %r1, 0, 0xaaa2U;
; O3-NEXT: setp.le.s32 %p3, %r8, %r7;
-; O3-NEXT: prmt.b32 %r9, %r2, 0, 0x7773U;
-; O3-NEXT: prmt.b32 %r10, %r1, 0, 0x7773U;
+; O3-NEXT: prmt.b32 %r9, %r2, 0, 0xbbb3U;
+; O3-NEXT: prmt.b32 %r10, %r1, 0, 0xbbb3U;
; O3-NEXT: setp.le.s32 %p4, %r10, %r9;
-; O3-NEXT: selp.b32 %r11, %r10, %r9, %p4;
-; O3-NEXT: selp.b32 %r12, %r8, %r7, %p3;
-; O3-NEXT: prmt.b32 %r13, %r12, %r11, 0x3340U;
-; O3-NEXT: selp.b32 %r14, %r6, %r5, %p2;
-; O3-NEXT: selp.b32 %r15, %r4, %r3, %p1;
-; O3-NEXT: prmt.b32 %r16, %r15, %r14, 0x3340U;
-; O3-NEXT: prmt.b32 %r17, %r16, %r13, 0x5410U;
-; O3-NEXT: st.param.b32 [func_retval0], %r17;
+; O3-NEXT: prmt.b32 %r11, %r2, 0, 0x7770U;
+; O3-NEXT: prmt.b32 %r12, %r2, 0, 0x7771U;
+; O3-NEXT: prmt.b32 %r13, %r2, 0, 0x7772U;
+; O3-NEXT: prmt.b32 %r14, %r2, 0, 0x7773U;
+; O3-NEXT: prmt.b32 %r15, %r1, 0, 0x7773U;
+; O3-NEXT: selp.b32 %r16, %r15, %r14, %p4;
+; O3-NEXT: prmt.b32 %r17, %r1, 0, 0x7772U;
+; O3-NEXT: selp.b32 %r18, %r17, %r13, %p3;
+; O3-NEXT: prmt.b32 %r19, %r18, %r16, 0x3340U;
+; O3-NEXT: prmt.b32 %r20, %r1, 0, 0x7771U;
+; O3-NEXT: selp.b32 %r21, %r20, %r12, %p2;
+; O3-NEXT: prmt.b32 %r22, %r1, 0, 0x7770U;
+; O3-NEXT: selp.b32 %r23, %r22, %r11, %p1;
+; O3-NEXT: prmt.b32 %r24, %r23, %r21, 0x3340U;
+; O3-NEXT: prmt.b32 %r25, %r24, %r19, 0x5410U;
+; O3-NEXT: st.param.b32 [func_retval0], %r25;
; O3-NEXT: ret;
%cmp = icmp sle <4 x i8> %a, %b
%r = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %b
diff --git a/llvm/test/CodeGen/NVPTX/sext-setcc.ll b/llvm/test/CodeGen/NVPTX/sext-setcc.ll
index 9a67bdfeb067b..97918a6f26cdf 100644
--- a/llvm/test/CodeGen/NVPTX/sext-setcc.ll
+++ b/llvm/test/CodeGen/NVPTX/sext-setcc.ll
@@ -29,7 +29,6 @@ define <4 x i8> @sext_setcc_v4i1_to_v4i8(ptr %p) {
; CHECK-LABEL: sext_setcc_v4i1_to_v4i8(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<5>;
-; CHECK-NEXT: .reg .b16 %rs<5>;
; CHECK-NEXT: .reg .b32 %r<13>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
@@ -37,17 +36,13 @@ define <4 x i8> @sext_setcc_v4i1_to_v4i8(ptr %p) {
; CHECK-NEXT: ld.param.b64 %rd1, [sext_setcc_v4i1_to_v4i8_param_0];
; CHECK-NEXT: ld.b32 %r1, [%rd1];
; CHECK-NEXT: prmt.b32 %r2, %r1, 0, 0x7770U;
-; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
-; CHECK-NEXT: setp.eq.b16 %p1, %rs1, 0;
+; CHECK-NEXT: setp.eq.b32 %p1, %r2, 0;
; CHECK-NEXT: prmt.b32 %r3, %r1, 0, 0x7771U;
-; CHECK-NEXT: cvt.u16.u32 %rs2, %r3;
-; CHECK-NEXT: setp.eq.b16 %p2, %rs2, 0;
+; CHECK-NEXT: setp.eq.b32 %p2, %r3, 0;
; CHECK-NEXT: prmt.b32 %r4, %r1, 0, 0x7772U;
-; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
-; CHECK-NEXT: setp.eq.b16 %p3, %rs3, 0;
+; CHECK-NEXT: setp.eq.b32 %p3, %r4, 0;
; CHECK-NEXT: prmt.b32 %r5, %r1, 0, 0x7773U;
-; CHECK-NEXT: cvt.u16.u32 %rs4, %r5;
-; CHECK-NEXT: setp.eq.b16 %p4, %rs4, 0;
+; CHECK-NEXT: setp.eq.b32 %p4, %r5, 0;
; CHECK-NEXT: selp.b32 %r6, -1, 0, %p4;
; CHECK-NEXT: selp.b32 %r7, -1, 0, %p3;
; CHECK-NEXT: prmt.b32 %r8, %r7, %r6, 0x3340U;
diff --git a/llvm/test/CodeGen/NVPTX/trunc-setcc.ll b/llvm/test/CodeGen/NVPTX/trunc-setcc.ll
new file mode 100644
index 0000000000000..f22e37e203966
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/trunc-setcc.ll
@@ -0,0 +1,269 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mcpu=sm_50 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -mcpu=sm_50 | %ptxas-verify -arch=sm_50 %}
+
+target triple = "nvptx64-nvidia-cuda"
+
+define i1 @trunc_nsw_singed_const(i32 %a) {
+; CHECK-LABEL: trunc_nsw_singed_const(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [trunc_nsw_singed_const_param_0];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: setp.gt.s32 %p1, %r2, -1;
+; CHECK-NEXT: selp.b32 %r3, -1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
+ %a2 = add i32 %a, 1
+ %b = trunc nsw i32 %a2 to i8
+ %c = icmp sgt i8 %b, -1
+ ret i1 %c
+}
+
+define i1 @trunc_nuw_singed_const(i32 %a) {
+; CHECK-LABEL: trunc_nuw_singed_const(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<4>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b8 %rs1, [trunc_nuw_singed_const_param_0];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: cvt.s16.s8 %rs3, %rs2;
+; CHECK-NEXT: setp.lt.s16 %p1, %rs3, 100;
+; CHECK-NEXT: selp.b32 %r1, -1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
+ %a2 = add i32 %a, 1
+ %b = trunc nuw i32 %a2 to i8
+ %c = icmp slt i8 %b, 100
+ ret i1 %c
+}
+
+define i1 @trunc_nsw_unsinged_const(i32 %a) {
+; CHECK-LABEL: trunc_nsw_unsinged_const(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<4>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b8 %rs1, [trunc_nsw_unsinged_const_param_0];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: and.b16 %rs3, %rs2, 255;
+; CHECK-NEXT: setp.lt.u16 %p1, %rs3, 236;
+; CHECK-NEXT: selp.b32 %r1, -1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
+ %a2 = add i32 %a, 1
+ %b = trunc nsw i32 %a2 to i8
+ %c = icmp ult i8 %b, -20
+ ret i1 %c
+}
+
+define i1 @trunc_nuw_unsinged_const(i32 %a) {
+; CHECK-LABEL: trunc_nuw_unsinged_const(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [trunc_nuw_unsinged_const_param_0];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: setp.gt.u32 %p1, %r2, 100;
+; CHECK-NEXT: selp.b32 %r3, -1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
+ %a2 = add i32 %a, 1
+ %b = trunc nuw i32 %a2 to i8
+ %c = icmp ugt i8 %b, 100
+ ret i1 %c
+}
+
+
+define i1 @trunc_nsw_eq_const(i32 %a) {
+; CHECK-LABEL: trunc_nsw_eq_const(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [trunc_nsw_eq_const_param_0];
+; CHECK-NEXT: setp.eq.b32 %p1, %r1, 99;
+; CHECK-NEXT: selp.b32 %r2, -1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
+; CHECK-NEXT: ret;
+ %a2 = add i32 %a, 1
+ %b = trunc nsw i32 %a2 to i8
+ %c = icmp eq i8 %b, 100
+ ret i1 %c
+}
+
+define i1 @trunc_nuw_eq_const(i32 %a) {
+; CHECK-LABEL: trunc_nuw_eq_const(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [trunc_nuw_eq_const_param_0];
+; CHECK-NEXT: setp.eq.b32 %p1, ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/150270
More information about the llvm-commits
mailing list