[llvm] [NVPTX] Use 0 immediate for i1 trunc, cleanup dead code (PR #135646)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 14 09:52:55 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-nvptx
Author: Alex MacLean (AlexMaclean)
<details>
<summary>Changes</summary>
Update instruction selection for truncation to i1 to emit "setp.ne %v, 0" instead of "setp.eq %v, 1", since comparing against a zero immediate is the preferable canonical form. Also remove some dead code relating to the "set" instruction, which we do not currently support.
---
Patch is 27.65 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/135646.diff
17 Files Affected:
- (modified) llvm/lib/Target/NVPTX/NVPTXInstrInfo.td (+6-142)
- (modified) llvm/test/CodeGen/NVPTX/bf16-instructions.ll (+4-4)
- (modified) llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll (+1-1)
- (modified) llvm/test/CodeGen/NVPTX/combine-mad.ll (+2-2)
- (modified) llvm/test/CodeGen/NVPTX/copysign.ll (+4-4)
- (modified) llvm/test/CodeGen/NVPTX/f16-instructions.ll (+2-2)
- (modified) llvm/test/CodeGen/NVPTX/f16x2-instructions.ll (+1-1)
- (modified) llvm/test/CodeGen/NVPTX/forward-ld-param.ll (+1-1)
- (modified) llvm/test/CodeGen/NVPTX/i1-int-to-fp.ll (+6-6)
- (modified) llvm/test/CodeGen/NVPTX/i1-load-lower.ll (+1-1)
- (modified) llvm/test/CodeGen/NVPTX/i128.ll (+4-4)
- (modified) llvm/test/CodeGen/NVPTX/i16x2-instructions.ll (+1-1)
- (modified) llvm/test/CodeGen/NVPTX/i8x4-instructions.ll (+1-1)
- (modified) llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll (+1-1)
- (modified) llvm/test/CodeGen/NVPTX/lower-byval-args.ll (+6-6)
- (modified) llvm/test/CodeGen/NVPTX/param-load-store.ll (+1-1)
- (modified) llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll (+2-2)
``````````diff
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index aa0eedb1b7446..17e7fafafb421 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1703,39 +1703,6 @@ def SETP_bf16x2rr :
[]>,
Requires<[hasBF16Math, hasPTX<78>, hasSM<90>]>;
-
-// FIXME: This doesn't appear to be correct. The "set" mnemonic has the form
-// "set.CmpOp{.ftz}.dtype.stype", where dtype is the type of the destination
-// reg, either u32, s32, or f32. Anyway these aren't used at the moment.
-
-let hasSideEffects = false in {
- multiclass SET<string TypeStr, RegisterClass RC, Operand ImmCls> {
- def rr : NVPTXInst<(outs Int32Regs:$dst),
- (ins RC:$a, RC:$b, CmpMode:$cmp),
- !strconcat("set$cmp.", TypeStr, " \t$dst, $a, $b;"), []>;
- def ri : NVPTXInst<(outs Int32Regs:$dst),
- (ins RC:$a, ImmCls:$b, CmpMode:$cmp),
- !strconcat("set$cmp.", TypeStr, " \t$dst, $a, $b;"), []>;
- def ir : NVPTXInst<(outs Int32Regs:$dst),
- (ins ImmCls:$a, RC:$b, CmpMode:$cmp),
- !strconcat("set$cmp.", TypeStr, " \t$dst, $a, $b;"), []>;
- }
-}
-
-defm SET_b16 : SET<"b16", Int16Regs, i16imm>;
-defm SET_s16 : SET<"s16", Int16Regs, i16imm>;
-defm SET_u16 : SET<"u16", Int16Regs, i16imm>;
-defm SET_b32 : SET<"b32", Int32Regs, i32imm>;
-defm SET_s32 : SET<"s32", Int32Regs, i32imm>;
-defm SET_u32 : SET<"u32", Int32Regs, i32imm>;
-defm SET_b64 : SET<"b64", Int64Regs, i64imm>;
-defm SET_s64 : SET<"s64", Int64Regs, i64imm>;
-defm SET_u64 : SET<"u64", Int64Regs, i64imm>;
-defm SET_f16 : SET<"f16", Int16Regs, f16imm>;
-defm SET_bf16 : SET<"bf16", Int16Regs, bf16imm>, Requires<[hasPTX<78>, hasSM<90>]>;
-defm SET_f32 : SET<"f32", Float32Regs, f32imm>;
-defm SET_f64 : SET<"f64", Float64Regs, f64imm>;
-
//-----------------------------------
// Data Movement (Load / Store, Move)
//-----------------------------------
@@ -1842,16 +1809,7 @@ multiclass ISET_FORMAT<PatFrag OpNode, PatLeaf Mode,
Instruction setp_32ir,
Instruction setp_64rr,
Instruction setp_64ri,
- Instruction setp_64ir,
- Instruction set_16rr,
- Instruction set_16ri,
- Instruction set_16ir,
- Instruction set_32rr,
- Instruction set_32ri,
- Instruction set_32ir,
- Instruction set_64rr,
- Instruction set_64ri,
- Instruction set_64ir> {
+ Instruction setp_64ir> {
// i16 -> pred
def : Pat<(i1 (OpNode i16:$a, i16:$b)),
(setp_16rr $a, $b, Mode)>;
@@ -1873,38 +1831,13 @@ multiclass ISET_FORMAT<PatFrag OpNode, PatLeaf Mode,
(setp_64ri $a, imm:$b, Mode)>;
def : Pat<(i1 (OpNode imm:$a, i64:$b)),
(setp_64ir imm:$a, $b, Mode)>;
-
- // i16 -> i32
- def : Pat<(i32 (OpNode i16:$a, i16:$b)),
- (set_16rr $a, $b, Mode)>;
- def : Pat<(i32 (OpNode i16:$a, imm:$b)),
- (set_16ri $a, imm:$b, Mode)>;
- def : Pat<(i32 (OpNode imm:$a, i16:$b)),
- (set_16ir imm:$a, $b, Mode)>;
- // i32 -> i32
- def : Pat<(i32 (OpNode i32:$a, i32:$b)),
- (set_32rr $a, $b, Mode)>;
- def : Pat<(i32 (OpNode i32:$a, imm:$b)),
- (set_32ri $a, imm:$b, Mode)>;
- def : Pat<(i32 (OpNode imm:$a, i32:$b)),
- (set_32ir imm:$a, $b, Mode)>;
- // i64 -> i32
- def : Pat<(i32 (OpNode i64:$a, Int64Regs:$b)),
- (set_64rr $a, $b, Mode)>;
- def : Pat<(i32 (OpNode i64:$a, imm:$b)),
- (set_64ri $a, imm:$b, Mode)>;
- def : Pat<(i32 (OpNode imm:$a, i64:$b)),
- (set_64ir imm:$a, $b, Mode)>;
}
multiclass ISET_FORMAT_SIGNED<PatFrag OpNode, PatLeaf Mode>
: ISET_FORMAT<OpNode, Mode,
SETP_s16rr, SETP_s16ri, SETP_s16ir,
SETP_s32rr, SETP_s32ri, SETP_s32ir,
- SETP_s64rr, SETP_s64ri, SETP_s64ir,
- SET_s16rr, SET_s16ri, SET_s16ir,
- SET_s32rr, SET_s32ri, SET_s32ir,
- SET_s64rr, SET_s64ri, SET_s64ir> {
+ SETP_s64rr, SETP_s64ri, SETP_s64ir> {
// TableGen doesn't like empty multiclasses.
def : PatLeaf<(i32 0)>;
}
@@ -1913,10 +1846,7 @@ multiclass ISET_FORMAT_UNSIGNED<PatFrag OpNode, PatLeaf Mode>
: ISET_FORMAT<OpNode, Mode,
SETP_u16rr, SETP_u16ri, SETP_u16ir,
SETP_u32rr, SETP_u32ri, SETP_u32ir,
- SETP_u64rr, SETP_u64ri, SETP_u64ir,
- SET_u16rr, SET_u16ri, SET_u16ir,
- SET_u32rr, SET_u32ri, SET_u32ir,
- SET_u64rr, SET_u64ri, SET_u64ir> {
+ SETP_u64rr, SETP_u64ri, SETP_u64ir> {
// TableGen doesn't like empty multiclasses.
def : PatLeaf<(i32 0)>;
}
@@ -2048,47 +1978,6 @@ multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
(SETP_f64ri $a, fpimm:$b, Mode)>;
def : Pat<(i1 (OpNode fpimm:$a, f64:$b)),
(SETP_f64ir fpimm:$a, $b, Mode)>;
-
- // f16 -> i32
- def : Pat<(i32 (OpNode f16:$a, f16:$b)),
- (SET_f16rr $a, $b, ModeFTZ)>,
- Requires<[useFP16Math, doF32FTZ]>;
- def : Pat<(i32 (OpNode f16:$a, f16:$b)),
- (SET_f16rr $a, $b, Mode)>,
- Requires<[useFP16Math]>;
-
- // bf16 -> i32
- def : Pat<(i32 (OpNode bf16:$a, bf16:$b)),
- (SET_bf16rr $a, $b, ModeFTZ)>,
- Requires<[hasBF16Math, doF32FTZ]>;
- def : Pat<(i32 (OpNode bf16:$a, bf16:$b)),
- (SET_bf16rr $a, $b, Mode)>,
- Requires<[hasBF16Math]>;
-
- // f32 -> i32
- def : Pat<(i32 (OpNode f32:$a, f32:$b)),
- (SET_f32rr $a, $b, ModeFTZ)>,
- Requires<[doF32FTZ]>;
- def : Pat<(i32 (OpNode f32:$a, f32:$b)),
- (SET_f32rr $a, $b, Mode)>;
- def : Pat<(i32 (OpNode f32:$a, fpimm:$b)),
- (SET_f32ri $a, fpimm:$b, ModeFTZ)>,
- Requires<[doF32FTZ]>;
- def : Pat<(i32 (OpNode f32:$a, fpimm:$b)),
- (SET_f32ri $a, fpimm:$b, Mode)>;
- def : Pat<(i32 (OpNode fpimm:$a, f32:$b)),
- (SET_f32ir fpimm:$a, $b, ModeFTZ)>,
- Requires<[doF32FTZ]>;
- def : Pat<(i32 (OpNode fpimm:$a, f32:$b)),
- (SET_f32ir fpimm:$a, $b, Mode)>;
-
- // f64 -> i32
- def : Pat<(i32 (OpNode f64:$a, f64:$b)),
- (SET_f64rr $a, $b, Mode)>;
- def : Pat<(i32 (OpNode f64:$a, fpimm:$b)),
- (SET_f64ri $a, fpimm:$b, Mode)>;
- def : Pat<(i32 (OpNode fpimm:$a, f64:$b)),
- (SET_f64ir fpimm:$a, $b, Mode)>;
}
defm FSetOGT : FSET_FORMAT<setogt, CmpGT, CmpGT_FTZ>;
@@ -2899,17 +2788,17 @@ def : Pat<(i32 (trunc i64:$a)),
def : Pat<(i16 (trunc i64:$a)),
(CVT_u16_u64 $a, CvtNONE)>;
def : Pat<(i1 (trunc i64:$a)),
- (SETP_b64ri (ANDb64ri $a, 1), 1, CmpEQ)>;
+ (SETP_b64ri (ANDb64ri $a, 1), 0, CmpNE)>;
// truncate i32
def : Pat<(i16 (trunc i32:$a)),
(CVT_u16_u32 $a, CvtNONE)>;
def : Pat<(i1 (trunc i32:$a)),
- (SETP_b32ri (ANDb32ri $a, 1), 1, CmpEQ)>;
+ (SETP_b32ri (ANDb32ri $a, 1), 0, CmpNE)>;
// truncate i16
def : Pat<(i1 (trunc i16:$a)),
- (SETP_b16ri (ANDb16ri $a, 1), 1, CmpEQ)>;
+ (SETP_b16ri (ANDb16ri $a, 1), 0, CmpNE)>;
// sext_inreg
def : Pat<(sext_inreg i16:$a, i8), (CVT_INREG_s16_s8 $a)>;
@@ -2919,31 +2808,6 @@ def : Pat<(sext_inreg i64:$a, i8), (CVT_INREG_s64_s8 $a)>;
def : Pat<(sext_inreg i64:$a, i16), (CVT_INREG_s64_s16 $a)>;
def : Pat<(sext_inreg i64:$a, i32), (CVT_INREG_s64_s32 $a)>;
-
-// Select instructions with 32-bit predicates
-def : Pat<(select i32:$pred, i16:$a, i16:$b),
- (SELP_b16rr $a, $b,
- (SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
-def : Pat<(select i32:$pred, i32:$a, i32:$b),
- (SELP_b32rr $a, $b,
- (SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
-def : Pat<(select i32:$pred, i64:$a, i64:$b),
- (SELP_b64rr $a, $b,
- (SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
-def : Pat<(select i32:$pred, f16:$a, f16:$b),
- (SELP_f16rr $a, $b,
- (SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
-def : Pat<(select i32:$pred, bf16:$a, bf16:$b),
- (SELP_bf16rr $a, $b,
- (SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
-def : Pat<(select i32:$pred, f32:$a, f32:$b),
- (SELP_f32rr $a, $b,
- (SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
-def : Pat<(select i32:$pred, f64:$a, f64:$b),
- (SELP_f64rr $a, $b,
- (SETP_b32ri (ANDb32ri $pred, 1), 1, CmpEQ))>;
-
-
let hasSideEffects = false in {
// pack a set of smaller int registers to a larger int register
def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d),
diff --git a/llvm/test/CodeGen/NVPTX/bf16-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16-instructions.ll
index 9be54a746cacd..e73c3310d5bac 100644
--- a/llvm/test/CodeGen/NVPTX/bf16-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/bf16-instructions.ll
@@ -1123,7 +1123,7 @@ define bfloat @test_uitofp_i1(i1 %a) {
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u8 %rs1, [test_uitofp_i1_param_0];
; SM70-NEXT: and.b16 %rs2, %rs1, 1;
-; SM70-NEXT: setp.eq.b16 %p1, %rs2, 1;
+; SM70-NEXT: setp.ne.b16 %p1, %rs2, 0;
; SM70-NEXT: selp.b32 %r1, 1, 0, %p1;
; SM70-NEXT: cvt.rn.f32.u32 %f1, %r1;
; SM70-NEXT: mov.b32 %r2, %f1;
@@ -1147,7 +1147,7 @@ define bfloat @test_uitofp_i1(i1 %a) {
; SM80-NEXT: // %bb.0:
; SM80-NEXT: ld.param.u8 %rs1, [test_uitofp_i1_param_0];
; SM80-NEXT: and.b16 %rs2, %rs1, 1;
-; SM80-NEXT: setp.eq.b16 %p1, %rs2, 1;
+; SM80-NEXT: setp.ne.b16 %p1, %rs2, 0;
; SM80-NEXT: selp.b32 %r1, 1, 0, %p1;
; SM80-NEXT: cvt.rn.f32.u32 %f1, %r1;
; SM80-NEXT: cvt.rn.bf16.f32 %rs3, %f1;
@@ -1164,7 +1164,7 @@ define bfloat @test_uitofp_i1(i1 %a) {
; SM80-FTZ-NEXT: // %bb.0:
; SM80-FTZ-NEXT: ld.param.u8 %rs1, [test_uitofp_i1_param_0];
; SM80-FTZ-NEXT: and.b16 %rs2, %rs1, 1;
-; SM80-FTZ-NEXT: setp.eq.b16 %p1, %rs2, 1;
+; SM80-FTZ-NEXT: setp.ne.b16 %p1, %rs2, 0;
; SM80-FTZ-NEXT: selp.b32 %r1, 1, 0, %p1;
; SM80-FTZ-NEXT: cvt.rn.f32.u32 %f1, %r1;
; SM80-FTZ-NEXT: cvt.rn.bf16.f32 %rs3, %f1;
@@ -1180,7 +1180,7 @@ define bfloat @test_uitofp_i1(i1 %a) {
; SM90-NEXT: // %bb.0:
; SM90-NEXT: ld.param.u8 %rs1, [test_uitofp_i1_param_0];
; SM90-NEXT: and.b16 %rs2, %rs1, 1;
-; SM90-NEXT: setp.eq.b16 %p1, %rs2, 1;
+; SM90-NEXT: setp.ne.b16 %p1, %rs2, 0;
; SM90-NEXT: selp.b32 %r1, 1, 0, %p1;
; SM90-NEXT: cvt.rn.bf16.u32 %rs3, %r1;
; SM90-NEXT: st.param.b16 [func_retval0], %rs3;
diff --git a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
index e6d35bd5ba536..3e9da7a5d2134 100644
--- a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
@@ -243,7 +243,7 @@ define <2 x bfloat> @test_select(<2 x bfloat> %a, <2 x bfloat> %b, i1 zeroext %c
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u8 %rs1, [test_select_param_2];
; CHECK-NEXT: and.b16 %rs2, %rs1, 1;
-; CHECK-NEXT: setp.eq.b16 %p1, %rs2, 1;
+; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0;
; CHECK-NEXT: ld.param.b32 %r1, [test_select_param_1];
; CHECK-NEXT: ld.param.b32 %r2, [test_select_param_0];
; CHECK-NEXT: selp.b32 %r3, %r2, %r1, %p1;
diff --git a/llvm/test/CodeGen/NVPTX/combine-mad.ll b/llvm/test/CodeGen/NVPTX/combine-mad.ll
index 304025fdb15fe..319cadcb27f0f 100644
--- a/llvm/test/CodeGen/NVPTX/combine-mad.ll
+++ b/llvm/test/CodeGen/NVPTX/combine-mad.ll
@@ -146,7 +146,7 @@ define i32 @test4(i32 %a, i32 %b, i32 %c, i1 %p) {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u8 %rs1, [test4_param_3];
; CHECK-NEXT: and.b16 %rs2, %rs1, 1;
-; CHECK-NEXT: setp.eq.b16 %p1, %rs2, 1;
+; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0;
; CHECK-NEXT: ld.param.u32 %r1, [test4_param_0];
; CHECK-NEXT: ld.param.u32 %r2, [test4_param_1];
; CHECK-NEXT: ld.param.u32 %r3, [test4_param_2];
@@ -170,7 +170,7 @@ define i32 @test4_rev(i32 %a, i32 %b, i32 %c, i1 %p) {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u8 %rs1, [test4_rev_param_3];
; CHECK-NEXT: and.b16 %rs2, %rs1, 1;
-; CHECK-NEXT: setp.eq.b16 %p1, %rs2, 1;
+; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0;
; CHECK-NEXT: ld.param.u32 %r1, [test4_rev_param_0];
; CHECK-NEXT: ld.param.u32 %r2, [test4_rev_param_1];
; CHECK-NEXT: ld.param.u32 %r3, [test4_rev_param_2];
diff --git a/llvm/test/CodeGen/NVPTX/copysign.ll b/llvm/test/CodeGen/NVPTX/copysign.ll
index 4a766dd139858..843ef4dbde367 100644
--- a/llvm/test/CodeGen/NVPTX/copysign.ll
+++ b/llvm/test/CodeGen/NVPTX/copysign.ll
@@ -49,7 +49,7 @@ define float @fcopysign_f_d(float %a, double %b) {
; CHECK-NEXT: ld.param.u64 %rd1, [fcopysign_f_d_param_1];
; CHECK-NEXT: shr.u64 %rd2, %rd1, 63;
; CHECK-NEXT: and.b64 %rd3, %rd2, 1;
-; CHECK-NEXT: setp.eq.b64 %p1, %rd3, 1;
+; CHECK-NEXT: setp.ne.b64 %p1, %rd3, 0;
; CHECK-NEXT: selp.f32 %f4, %f3, %f2, %p1;
; CHECK-NEXT: st.param.f32 [func_retval0], %f4;
; CHECK-NEXT: ret;
@@ -72,7 +72,7 @@ define float @fcopysign_f_h(float %a, half %b) {
; CHECK-NEXT: ld.param.u16 %rs1, [fcopysign_f_h_param_1];
; CHECK-NEXT: shr.u16 %rs2, %rs1, 15;
; CHECK-NEXT: and.b16 %rs3, %rs2, 1;
-; CHECK-NEXT: setp.eq.b16 %p1, %rs3, 1;
+; CHECK-NEXT: setp.ne.b16 %p1, %rs3, 0;
; CHECK-NEXT: selp.f32 %f4, %f3, %f2, %p1;
; CHECK-NEXT: st.param.f32 [func_retval0], %f4;
; CHECK-NEXT: ret;
@@ -95,7 +95,7 @@ define double @fcopysign_d_f(double %a, float %b) {
; CHECK-NEXT: ld.param.u32 %r1, [fcopysign_d_f_param_1];
; CHECK-NEXT: shr.u32 %r2, %r1, 31;
; CHECK-NEXT: and.b32 %r3, %r2, 1;
-; CHECK-NEXT: setp.eq.b32 %p1, %r3, 1;
+; CHECK-NEXT: setp.ne.b32 %p1, %r3, 0;
; CHECK-NEXT: selp.f64 %fd4, %fd3, %fd2, %p1;
; CHECK-NEXT: st.param.f64 [func_retval0], %fd4;
; CHECK-NEXT: ret;
@@ -118,7 +118,7 @@ define double @fcopysign_d_h(double %a, half %b) {
; CHECK-NEXT: ld.param.u16 %rs1, [fcopysign_d_h_param_1];
; CHECK-NEXT: shr.u16 %rs2, %rs1, 15;
; CHECK-NEXT: and.b16 %rs3, %rs2, 1;
-; CHECK-NEXT: setp.eq.b16 %p1, %rs3, 1;
+; CHECK-NEXT: setp.ne.b16 %p1, %rs3, 0;
; CHECK-NEXT: selp.f64 %fd4, %fd3, %fd2, %p1;
; CHECK-NEXT: st.param.f64 [func_retval0], %fd4;
; CHECK-NEXT: ret;
diff --git a/llvm/test/CodeGen/NVPTX/f16-instructions.ll b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
index 6897a167a3b8f..bea9db03caf6e 100644
--- a/llvm/test/CodeGen/NVPTX/f16-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
@@ -329,7 +329,7 @@ define half @test_tailcall_flipped(half %a, half %b) #0 {
; CHECK-LABEL: test_select(
; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_select_param_0];
; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_select_param_1];
-; CHECK-DAG: setp.eq.b16 [[PRED:%p[0-9]+]], %rs{{.*}}, 1;
+; CHECK-DAG: setp.ne.b16 [[PRED:%p[0-9]+]], %rs{{.*}}, 0;
; CHECK-NEXT: selp.b16 [[R:%rs[0-9]+]], [[A]], [[B]], [[PRED]];
; CHECK-NEXT: st.param.b16 [func_retval0], [[R]];
; CHECK-NEXT: ret;
@@ -653,7 +653,7 @@ else:
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_dummy
; CHECK: }
-; CHECK: setp.eq.b32 [[PRED:%p[0-9]+]], %r{{[0-9]+}}, 1;
+; CHECK: setp.ne.b32 [[PRED:%p[0-9]+]], %r{{[0-9]+}}, 0;
; CHECK: @[[PRED]] bra [[LOOP]];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK: ret;
diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
index 778c0ad3bad64..e9edabd1ee8af 100644
--- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
@@ -555,7 +555,7 @@ define <2 x half> @test_select(<2 x half> %a, <2 x half> %b, i1 zeroext %c) #0 {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u8 %rs1, [test_select_param_2];
; CHECK-NEXT: and.b16 %rs2, %rs1, 1;
-; CHECK-NEXT: setp.eq.b16 %p1, %rs2, 1;
+; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0;
; CHECK-NEXT: ld.param.b32 %r2, [test_select_param_1];
; CHECK-NEXT: ld.param.b32 %r1, [test_select_param_0];
; CHECK-NEXT: selp.b32 %r3, %r1, %r2, %p1;
diff --git a/llvm/test/CodeGen/NVPTX/forward-ld-param.ll b/llvm/test/CodeGen/NVPTX/forward-ld-param.ll
index 5bf2a84b0013a..6d9710e6d2272 100644
--- a/llvm/test/CodeGen/NVPTX/forward-ld-param.ll
+++ b/llvm/test/CodeGen/NVPTX/forward-ld-param.ll
@@ -112,7 +112,7 @@ define i32 @test_multi_block(ptr byval([10 x i32]) %a, i1 %p) {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u8 %rs1, [test_multi_block_param_1];
; CHECK-NEXT: and.b16 %rs2, %rs1, 1;
-; CHECK-NEXT: setp.eq.b16 %p1, %rs2, 1;
+; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0;
; CHECK-NEXT: not.pred %p2, %p1;
; CHECK-NEXT: @%p2 bra $L__BB5_2;
; CHECK-NEXT: // %bb.1: // %if
diff --git a/llvm/test/CodeGen/NVPTX/i1-int-to-fp.ll b/llvm/test/CodeGen/NVPTX/i1-int-to-fp.ll
index 0bfac0a0ef758..169927bc0ac0f 100644
--- a/llvm/test/CodeGen/NVPTX/i1-int-to-fp.ll
+++ b/llvm/test/CodeGen/NVPTX/i1-int-to-fp.ll
@@ -2,7 +2,7 @@
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
; CHECK-LABEL: foo
-; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
+; CHECK: setp.ne.b16 %[[P:p[0-9]+]], %{{.*}}, 0;
; CHECK: selp.b32 %[[R:r[0-9]+]], 1, 0, %[[P]];
; CHECK: cvt.rn.f32.u32 %f{{.*}}, %[[R]]
define float @foo(i1 %a) {
@@ -11,7 +11,7 @@ define float @foo(i1 %a) {
}
; CHECK-LABEL: foo2
-; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
+; CHECK: setp.ne.b16 %[[P:p[0-9]+]], %{{.*}}, 0;
; CHECK: selp.b32 %[[R:r[0-9]+]], -1, 0, %[[P]];
; CHECK: cvt.rn.f32.s32 %f{{.*}}, %[[R]]
define float @foo2(i1 %a) {
@@ -20,7 +20,7 @@ define float @foo2(i1 %a) {
}
; CHECK-LABEL: foo3
-; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
+; CHECK: setp.ne.b16 %[[P:p[0-9]+]], %{{.*}}, 0;
; CHECK: selp.b32 %[[R:r[0-9]+]], 1, 0, %[[P]];
; CHECK: cvt.rn.f64.u32 %fd{{.*}}, %[[R]]
define double @foo3(i1 %a) {
@@ -29,7 +29,7 @@ define double @foo3(i1 %a) {
}
; CHECK-LABEL: foo4
-; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
+; CHECK: setp.ne.b16 %[[P:p[0-9]+]], %{{.*}}, 0;
; CHECK: selp.b32 %[[R:r[0-9]+]], -1, 0, %[[P]];
; CHECK: cvt.rn.f64.s32 %fd{{.*}}, %[[R]]
define double @foo4(i1 %a) {
@@ -38,7 +38,7 @@ define double @foo4(i1 %a) {
}
; CHECK-LABEL: foo5
-; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
+; CHECK: setp.ne.b16 %[[P:p[0-9]+]], %{{.*}}, 0;
; CHECK: selp.b32 %[[R:r[0-9]+]], 1, 0, %[[P]];
; CHECK: cvt.rn.f16.u32 %{{.*}}, %[[R]]
define half @foo5(i1 %a) {
@@ -47,7 +47,7 @@ define half @foo5(i1 %a) {
}
; CHECK-LABEL: foo6
-; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
+; CHECK: setp.ne.b16 %[[P:p[0-9]+]], %{{.*}}, 0;
; CHECK: selp.b32 %[[R:r[0-9]+]], -1, 0, %[[P]];
; CHECK: cvt.rn.f16.s32 %{{.*}}, %[[R]]
define half @foo6(i1 %a) {
diff --git a/llvm/test/CodeGen/NVPTX/i1-load-lower.ll b/llvm/test/CodeGen/NVPTX/i1-load-lower.ll
index f4a5ff9a333f7..84fd8226bb608 100644
--- a/llvm/test/CodeGen/NVPTX/i1-load-lower.ll
+++ b/llvm/test/CodeGen/NVPTX/i1-load-lower.ll
@@ -14,7 +14,7 @@ define void @foo() {
; CHECK-EMPTY:
; CHECK: ld.global.u8 %rs1, [i1g];
; CHECK: and.b16 %rs2, %rs1, 1;
-; CHECK: setp.eq.b16 %p1, %rs2, 1;
+; CHECK: setp.ne.b16 %p1, %rs2, 0;
; CHECK: @%p1 bra $L__BB0_2;
; CHECK: mov.b16 %rs3, 1;
; CHECK: st.global.u8 [i1g], %rs3;
diff --git a/llvm/test/CodeGen/NVPTX/i128.ll b/llvm/test/CodeGen/NVPTX/i128.ll
index 5b74a47a04591..bf6189c280191 100644
--- a/llvm/test/CodeGen/NVPTX/i128.ll
+++ b/llvm/test/CodeGen/NVPTX/i128.ll
@@ -52,7 +52,7 @@ define i128 @srem_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: selp.b32 %r6, -1, 0, %p10;
; CHECK-NEXT: selp.b32 %r7, %r6, %r5, %p8;
; CHECK-NEXT: and.b32 %r8, %r7, 1;
-; CHECK-NEXT: setp.eq.b32 %p11, %r8, 1;
+; CHECK-NEXT: setp.ne.b32 %p11, %r8, 0;
; CHECK-NEXT: or.pred %p12, %p5, %p11;
; CHECK-NEXT: xor.b64 %rd68, %rd66, 127;
; CHECK-NEXT: or.b64 %rd69, %rd68, %rd67;
@@ -185,7 +185,7 @@ define i128 @urem_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: selp.b32 %r6, -1, 0, %p8;
; CHECK-NEXT: selp.b32 %r7, %r6, %r5, %p6;
; CHECK-NEXT: and.b32 %r8, %r7, 1;
-; CHECK-NEXT: setp.eq.b32 %p9, %r8, 1;
...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/135646
More information about the llvm-commits mailing list