[llvm] [NVPTX] Add some more immediate instruction variants (PR #122746)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 13 09:12:13 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-nvptx
Author: Alex MacLean (AlexMaclean)
<details>
<summary>Changes</summary>
While this likely won't impact the final SASS, it makes for more compact PTX.
---
Full diff: https://github.com/llvm/llvm-project/pull/122746.diff
4 Files Affected:
- (modified) llvm/lib/Target/NVPTX/NVPTXInstrInfo.td (+70-57)
- (modified) llvm/lib/Target/NVPTX/NVPTXIntrinsics.td (-13)
- (modified) llvm/test/CodeGen/NVPTX/i128.ll (+68-80)
- (modified) llvm/test/CodeGen/NVPTX/shift-parts.ll (-1)
``````````diff
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index c3e72d6ce3a3f8..f42c37a74c3cc8 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -207,33 +207,39 @@ class ValueToRegClass<ValueType T> {
// Some Common Instruction Class Templates
//===----------------------------------------------------------------------===//
+// Utility class to wrap up information about a register and DAG type for more
+// convenient iteration and parameterization
+class RegTyInfo<ValueType ty, NVPTXRegClass rc, Operand imm> {
+ ValueType Ty = ty;
+ NVPTXRegClass RC = rc;
+ Operand Imm = imm;
+ int Size = ty.Size;
+}
+
+def I16RT : RegTyInfo<i16, Int16Regs, i16imm>;
+def I32RT : RegTyInfo<i32, Int32Regs, i32imm>;
+def I64RT : RegTyInfo<i64, Int64Regs, i64imm>;
+
// Template for instructions which take three int64, int32, or int16 args.
// The instructions are named "<OpcStr><Width>" (e.g. "add.s64").
-multiclass I3<string OpcStr, SDNode OpNode> {
- def i64rr :
- NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
- !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
- [(set i64:$dst, (OpNode i64:$a, i64:$b))]>;
- def i64ri :
- NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
- !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
- [(set i64:$dst, (OpNode i64:$a, imm:$b))]>;
- def i32rr :
- NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
- !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
- [(set i32:$dst, (OpNode i32:$a, i32:$b))]>;
- def i32ri :
- NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
- !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
- [(set i32:$dst, (OpNode i32:$a, imm:$b))]>;
- def i16rr :
- NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
- !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
- [(set i16:$dst, (OpNode i16:$a, i16:$b))]>;
- def i16ri :
- NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
- !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
- [(set i16:$dst, (OpNode i16:$a, (imm):$b))]>;
+multiclass I3<string OpcStr, SDNode OpNode, bit commutative> {
+ foreach t = [I16RT, I32RT, I64RT] in {
+ defvar asmstr = OpcStr # t.Size # " \t$dst, $a, $b;";
+
+ def t.Ty # rr :
+ NVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.RC:$b),
+ asmstr,
+ [(set t.Ty:$dst, (OpNode t.Ty:$a, t.Ty:$b))]>;
+ def t.Ty # ri :
+ NVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.Imm:$b),
+ asmstr,
+ [(set t.Ty:$dst, (OpNode t.RC:$a, imm:$b))]>;
+ if !not(commutative) then
+ def t.Ty # ir :
+ NVPTXInst<(outs t.RC:$dst), (ins t.Imm:$a, t.RC:$b),
+ asmstr,
+ [(set t.Ty:$dst, (OpNode imm:$a, t.RC:$b))]>;
+ }
}
class I16x2<string OpcStr, SDNode OpNode> :
@@ -853,8 +859,8 @@ defm SUB_i1 : ADD_SUB_i1<sub>;
// int16, int32, and int64 signed addition. Since nvptx is 2's complement, we
// also use these for unsigned arithmetic.
-defm ADD : I3<"add.s", add>;
-defm SUB : I3<"sub.s", sub>;
+defm ADD : I3<"add.s", add, /*commutative=*/ true>;
+defm SUB : I3<"sub.s", sub, /*commutative=*/ false>;
def ADD16x2 : I16x2<"add.s", add>;
@@ -866,18 +872,18 @@ defm SUBCC : ADD_SUB_INT_CARRY<"sub.cc", subc>;
defm ADDCCC : ADD_SUB_INT_CARRY<"addc.cc", adde>;
defm SUBCCC : ADD_SUB_INT_CARRY<"subc.cc", sube>;
-defm MULT : I3<"mul.lo.s", mul>;
+defm MULT : I3<"mul.lo.s", mul, /*commutative=*/ true>;
-defm MULTHS : I3<"mul.hi.s", mulhs>;
-defm MULTHU : I3<"mul.hi.u", mulhu>;
+defm MULTHS : I3<"mul.hi.s", mulhs, /*commutative=*/ true>;
+defm MULTHU : I3<"mul.hi.u", mulhu, /*commutative=*/ true>;
-defm SDIV : I3<"div.s", sdiv>;
-defm UDIV : I3<"div.u", udiv>;
+defm SDIV : I3<"div.s", sdiv, /*commutative=*/ false>;
+defm UDIV : I3<"div.u", udiv, /*commutative=*/ false>;
// The ri versions of rem.s and rem.u won't be selected; DAGCombiner::visitSREM
// will lower it.
-defm SREM : I3<"rem.s", srem>;
-defm UREM : I3<"rem.u", urem>;
+defm SREM : I3<"rem.s", srem, /*commutative=*/ false>;
+defm UREM : I3<"rem.u", urem, /*commutative=*/ false>;
// Integer absolute value. NumBits should be one minus the bit width of RC.
// This idiom implements the algorithm at
@@ -892,10 +898,10 @@ defm ABS_32 : ABS<i32, Int32Regs, ".s32">;
defm ABS_64 : ABS<i64, Int64Regs, ".s64">;
// Integer min/max.
-defm SMAX : I3<"max.s", smax>;
-defm UMAX : I3<"max.u", umax>;
-defm SMIN : I3<"min.s", smin>;
-defm UMIN : I3<"min.u", umin>;
+defm SMAX : I3<"max.s", smax, /*commutative=*/ true>;
+defm UMAX : I3<"max.u", umax, /*commutative=*/ true>;
+defm SMIN : I3<"min.s", smin, /*commutative=*/ true>;
+defm UMIN : I3<"min.u", umin, /*commutative=*/ true>;
def SMAX16x2 : I16x2<"max.s", smax>;
def UMAX16x2 : I16x2<"max.u", umax>;
@@ -1375,25 +1381,32 @@ def FDIV32ri_prec :
//
multiclass FMA<string OpcStr, RegisterClass RC, Operand ImmCls, Predicate Pred> {
- def rrr : NVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, RC:$c),
- !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
- [(set RC:$dst, (fma RC:$a, RC:$b, RC:$c))]>,
- Requires<[Pred]>;
- def rri : NVPTXInst<(outs RC:$dst),
- (ins RC:$a, RC:$b, ImmCls:$c),
- !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
- [(set RC:$dst, (fma RC:$a, RC:$b, fpimm:$c))]>,
- Requires<[Pred]>;
- def rir : NVPTXInst<(outs RC:$dst),
- (ins RC:$a, ImmCls:$b, RC:$c),
- !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
- [(set RC:$dst, (fma RC:$a, fpimm:$b, RC:$c))]>,
- Requires<[Pred]>;
- def rii : NVPTXInst<(outs RC:$dst),
- (ins RC:$a, ImmCls:$b, ImmCls:$c),
- !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
- [(set RC:$dst, (fma RC:$a, fpimm:$b, fpimm:$c))]>,
- Requires<[Pred]>;
+ defvar asmstr = OpcStr # " \t$dst, $a, $b, $c;";
+ def rrr : NVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, RC:$c),
+ asmstr,
+ [(set RC:$dst, (fma RC:$a, RC:$b, RC:$c))]>,
+ Requires<[Pred]>;
+ def rri : NVPTXInst<(outs RC:$dst),
+ (ins RC:$a, RC:$b, ImmCls:$c),
+ asmstr,
+ [(set RC:$dst, (fma RC:$a, RC:$b, fpimm:$c))]>,
+ Requires<[Pred]>;
+ def rir : NVPTXInst<(outs RC:$dst),
+ (ins RC:$a, ImmCls:$b, RC:$c),
+ asmstr,
+ [(set RC:$dst, (fma RC:$a, fpimm:$b, RC:$c))]>,
+ Requires<[Pred]>;
+ def rii : NVPTXInst<(outs RC:$dst),
+ (ins RC:$a, ImmCls:$b, ImmCls:$c),
+ asmstr,
+ [(set RC:$dst, (fma RC:$a, fpimm:$b, fpimm:$c))]>,
+ Requires<[Pred]>;
+ def iir : NVPTXInst<(outs RC:$dst),
+ (ins ImmCls:$a, ImmCls:$b, RC:$c),
+ asmstr,
+ [(set RC:$dst, (fma fpimm:$a, fpimm:$b, RC:$c))]>,
+ Requires<[Pred]>;
+
}
multiclass FMA_F16<string OpcStr, ValueType T, RegisterClass RC, Predicate Pred> {
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 22339ebc5484f1..4955834c129d9b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -6,19 +6,6 @@
//
//===----------------------------------------------------------------------===//
-// Utility class to wrap up information about a register and DAG type for more
-// convenient iteration and parameterization
-class RegTyInfo<ValueType ty, NVPTXRegClass rc, Operand imm> {
- ValueType Ty = ty;
- NVPTXRegClass RC = rc;
- Operand Imm = imm;
- int Size = ty.Size;
-}
-
-def I32RT : RegTyInfo<i32, Int32Regs, i32imm>;
-def I64RT : RegTyInfo<i64, Int64Regs, i64imm>;
-
-
def immFloat0 : PatLeaf<(fpimm), [{
float f = (float)N->getValueAPF().convertToFloat();
return (f==0.0f);
diff --git a/llvm/test/CodeGen/NVPTX/i128.ll b/llvm/test/CodeGen/NVPTX/i128.ll
index accfbe4af0313c..7ece0ccbd844ed 100644
--- a/llvm/test/CodeGen/NVPTX/i128.ll
+++ b/llvm/test/CodeGen/NVPTX/i128.ll
@@ -6,7 +6,7 @@ define i128 @srem_i128(i128 %lhs, i128 %rhs) {
; CHECK-LABEL: srem_i128(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<19>;
-; CHECK-NEXT: .reg .b32 %r<20>;
+; CHECK-NEXT: .reg .b32 %r<16>;
; CHECK-NEXT: .reg .b64 %rd<129>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %_udiv-special-cases
@@ -67,32 +67,29 @@ define i128 @srem_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: or.b64 %rd72, %rd121, %rd122;
; CHECK-NEXT: setp.eq.s64 %p15, %rd72, 0;
; CHECK-NEXT: cvt.u32.u64 %r9, %rd66;
-; CHECK-NEXT: mov.b32 %r10, 127;
-; CHECK-NEXT: sub.s32 %r11, %r10, %r9;
-; CHECK-NEXT: shl.b64 %rd73, %rd4, %r11;
-; CHECK-NEXT: mov.b32 %r12, 64;
-; CHECK-NEXT: sub.s32 %r13, %r12, %r11;
-; CHECK-NEXT: shr.u64 %rd74, %rd3, %r13;
+; CHECK-NEXT: sub.s32 %r10, 127, %r9;
+; CHECK-NEXT: shl.b64 %rd73, %rd4, %r10;
+; CHECK-NEXT: sub.s32 %r11, 64, %r10;
+; CHECK-NEXT: shr.u64 %rd74, %rd3, %r11;
; CHECK-NEXT: or.b64 %rd75, %rd73, %rd74;
-; CHECK-NEXT: mov.b32 %r14, 63;
-; CHECK-NEXT: sub.s32 %r15, %r14, %r9;
-; CHECK-NEXT: shl.b64 %rd76, %rd3, %r15;
-; CHECK-NEXT: setp.gt.s32 %p16, %r11, 63;
+; CHECK-NEXT: sub.s32 %r12, 63, %r9;
+; CHECK-NEXT: shl.b64 %rd76, %rd3, %r12;
+; CHECK-NEXT: setp.gt.s32 %p16, %r10, 63;
; CHECK-NEXT: selp.b64 %rd126, %rd76, %rd75, %p16;
-; CHECK-NEXT: shl.b64 %rd125, %rd3, %r11;
+; CHECK-NEXT: shl.b64 %rd125, %rd3, %r10;
; CHECK-NEXT: mov.u64 %rd116, %rd119;
; CHECK-NEXT: @%p15 bra $L__BB0_4;
; CHECK-NEXT: // %bb.1: // %udiv-preheader
-; CHECK-NEXT: cvt.u32.u64 %r16, %rd121;
-; CHECK-NEXT: shr.u64 %rd79, %rd3, %r16;
-; CHECK-NEXT: sub.s32 %r18, %r12, %r16;
-; CHECK-NEXT: shl.b64 %rd80, %rd4, %r18;
+; CHECK-NEXT: cvt.u32.u64 %r13, %rd121;
+; CHECK-NEXT: shr.u64 %rd79, %rd3, %r13;
+; CHECK-NEXT: sub.s32 %r14, 64, %r13;
+; CHECK-NEXT: shl.b64 %rd80, %rd4, %r14;
; CHECK-NEXT: or.b64 %rd81, %rd79, %rd80;
-; CHECK-NEXT: add.s32 %r19, %r16, -64;
-; CHECK-NEXT: shr.u64 %rd82, %rd4, %r19;
-; CHECK-NEXT: setp.gt.s32 %p17, %r16, 63;
+; CHECK-NEXT: add.s32 %r15, %r13, -64;
+; CHECK-NEXT: shr.u64 %rd82, %rd4, %r15;
+; CHECK-NEXT: setp.gt.s32 %p17, %r13, 63;
; CHECK-NEXT: selp.b64 %rd123, %rd82, %rd81, %p17;
-; CHECK-NEXT: shr.u64 %rd124, %rd4, %r16;
+; CHECK-NEXT: shr.u64 %rd124, %rd4, %r13;
; CHECK-NEXT: add.cc.s64 %rd35, %rd5, -1;
; CHECK-NEXT: addc.cc.s64 %rd36, %rd6, -1;
; CHECK-NEXT: mov.b64 %rd116, 0;
@@ -155,7 +152,7 @@ define i128 @urem_i128(i128 %lhs, i128 %rhs) {
; CHECK-LABEL: urem_i128(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<17>;
-; CHECK-NEXT: .reg .b32 %r<20>;
+; CHECK-NEXT: .reg .b32 %r<16>;
; CHECK-NEXT: .reg .b64 %rd<115>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %_udiv-special-cases
@@ -205,32 +202,29 @@ define i128 @urem_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: or.b64 %rd62, %rd107, %rd108;
; CHECK-NEXT: setp.eq.s64 %p13, %rd62, 0;
; CHECK-NEXT: cvt.u32.u64 %r9, %rd56;
-; CHECK-NEXT: mov.b32 %r10, 127;
-; CHECK-NEXT: sub.s32 %r11, %r10, %r9;
-; CHECK-NEXT: shl.b64 %rd63, %rd42, %r11;
-; CHECK-NEXT: mov.b32 %r12, 64;
-; CHECK-NEXT: sub.s32 %r13, %r12, %r11;
-; CHECK-NEXT: shr.u64 %rd64, %rd41, %r13;
+; CHECK-NEXT: sub.s32 %r10, 127, %r9;
+; CHECK-NEXT: shl.b64 %rd63, %rd42, %r10;
+; CHECK-NEXT: sub.s32 %r11, 64, %r10;
+; CHECK-NEXT: shr.u64 %rd64, %rd41, %r11;
; CHECK-NEXT: or.b64 %rd65, %rd63, %rd64;
-; CHECK-NEXT: mov.b32 %r14, 63;
-; CHECK-NEXT: sub.s32 %r15, %r14, %r9;
-; CHECK-NEXT: shl.b64 %rd66, %rd41, %r15;
-; CHECK-NEXT: setp.gt.s32 %p14, %r11, 63;
+; CHECK-NEXT: sub.s32 %r12, 63, %r9;
+; CHECK-NEXT: shl.b64 %rd66, %rd41, %r12;
+; CHECK-NEXT: setp.gt.s32 %p14, %r10, 63;
; CHECK-NEXT: selp.b64 %rd112, %rd66, %rd65, %p14;
-; CHECK-NEXT: shl.b64 %rd111, %rd41, %r11;
+; CHECK-NEXT: shl.b64 %rd111, %rd41, %r10;
; CHECK-NEXT: mov.u64 %rd102, %rd105;
; CHECK-NEXT: @%p13 bra $L__BB1_4;
; CHECK-NEXT: // %bb.1: // %udiv-preheader
-; CHECK-NEXT: cvt.u32.u64 %r16, %rd107;
-; CHECK-NEXT: shr.u64 %rd69, %rd41, %r16;
-; CHECK-NEXT: sub.s32 %r18, %r12, %r16;
-; CHECK-NEXT: shl.b64 %rd70, %rd42, %r18;
+; CHECK-NEXT: cvt.u32.u64 %r13, %rd107;
+; CHECK-NEXT: shr.u64 %rd69, %rd41, %r13;
+; CHECK-NEXT: sub.s32 %r14, 64, %r13;
+; CHECK-NEXT: shl.b64 %rd70, %rd42, %r14;
; CHECK-NEXT: or.b64 %rd71, %rd69, %rd70;
-; CHECK-NEXT: add.s32 %r19, %r16, -64;
-; CHECK-NEXT: shr.u64 %rd72, %rd42, %r19;
-; CHECK-NEXT: setp.gt.s32 %p15, %r16, 63;
+; CHECK-NEXT: add.s32 %r15, %r13, -64;
+; CHECK-NEXT: shr.u64 %rd72, %rd42, %r15;
+; CHECK-NEXT: setp.gt.s32 %p15, %r13, 63;
; CHECK-NEXT: selp.b64 %rd109, %rd72, %rd71, %p15;
-; CHECK-NEXT: shr.u64 %rd110, %rd42, %r16;
+; CHECK-NEXT: shr.u64 %rd110, %rd42, %r13;
; CHECK-NEXT: add.cc.s64 %rd33, %rd3, -1;
; CHECK-NEXT: addc.cc.s64 %rd34, %rd4, -1;
; CHECK-NEXT: mov.b64 %rd102, 0;
@@ -324,7 +318,7 @@ define i128 @sdiv_i128(i128 %lhs, i128 %rhs) {
; CHECK-LABEL: sdiv_i128(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<19>;
-; CHECK-NEXT: .reg .b32 %r<20>;
+; CHECK-NEXT: .reg .b32 %r<16>;
; CHECK-NEXT: .reg .b64 %rd<122>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %_udiv-special-cases
@@ -386,32 +380,29 @@ define i128 @sdiv_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: or.b64 %rd73, %rd114, %rd115;
; CHECK-NEXT: setp.eq.s64 %p15, %rd73, 0;
; CHECK-NEXT: cvt.u32.u64 %r9, %rd67;
-; CHECK-NEXT: mov.b32 %r10, 127;
-; CHECK-NEXT: sub.s32 %r11, %r10, %r9;
-; CHECK-NEXT: shl.b64 %rd74, %rd2, %r11;
-; CHECK-NEXT: mov.b32 %r12, 64;
-; CHECK-NEXT: sub.s32 %r13, %r12, %r11;
-; CHECK-NEXT: shr.u64 %rd75, %rd1, %r13;
+; CHECK-NEXT: sub.s32 %r10, 127, %r9;
+; CHECK-NEXT: shl.b64 %rd74, %rd2, %r10;
+; CHECK-NEXT: sub.s32 %r11, 64, %r10;
+; CHECK-NEXT: shr.u64 %rd75, %rd1, %r11;
; CHECK-NEXT: or.b64 %rd76, %rd74, %rd75;
-; CHECK-NEXT: mov.b32 %r14, 63;
-; CHECK-NEXT: sub.s32 %r15, %r14, %r9;
-; CHECK-NEXT: shl.b64 %rd77, %rd1, %r15;
-; CHECK-NEXT: setp.gt.s32 %p16, %r11, 63;
+; CHECK-NEXT: sub.s32 %r12, 63, %r9;
+; CHECK-NEXT: shl.b64 %rd77, %rd1, %r12;
+; CHECK-NEXT: setp.gt.s32 %p16, %r10, 63;
; CHECK-NEXT: selp.b64 %rd119, %rd77, %rd76, %p16;
-; CHECK-NEXT: shl.b64 %rd118, %rd1, %r11;
+; CHECK-NEXT: shl.b64 %rd118, %rd1, %r10;
; CHECK-NEXT: mov.u64 %rd109, %rd112;
; CHECK-NEXT: @%p15 bra $L__BB4_4;
; CHECK-NEXT: // %bb.1: // %udiv-preheader
-; CHECK-NEXT: cvt.u32.u64 %r16, %rd114;
-; CHECK-NEXT: shr.u64 %rd80, %rd1, %r16;
-; CHECK-NEXT: sub.s32 %r18, %r12, %r16;
-; CHECK-NEXT: shl.b64 %rd81, %rd2, %r18;
+; CHECK-NEXT: cvt.u32.u64 %r13, %rd114;
+; CHECK-NEXT: shr.u64 %rd80, %rd1, %r13;
+; CHECK-NEXT: sub.s32 %r14, 64, %r13;
+; CHECK-NEXT: shl.b64 %rd81, %rd2, %r14;
; CHECK-NEXT: or.b64 %rd82, %rd80, %rd81;
-; CHECK-NEXT: add.s32 %r19, %r16, -64;
-; CHECK-NEXT: shr.u64 %rd83, %rd2, %r19;
-; CHECK-NEXT: setp.gt.s32 %p17, %r16, 63;
+; CHECK-NEXT: add.s32 %r15, %r13, -64;
+; CHECK-NEXT: shr.u64 %rd83, %rd2, %r15;
+; CHECK-NEXT: setp.gt.s32 %p17, %r13, 63;
; CHECK-NEXT: selp.b64 %rd116, %rd83, %rd82, %p17;
-; CHECK-NEXT: shr.u64 %rd117, %rd2, %r16;
+; CHECK-NEXT: shr.u64 %rd117, %rd2, %r13;
; CHECK-NEXT: add.cc.s64 %rd35, %rd3, -1;
; CHECK-NEXT: addc.cc.s64 %rd36, %rd4, -1;
; CHECK-NEXT: mov.b64 %rd109, 0;
@@ -466,7 +457,7 @@ define i128 @udiv_i128(i128 %lhs, i128 %rhs) {
; CHECK-LABEL: udiv_i128(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<17>;
-; CHECK-NEXT: .reg .b32 %r<20>;
+; CHECK-NEXT: .reg .b32 %r<16>;
; CHECK-NEXT: .reg .b64 %rd<107>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %_udiv-special-cases
@@ -516,32 +507,29 @@ define i128 @udiv_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: or.b64 %rd62, %rd99, %rd100;
; CHECK-NEXT: setp.eq.s64 %p13, %rd62, 0;
; CHECK-NEXT: cvt.u32.u64 %r9, %rd56;
-; CHECK-NEXT: mov.b32 %r10, 127;
-; CHECK-NEXT: sub.s32 %r11, %r10, %r9;
-; CHECK-NEXT: shl.b64 %rd63, %rd42, %r11;
-; CHECK-NEXT: mov.b32 %r12, 64;
-; CHECK-NEXT: sub.s32 %r13, %r12, %r11;
-; CHECK-NEXT: shr.u64 %rd64, %rd41, %r13;
+; CHECK-NEXT: sub.s32 %r10, 127, %r9;
+; CHECK-NEXT: shl.b64 %rd63, %rd42, %r10;
+; CHECK-NEXT: sub.s32 %r11, 64, %r10;
+; CHECK-NEXT: shr.u64 %rd64, %rd41, %r11;
; CHECK-NEXT: or.b64 %rd65, %rd63, %rd64;
-; CHECK-NEXT: mov.b32 %r14, 63;
-; CHECK-NEXT: sub.s32 %r15, %r14, %r9;
-; CHECK-NEXT: shl.b64 %rd66, %rd41, %r15;
-; CHECK-NEXT: setp.gt.s32 %p14, %r11, 63;
+; CHECK-NEXT: sub.s32 %r12, 63, %r9;
+; CHECK-NEXT: shl.b64 %rd66, %rd41, %r12;
+; CHECK-NEXT: setp.gt.s32 %p14, %r10, 63;
; CHECK-NEXT: selp.b64 %rd104, %rd66, %rd65, %p14;
-; CHECK-NEXT: shl.b64 %rd103, %rd41, %r11;
+; CHECK-NEXT: shl.b64 %rd103, %rd41, %r10;
; CHECK-NEXT: mov.u64 %rd94, %rd97;
; CHECK-NEXT: @%p13 bra $L__BB5_4;
; CHECK-NEXT: // %bb.1: // %udiv-preheader
-; CHECK-NEXT: cvt.u32.u64 %r16, %rd99;
-; CHECK-NEXT: shr.u64 %rd69, %rd41, %r16;
-; CHECK-NEXT: sub.s32 %r18, %r12, %r16;
-; CHECK-NEXT: shl.b64 %rd70, %rd42, %r18;
+; CHECK-NEXT: cvt.u32.u64 %r13, %rd99;
+; CHECK-NEXT: shr.u64 %rd69, %rd41, %r13;
+; CHECK-NEXT: sub.s32 %r14, 64, %r13;
+; CHECK-NEXT: shl.b64 %rd70, %rd42, %r14;
; CHECK-NEXT: or.b64 %rd71, %rd69, %rd70;
-; CHECK-NEXT: add.s32 %r19, %r16, -64;
-; CHECK-NEXT: shr.u64 %rd72, %rd42, %r19;
-; CHECK-NEXT: setp.gt.s32 %p15, %r16, 63;
+; CHECK-NEXT: add.s32 %r15, %r13, -64;
+; CHECK-NEXT: shr.u64 %rd72, %rd42, %r15;
+; CHECK-NEXT: setp.gt.s32 %p15, %r13, 63;
; CHECK-NEXT: selp.b64 %rd101, %rd72, %rd71, %p15;
-; CHECK-NEXT: shr.u64 %rd102, %rd42, %r16;
+; CHECK-NEXT: shr.u64 %rd102, %rd42, %r13;
; CHECK-NEXT: add.cc.s64 %rd33, %rd43, -1;
; CHECK-NEXT: addc.cc.s64 %rd34, %rd44, -1;
; CHECK-NEXT: mov.b64 %rd94, 0;
diff --git a/llvm/test/CodeGen/NVPTX/shift-parts.ll b/llvm/test/CodeGen/NVPTX/shift-parts.ll
index c7cfdc4ff2a4dc..ded1046714fd53 100644
--- a/llvm/test/CodeGen/NVPTX/shift-parts.ll
+++ b/llvm/test/CodeGen/NVPTX/shift-parts.ll
@@ -4,7 +4,6 @@
; CHECK: shift_parts_left_128
define void @shift_parts_left_128(ptr %val, ptr %amtptr) {
; CHECK: shl.b64
-; CHECK: mov.b32
; CHECK: sub.s32
; CHECK: shr.u64
; CHECK: or.b64
``````````
</details>
https://github.com/llvm/llvm-project/pull/122746
More information about the llvm-commits mailing list