[llvm] a288d8d - [NVPTX] Enable i128 support in the NVPTX backend (#98658)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 12 11:01:48 PDT 2024
Author: Joseph Huber
Date: 2024-07-12T13:01:45-05:00
New Revision: a288d8dad3b3f459bce3f908cd564f2963060642
URL: https://github.com/llvm/llvm-project/commit/a288d8dad3b3f459bce3f908cd564f2963060642
DIFF: https://github.com/llvm/llvm-project/commit/a288d8dad3b3f459bce3f908cd564f2963060642.diff
LOG: [NVPTX] Enable i128 support in the NVPTX backend (#98658)
Summary:
The target lowering needs to declare that the widest integer division and
remainder the platform supports is 64 bits (setMaxDivRemBitWidthSupported(64))
in order to enable i128 support: wider divisions and remainders are then
expanded inline instead of being lowered to libcalls. The motivation behind
this patch is that the i128 libcalls seem to be the only ones used by the
NVPTX backend, and it would be ideal to disable those completely. That would
allow LTO to optimize libcalls properly after
https://github.com/llvm/llvm-project/pull/98512.
Added:
llvm/test/CodeGen/NVPTX/i128.ll
Modified:
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
Removed:
llvm/test/CodeGen/NVPTX/libcall-fulfilled.ll
llvm/test/CodeGen/NVPTX/libcall-instruction.ll
llvm/test/CodeGen/NVPTX/libcall-intrinsic.ll
################################################################################
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 9fccfb26eb6fe..a2181b478c269 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -872,6 +872,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setMinCmpXchgSizeInBits(32);
setMaxAtomicSizeInBitsSupported(64);
+ setMaxDivRemBitWidthSupported(64);
}
const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
diff --git a/llvm/test/CodeGen/NVPTX/i128.ll b/llvm/test/CodeGen/NVPTX/i128.ll
new file mode 100644
index 0000000000000..4449e4f2ea4ed
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/i128.ll
@@ -0,0 +1,643 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=nvptx64-- 2>&1 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-- | %ptxas-verify %}
+
+define i128 @srem_i128(i128 %lhs, i128 %rhs) {
+; CHECK-LABEL: srem_i128(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<19>;
+; CHECK-NEXT: .reg .b32 %r<20>;
+; CHECK-NEXT: .reg .b64 %rd<127>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0: // %_udiv-special-cases
+; CHECK-NEXT: ld.param.v2.u64 {%rd45, %rd46}, [srem_i128_param_0];
+; CHECK-NEXT: ld.param.v2.u64 {%rd49, %rd50}, [srem_i128_param_1];
+; CHECK-NEXT: shr.s64 %rd2, %rd46, 63;
+; CHECK-NEXT: mov.u64 %rd117, 0;
+; CHECK-NEXT: sub.cc.s64 %rd52, %rd117, %rd45;
+; CHECK-NEXT: subc.cc.s64 %rd53, %rd117, %rd46;
+; CHECK-NEXT: setp.lt.s64 %p1, %rd46, 0;
+; CHECK-NEXT: selp.b64 %rd4, %rd53, %rd46, %p1;
+; CHECK-NEXT: selp.b64 %rd3, %rd52, %rd45, %p1;
+; CHECK-NEXT: sub.cc.s64 %rd54, %rd117, %rd49;
+; CHECK-NEXT: subc.cc.s64 %rd55, %rd117, %rd50;
+; CHECK-NEXT: setp.lt.s64 %p2, %rd50, 0;
+; CHECK-NEXT: selp.b64 %rd6, %rd55, %rd50, %p2;
+; CHECK-NEXT: selp.b64 %rd5, %rd54, %rd49, %p2;
+; CHECK-NEXT: or.b64 %rd56, %rd5, %rd6;
+; CHECK-NEXT: setp.eq.s64 %p3, %rd56, 0;
+; CHECK-NEXT: or.b64 %rd57, %rd3, %rd4;
+; CHECK-NEXT: setp.eq.s64 %p4, %rd57, 0;
+; CHECK-NEXT: or.pred %p5, %p3, %p4;
+; CHECK-NEXT: setp.ne.s64 %p6, %rd6, 0;
+; CHECK-NEXT: clz.b64 %r1, %rd6;
+; CHECK-NEXT: cvt.u64.u32 %rd58, %r1;
+; CHECK-NEXT: clz.b64 %r2, %rd5;
+; CHECK-NEXT: cvt.u64.u32 %rd59, %r2;
+; CHECK-NEXT: add.s64 %rd60, %rd59, 64;
+; CHECK-NEXT: selp.b64 %rd61, %rd58, %rd60, %p6;
+; CHECK-NEXT: setp.ne.s64 %p7, %rd4, 0;
+; CHECK-NEXT: clz.b64 %r3, %rd4;
+; CHECK-NEXT: cvt.u64.u32 %rd62, %r3;
+; CHECK-NEXT: clz.b64 %r4, %rd3;
+; CHECK-NEXT: cvt.u64.u32 %rd63, %r4;
+; CHECK-NEXT: add.s64 %rd64, %rd63, 64;
+; CHECK-NEXT: selp.b64 %rd65, %rd62, %rd64, %p7;
+; CHECK-NEXT: sub.cc.s64 %rd7, %rd61, %rd65;
+; CHECK-NEXT: subc.cc.s64 %rd8, %rd117, 0;
+; CHECK-NEXT: setp.eq.s64 %p8, %rd8, 0;
+; CHECK-NEXT: setp.ne.s64 %p9, %rd8, 0;
+; CHECK-NEXT: selp.u32 %r5, -1, 0, %p9;
+; CHECK-NEXT: setp.gt.u64 %p10, %rd7, 127;
+; CHECK-NEXT: selp.u32 %r6, -1, 0, %p10;
+; CHECK-NEXT: selp.b32 %r7, %r6, %r5, %p8;
+; CHECK-NEXT: and.b32 %r8, %r7, 1;
+; CHECK-NEXT: setp.eq.b32 %p11, %r8, 1;
+; CHECK-NEXT: or.pred %p12, %p5, %p11;
+; CHECK-NEXT: xor.b64 %rd66, %rd7, 127;
+; CHECK-NEXT: or.b64 %rd67, %rd66, %rd8;
+; CHECK-NEXT: setp.eq.s64 %p13, %rd67, 0;
+; CHECK-NEXT: selp.b64 %rd126, 0, %rd4, %p12;
+; CHECK-NEXT: selp.b64 %rd125, 0, %rd3, %p12;
+; CHECK-NEXT: or.pred %p14, %p12, %p13;
+; CHECK-NEXT: @%p14 bra $L__BB0_5;
+; CHECK-NEXT: // %bb.3: // %udiv-bb1
+; CHECK-NEXT: add.cc.s64 %rd119, %rd7, 1;
+; CHECK-NEXT: addc.cc.s64 %rd120, %rd8, 0;
+; CHECK-NEXT: or.b64 %rd70, %rd119, %rd120;
+; CHECK-NEXT: setp.eq.s64 %p15, %rd70, 0;
+; CHECK-NEXT: cvt.u32.u64 %r9, %rd7;
+; CHECK-NEXT: mov.b32 %r10, 127;
+; CHECK-NEXT: sub.s32 %r11, %r10, %r9;
+; CHECK-NEXT: shl.b64 %rd71, %rd4, %r11;
+; CHECK-NEXT: mov.b32 %r12, 64;
+; CHECK-NEXT: sub.s32 %r13, %r12, %r11;
+; CHECK-NEXT: shr.u64 %rd72, %rd3, %r13;
+; CHECK-NEXT: or.b64 %rd73, %rd71, %rd72;
+; CHECK-NEXT: mov.b32 %r14, 63;
+; CHECK-NEXT: sub.s32 %r15, %r14, %r9;
+; CHECK-NEXT: shl.b64 %rd74, %rd3, %r15;
+; CHECK-NEXT: setp.gt.s32 %p16, %r11, 63;
+; CHECK-NEXT: selp.b64 %rd124, %rd74, %rd73, %p16;
+; CHECK-NEXT: shl.b64 %rd123, %rd3, %r11;
+; CHECK-NEXT: mov.u64 %rd114, %rd117;
+; CHECK-NEXT: @%p15 bra $L__BB0_4;
+; CHECK-NEXT: // %bb.1: // %udiv-preheader
+; CHECK-NEXT: cvt.u32.u64 %r16, %rd119;
+; CHECK-NEXT: shr.u64 %rd77, %rd3, %r16;
+; CHECK-NEXT: sub.s32 %r18, %r12, %r16;
+; CHECK-NEXT: shl.b64 %rd78, %rd4, %r18;
+; CHECK-NEXT: or.b64 %rd79, %rd77, %rd78;
+; CHECK-NEXT: add.s32 %r19, %r16, -64;
+; CHECK-NEXT: shr.u64 %rd80, %rd4, %r19;
+; CHECK-NEXT: setp.gt.s32 %p17, %r16, 63;
+; CHECK-NEXT: selp.b64 %rd121, %rd80, %rd79, %p17;
+; CHECK-NEXT: shr.u64 %rd122, %rd4, %r16;
+; CHECK-NEXT: add.cc.s64 %rd35, %rd5, -1;
+; CHECK-NEXT: addc.cc.s64 %rd36, %rd6, -1;
+; CHECK-NEXT: mov.u64 %rd114, 0;
+; CHECK-NEXT: mov.u64 %rd117, %rd114;
+; CHECK-NEXT: $L__BB0_2: // %udiv-do-while
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: shr.u64 %rd81, %rd121, 63;
+; CHECK-NEXT: shl.b64 %rd82, %rd122, 1;
+; CHECK-NEXT: or.b64 %rd83, %rd82, %rd81;
+; CHECK-NEXT: shl.b64 %rd84, %rd121, 1;
+; CHECK-NEXT: shr.u64 %rd85, %rd124, 63;
+; CHECK-NEXT: or.b64 %rd86, %rd84, %rd85;
+; CHECK-NEXT: shr.u64 %rd87, %rd123, 63;
+; CHECK-NEXT: shl.b64 %rd88, %rd124, 1;
+; CHECK-NEXT: or.b64 %rd89, %rd88, %rd87;
+; CHECK-NEXT: shl.b64 %rd90, %rd123, 1;
+; CHECK-NEXT: or.b64 %rd123, %rd117, %rd90;
+; CHECK-NEXT: or.b64 %rd124, %rd114, %rd89;
+; CHECK-NEXT: sub.cc.s64 %rd91, %rd35, %rd86;
+; CHECK-NEXT: subc.cc.s64 %rd92, %rd36, %rd83;
+; CHECK-NEXT: shr.s64 %rd93, %rd92, 63;
+; CHECK-NEXT: and.b64 %rd117, %rd93, 1;
+; CHECK-NEXT: and.b64 %rd94, %rd93, %rd5;
+; CHECK-NEXT: and.b64 %rd95, %rd93, %rd6;
+; CHECK-NEXT: sub.cc.s64 %rd121, %rd86, %rd94;
+; CHECK-NEXT: subc.cc.s64 %rd122, %rd83, %rd95;
+; CHECK-NEXT: add.cc.s64 %rd119, %rd119, -1;
+; CHECK-NEXT: addc.cc.s64 %rd120, %rd120, -1;
+; CHECK-NEXT: or.b64 %rd96, %rd119, %rd120;
+; CHECK-NEXT: setp.eq.s64 %p18, %rd96, 0;
+; CHECK-NEXT: @%p18 bra $L__BB0_4;
+; CHECK-NEXT: bra.uni $L__BB0_2;
+; CHECK-NEXT: $L__BB0_4: // %udiv-loop-exit
+; CHECK-NEXT: shr.u64 %rd97, %rd123, 63;
+; CHECK-NEXT: shl.b64 %rd98, %rd124, 1;
+; CHECK-NEXT: or.b64 %rd99, %rd98, %rd97;
+; CHECK-NEXT: shl.b64 %rd100, %rd123, 1;
+; CHECK-NEXT: or.b64 %rd125, %rd117, %rd100;
+; CHECK-NEXT: or.b64 %rd126, %rd114, %rd99;
+; CHECK-NEXT: $L__BB0_5: // %udiv-end
+; CHECK-NEXT: mul.hi.u64 %rd101, %rd5, %rd125;
+; CHECK-NEXT: mul.lo.s64 %rd102, %rd5, %rd126;
+; CHECK-NEXT: add.s64 %rd103, %rd101, %rd102;
+; CHECK-NEXT: mul.lo.s64 %rd104, %rd6, %rd125;
+; CHECK-NEXT: add.s64 %rd105, %rd103, %rd104;
+; CHECK-NEXT: mul.lo.s64 %rd106, %rd5, %rd125;
+; CHECK-NEXT: sub.cc.s64 %rd107, %rd3, %rd106;
+; CHECK-NEXT: subc.cc.s64 %rd108, %rd4, %rd105;
+; CHECK-NEXT: xor.b64 %rd109, %rd107, %rd2;
+; CHECK-NEXT: xor.b64 %rd110, %rd108, %rd2;
+; CHECK-NEXT: sub.cc.s64 %rd111, %rd109, %rd2;
+; CHECK-NEXT: subc.cc.s64 %rd112, %rd110, %rd2;
+; CHECK-NEXT: st.param.v2.b64 [func_retval0+0], {%rd111, %rd112};
+; CHECK-NEXT: ret;
+ %div = srem i128 %lhs, %rhs
+ ret i128 %div
+}
+
+define i128 @urem_i128(i128 %lhs, i128 %rhs) {
+; CHECK-LABEL: urem_i128(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<17>;
+; CHECK-NEXT: .reg .b32 %r<20>;
+; CHECK-NEXT: .reg .b64 %rd<113>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0: // %_udiv-special-cases
+; CHECK-NEXT: ld.param.v2.u64 {%rd41, %rd42}, [urem_i128_param_0];
+; CHECK-NEXT: ld.param.v2.u64 {%rd3, %rd4}, [urem_i128_param_1];
+; CHECK-NEXT: or.b64 %rd45, %rd3, %rd4;
+; CHECK-NEXT: setp.eq.s64 %p1, %rd45, 0;
+; CHECK-NEXT: or.b64 %rd46, %rd41, %rd42;
+; CHECK-NEXT: setp.eq.s64 %p2, %rd46, 0;
+; CHECK-NEXT: or.pred %p3, %p1, %p2;
+; CHECK-NEXT: setp.ne.s64 %p4, %rd4, 0;
+; CHECK-NEXT: clz.b64 %r1, %rd4;
+; CHECK-NEXT: cvt.u64.u32 %rd47, %r1;
+; CHECK-NEXT: clz.b64 %r2, %rd3;
+; CHECK-NEXT: cvt.u64.u32 %rd48, %r2;
+; CHECK-NEXT: add.s64 %rd49, %rd48, 64;
+; CHECK-NEXT: selp.b64 %rd50, %rd47, %rd49, %p4;
+; CHECK-NEXT: setp.ne.s64 %p5, %rd42, 0;
+; CHECK-NEXT: clz.b64 %r3, %rd42;
+; CHECK-NEXT: cvt.u64.u32 %rd51, %r3;
+; CHECK-NEXT: clz.b64 %r4, %rd41;
+; CHECK-NEXT: cvt.u64.u32 %rd52, %r4;
+; CHECK-NEXT: add.s64 %rd53, %rd52, 64;
+; CHECK-NEXT: selp.b64 %rd54, %rd51, %rd53, %p5;
+; CHECK-NEXT: mov.u64 %rd103, 0;
+; CHECK-NEXT: sub.cc.s64 %rd5, %rd50, %rd54;
+; CHECK-NEXT: subc.cc.s64 %rd6, %rd103, 0;
+; CHECK-NEXT: setp.eq.s64 %p6, %rd6, 0;
+; CHECK-NEXT: setp.ne.s64 %p7, %rd6, 0;
+; CHECK-NEXT: selp.u32 %r5, -1, 0, %p7;
+; CHECK-NEXT: setp.gt.u64 %p8, %rd5, 127;
+; CHECK-NEXT: selp.u32 %r6, -1, 0, %p8;
+; CHECK-NEXT: selp.b32 %r7, %r6, %r5, %p6;
+; CHECK-NEXT: and.b32 %r8, %r7, 1;
+; CHECK-NEXT: setp.eq.b32 %p9, %r8, 1;
+; CHECK-NEXT: or.pred %p10, %p3, %p9;
+; CHECK-NEXT: xor.b64 %rd56, %rd5, 127;
+; CHECK-NEXT: or.b64 %rd57, %rd56, %rd6;
+; CHECK-NEXT: setp.eq.s64 %p11, %rd57, 0;
+; CHECK-NEXT: selp.b64 %rd112, 0, %rd42, %p10;
+; CHECK-NEXT: selp.b64 %rd111, 0, %rd41, %p10;
+; CHECK-NEXT: or.pred %p12, %p10, %p11;
+; CHECK-NEXT: @%p12 bra $L__BB1_5;
+; CHECK-NEXT: // %bb.3: // %udiv-bb1
+; CHECK-NEXT: add.cc.s64 %rd105, %rd5, 1;
+; CHECK-NEXT: addc.cc.s64 %rd106, %rd6, 0;
+; CHECK-NEXT: or.b64 %rd60, %rd105, %rd106;
+; CHECK-NEXT: setp.eq.s64 %p13, %rd60, 0;
+; CHECK-NEXT: cvt.u32.u64 %r9, %rd5;
+; CHECK-NEXT: mov.b32 %r10, 127;
+; CHECK-NEXT: sub.s32 %r11, %r10, %r9;
+; CHECK-NEXT: shl.b64 %rd61, %rd42, %r11;
+; CHECK-NEXT: mov.b32 %r12, 64;
+; CHECK-NEXT: sub.s32 %r13, %r12, %r11;
+; CHECK-NEXT: shr.u64 %rd62, %rd41, %r13;
+; CHECK-NEXT: or.b64 %rd63, %rd61, %rd62;
+; CHECK-NEXT: mov.b32 %r14, 63;
+; CHECK-NEXT: sub.s32 %r15, %r14, %r9;
+; CHECK-NEXT: shl.b64 %rd64, %rd41, %r15;
+; CHECK-NEXT: setp.gt.s32 %p14, %r11, 63;
+; CHECK-NEXT: selp.b64 %rd110, %rd64, %rd63, %p14;
+; CHECK-NEXT: shl.b64 %rd109, %rd41, %r11;
+; CHECK-NEXT: mov.u64 %rd100, %rd103;
+; CHECK-NEXT: @%p13 bra $L__BB1_4;
+; CHECK-NEXT: // %bb.1: // %udiv-preheader
+; CHECK-NEXT: cvt.u32.u64 %r16, %rd105;
+; CHECK-NEXT: shr.u64 %rd67, %rd41, %r16;
+; CHECK-NEXT: sub.s32 %r18, %r12, %r16;
+; CHECK-NEXT: shl.b64 %rd68, %rd42, %r18;
+; CHECK-NEXT: or.b64 %rd69, %rd67, %rd68;
+; CHECK-NEXT: add.s32 %r19, %r16, -64;
+; CHECK-NEXT: shr.u64 %rd70, %rd42, %r19;
+; CHECK-NEXT: setp.gt.s32 %p15, %r16, 63;
+; CHECK-NEXT: selp.b64 %rd107, %rd70, %rd69, %p15;
+; CHECK-NEXT: shr.u64 %rd108, %rd42, %r16;
+; CHECK-NEXT: add.cc.s64 %rd33, %rd3, -1;
+; CHECK-NEXT: addc.cc.s64 %rd34, %rd4, -1;
+; CHECK-NEXT: mov.u64 %rd100, 0;
+; CHECK-NEXT: mov.u64 %rd103, %rd100;
+; CHECK-NEXT: $L__BB1_2: // %udiv-do-while
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: shr.u64 %rd71, %rd107, 63;
+; CHECK-NEXT: shl.b64 %rd72, %rd108, 1;
+; CHECK-NEXT: or.b64 %rd73, %rd72, %rd71;
+; CHECK-NEXT: shl.b64 %rd74, %rd107, 1;
+; CHECK-NEXT: shr.u64 %rd75, %rd110, 63;
+; CHECK-NEXT: or.b64 %rd76, %rd74, %rd75;
+; CHECK-NEXT: shr.u64 %rd77, %rd109, 63;
+; CHECK-NEXT: shl.b64 %rd78, %rd110, 1;
+; CHECK-NEXT: or.b64 %rd79, %rd78, %rd77;
+; CHECK-NEXT: shl.b64 %rd80, %rd109, 1;
+; CHECK-NEXT: or.b64 %rd109, %rd103, %rd80;
+; CHECK-NEXT: or.b64 %rd110, %rd100, %rd79;
+; CHECK-NEXT: sub.cc.s64 %rd81, %rd33, %rd76;
+; CHECK-NEXT: subc.cc.s64 %rd82, %rd34, %rd73;
+; CHECK-NEXT: shr.s64 %rd83, %rd82, 63;
+; CHECK-NEXT: and.b64 %rd103, %rd83, 1;
+; CHECK-NEXT: and.b64 %rd84, %rd83, %rd3;
+; CHECK-NEXT: and.b64 %rd85, %rd83, %rd4;
+; CHECK-NEXT: sub.cc.s64 %rd107, %rd76, %rd84;
+; CHECK-NEXT: subc.cc.s64 %rd108, %rd73, %rd85;
+; CHECK-NEXT: add.cc.s64 %rd105, %rd105, -1;
+; CHECK-NEXT: addc.cc.s64 %rd106, %rd106, -1;
+; CHECK-NEXT: or.b64 %rd86, %rd105, %rd106;
+; CHECK-NEXT: setp.eq.s64 %p16, %rd86, 0;
+; CHECK-NEXT: @%p16 bra $L__BB1_4;
+; CHECK-NEXT: bra.uni $L__BB1_2;
+; CHECK-NEXT: $L__BB1_4: // %udiv-loop-exit
+; CHECK-NEXT: shr.u64 %rd87, %rd109, 63;
+; CHECK-NEXT: shl.b64 %rd88, %rd110, 1;
+; CHECK-NEXT: or.b64 %rd89, %rd88, %rd87;
+; CHECK-NEXT: shl.b64 %rd90, %rd109, 1;
+; CHECK-NEXT: or.b64 %rd111, %rd103, %rd90;
+; CHECK-NEXT: or.b64 %rd112, %rd100, %rd89;
+; CHECK-NEXT: $L__BB1_5: // %udiv-end
+; CHECK-NEXT: mul.hi.u64 %rd91, %rd3, %rd111;
+; CHECK-NEXT: mul.lo.s64 %rd92, %rd3, %rd112;
+; CHECK-NEXT: add.s64 %rd93, %rd91, %rd92;
+; CHECK-NEXT: mul.lo.s64 %rd94, %rd4, %rd111;
+; CHECK-NEXT: add.s64 %rd95, %rd93, %rd94;
+; CHECK-NEXT: mul.lo.s64 %rd96, %rd3, %rd111;
+; CHECK-NEXT: sub.cc.s64 %rd97, %rd41, %rd96;
+; CHECK-NEXT: subc.cc.s64 %rd98, %rd42, %rd95;
+; CHECK-NEXT: st.param.v2.b64 [func_retval0+0], {%rd97, %rd98};
+; CHECK-NEXT: ret;
+ %div = urem i128 %lhs, %rhs
+ ret i128 %div
+}
+
+define i128 @srem_i128_pow2k(i128 %lhs) {
+; CHECK-LABEL: srem_i128_pow2k(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<10>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [srem_i128_pow2k_param_0];
+; CHECK-NEXT: shr.s64 %rd3, %rd2, 63;
+; CHECK-NEXT: shr.u64 %rd4, %rd3, 31;
+; CHECK-NEXT: add.cc.s64 %rd5, %rd1, %rd4;
+; CHECK-NEXT: addc.cc.s64 %rd6, %rd2, 0;
+; CHECK-NEXT: and.b64 %rd7, %rd5, -8589934592;
+; CHECK-NEXT: sub.cc.s64 %rd8, %rd1, %rd7;
+; CHECK-NEXT: subc.cc.s64 %rd9, %rd2, %rd6;
+; CHECK-NEXT: st.param.v2.b64 [func_retval0+0], {%rd8, %rd9};
+; CHECK-NEXT: ret;
+ %div = srem i128 %lhs, 8589934592
+ ret i128 %div
+}
+
+define i128 @urem_i128_pow2k(i128 %lhs) {
+; CHECK-LABEL: urem_i128_pow2k(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [urem_i128_pow2k_param_0];
+; CHECK-NEXT: and.b64 %rd3, %rd1, 8589934591;
+; CHECK-NEXT: mov.u64 %rd4, 0;
+; CHECK-NEXT: st.param.v2.b64 [func_retval0+0], {%rd3, %rd4};
+; CHECK-NEXT: ret;
+ %div = urem i128 %lhs, 8589934592
+ ret i128 %div
+}
+
+define i128 @sdiv_i128(i128 %lhs, i128 %rhs) {
+; CHECK-LABEL: sdiv_i128(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<19>;
+; CHECK-NEXT: .reg .b32 %r<20>;
+; CHECK-NEXT: .reg .b64 %rd<120>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0: // %_udiv-special-cases
+; CHECK-NEXT: ld.param.v2.u64 {%rd45, %rd46}, [sdiv_i128_param_0];
+; CHECK-NEXT: ld.param.v2.u64 {%rd49, %rd50}, [sdiv_i128_param_1];
+; CHECK-NEXT: mov.u64 %rd110, 0;
+; CHECK-NEXT: sub.cc.s64 %rd52, %rd110, %rd45;
+; CHECK-NEXT: subc.cc.s64 %rd53, %rd110, %rd46;
+; CHECK-NEXT: setp.lt.s64 %p1, %rd46, 0;
+; CHECK-NEXT: selp.b64 %rd2, %rd53, %rd46, %p1;
+; CHECK-NEXT: selp.b64 %rd1, %rd52, %rd45, %p1;
+; CHECK-NEXT: sub.cc.s64 %rd54, %rd110, %rd49;
+; CHECK-NEXT: subc.cc.s64 %rd55, %rd110, %rd50;
+; CHECK-NEXT: setp.lt.s64 %p2, %rd50, 0;
+; CHECK-NEXT: selp.b64 %rd4, %rd55, %rd50, %p2;
+; CHECK-NEXT: selp.b64 %rd3, %rd54, %rd49, %p2;
+; CHECK-NEXT: xor.b64 %rd56, %rd50, %rd46;
+; CHECK-NEXT: shr.s64 %rd5, %rd56, 63;
+; CHECK-NEXT: or.b64 %rd57, %rd3, %rd4;
+; CHECK-NEXT: setp.eq.s64 %p3, %rd57, 0;
+; CHECK-NEXT: or.b64 %rd58, %rd1, %rd2;
+; CHECK-NEXT: setp.eq.s64 %p4, %rd58, 0;
+; CHECK-NEXT: or.pred %p5, %p3, %p4;
+; CHECK-NEXT: setp.ne.s64 %p6, %rd4, 0;
+; CHECK-NEXT: clz.b64 %r1, %rd4;
+; CHECK-NEXT: cvt.u64.u32 %rd59, %r1;
+; CHECK-NEXT: clz.b64 %r2, %rd3;
+; CHECK-NEXT: cvt.u64.u32 %rd60, %r2;
+; CHECK-NEXT: add.s64 %rd61, %rd60, 64;
+; CHECK-NEXT: selp.b64 %rd62, %rd59, %rd61, %p6;
+; CHECK-NEXT: setp.ne.s64 %p7, %rd2, 0;
+; CHECK-NEXT: clz.b64 %r3, %rd2;
+; CHECK-NEXT: cvt.u64.u32 %rd63, %r3;
+; CHECK-NEXT: clz.b64 %r4, %rd1;
+; CHECK-NEXT: cvt.u64.u32 %rd64, %r4;
+; CHECK-NEXT: add.s64 %rd65, %rd64, 64;
+; CHECK-NEXT: selp.b64 %rd66, %rd63, %rd65, %p7;
+; CHECK-NEXT: sub.cc.s64 %rd7, %rd62, %rd66;
+; CHECK-NEXT: subc.cc.s64 %rd8, %rd110, 0;
+; CHECK-NEXT: setp.eq.s64 %p8, %rd8, 0;
+; CHECK-NEXT: setp.ne.s64 %p9, %rd8, 0;
+; CHECK-NEXT: selp.u32 %r5, -1, 0, %p9;
+; CHECK-NEXT: setp.gt.u64 %p10, %rd7, 127;
+; CHECK-NEXT: selp.u32 %r6, -1, 0, %p10;
+; CHECK-NEXT: selp.b32 %r7, %r6, %r5, %p8;
+; CHECK-NEXT: and.b32 %r8, %r7, 1;
+; CHECK-NEXT: setp.eq.b32 %p11, %r8, 1;
+; CHECK-NEXT: or.pred %p12, %p5, %p11;
+; CHECK-NEXT: xor.b64 %rd67, %rd7, 127;
+; CHECK-NEXT: or.b64 %rd68, %rd67, %rd8;
+; CHECK-NEXT: setp.eq.s64 %p13, %rd68, 0;
+; CHECK-NEXT: selp.b64 %rd119, 0, %rd2, %p12;
+; CHECK-NEXT: selp.b64 %rd118, 0, %rd1, %p12;
+; CHECK-NEXT: or.pred %p14, %p12, %p13;
+; CHECK-NEXT: @%p14 bra $L__BB4_5;
+; CHECK-NEXT: // %bb.3: // %udiv-bb1
+; CHECK-NEXT: add.cc.s64 %rd112, %rd7, 1;
+; CHECK-NEXT: addc.cc.s64 %rd113, %rd8, 0;
+; CHECK-NEXT: or.b64 %rd71, %rd112, %rd113;
+; CHECK-NEXT: setp.eq.s64 %p15, %rd71, 0;
+; CHECK-NEXT: cvt.u32.u64 %r9, %rd7;
+; CHECK-NEXT: mov.b32 %r10, 127;
+; CHECK-NEXT: sub.s32 %r11, %r10, %r9;
+; CHECK-NEXT: shl.b64 %rd72, %rd2, %r11;
+; CHECK-NEXT: mov.b32 %r12, 64;
+; CHECK-NEXT: sub.s32 %r13, %r12, %r11;
+; CHECK-NEXT: shr.u64 %rd73, %rd1, %r13;
+; CHECK-NEXT: or.b64 %rd74, %rd72, %rd73;
+; CHECK-NEXT: mov.b32 %r14, 63;
+; CHECK-NEXT: sub.s32 %r15, %r14, %r9;
+; CHECK-NEXT: shl.b64 %rd75, %rd1, %r15;
+; CHECK-NEXT: setp.gt.s32 %p16, %r11, 63;
+; CHECK-NEXT: selp.b64 %rd117, %rd75, %rd74, %p16;
+; CHECK-NEXT: shl.b64 %rd116, %rd1, %r11;
+; CHECK-NEXT: mov.u64 %rd107, %rd110;
+; CHECK-NEXT: @%p15 bra $L__BB4_4;
+; CHECK-NEXT: // %bb.1: // %udiv-preheader
+; CHECK-NEXT: cvt.u32.u64 %r16, %rd112;
+; CHECK-NEXT: shr.u64 %rd78, %rd1, %r16;
+; CHECK-NEXT: sub.s32 %r18, %r12, %r16;
+; CHECK-NEXT: shl.b64 %rd79, %rd2, %r18;
+; CHECK-NEXT: or.b64 %rd80, %rd78, %rd79;
+; CHECK-NEXT: add.s32 %r19, %r16, -64;
+; CHECK-NEXT: shr.u64 %rd81, %rd2, %r19;
+; CHECK-NEXT: setp.gt.s32 %p17, %r16, 63;
+; CHECK-NEXT: selp.b64 %rd114, %rd81, %rd80, %p17;
+; CHECK-NEXT: shr.u64 %rd115, %rd2, %r16;
+; CHECK-NEXT: add.cc.s64 %rd35, %rd3, -1;
+; CHECK-NEXT: addc.cc.s64 %rd36, %rd4, -1;
+; CHECK-NEXT: mov.u64 %rd107, 0;
+; CHECK-NEXT: mov.u64 %rd110, %rd107;
+; CHECK-NEXT: $L__BB4_2: // %udiv-do-while
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: shr.u64 %rd82, %rd114, 63;
+; CHECK-NEXT: shl.b64 %rd83, %rd115, 1;
+; CHECK-NEXT: or.b64 %rd84, %rd83, %rd82;
+; CHECK-NEXT: shl.b64 %rd85, %rd114, 1;
+; CHECK-NEXT: shr.u64 %rd86, %rd117, 63;
+; CHECK-NEXT: or.b64 %rd87, %rd85, %rd86;
+; CHECK-NEXT: shr.u64 %rd88, %rd116, 63;
+; CHECK-NEXT: shl.b64 %rd89, %rd117, 1;
+; CHECK-NEXT: or.b64 %rd90, %rd89, %rd88;
+; CHECK-NEXT: shl.b64 %rd91, %rd116, 1;
+; CHECK-NEXT: or.b64 %rd116, %rd110, %rd91;
+; CHECK-NEXT: or.b64 %rd117, %rd107, %rd90;
+; CHECK-NEXT: sub.cc.s64 %rd92, %rd35, %rd87;
+; CHECK-NEXT: subc.cc.s64 %rd93, %rd36, %rd84;
+; CHECK-NEXT: shr.s64 %rd94, %rd93, 63;
+; CHECK-NEXT: and.b64 %rd110, %rd94, 1;
+; CHECK-NEXT: and.b64 %rd95, %rd94, %rd3;
+; CHECK-NEXT: and.b64 %rd96, %rd94, %rd4;
+; CHECK-NEXT: sub.cc.s64 %rd114, %rd87, %rd95;
+; CHECK-NEXT: subc.cc.s64 %rd115, %rd84, %rd96;
+; CHECK-NEXT: add.cc.s64 %rd112, %rd112, -1;
+; CHECK-NEXT: addc.cc.s64 %rd113, %rd113, -1;
+; CHECK-NEXT: or.b64 %rd97, %rd112, %rd113;
+; CHECK-NEXT: setp.eq.s64 %p18, %rd97, 0;
+; CHECK-NEXT: @%p18 bra $L__BB4_4;
+; CHECK-NEXT: bra.uni $L__BB4_2;
+; CHECK-NEXT: $L__BB4_4: // %udiv-loop-exit
+; CHECK-NEXT: shr.u64 %rd98, %rd116, 63;
+; CHECK-NEXT: shl.b64 %rd99, %rd117, 1;
+; CHECK-NEXT: or.b64 %rd100, %rd99, %rd98;
+; CHECK-NEXT: shl.b64 %rd101, %rd116, 1;
+; CHECK-NEXT: or.b64 %rd118, %rd110, %rd101;
+; CHECK-NEXT: or.b64 %rd119, %rd107, %rd100;
+; CHECK-NEXT: $L__BB4_5: // %udiv-end
+; CHECK-NEXT: xor.b64 %rd102, %rd118, %rd5;
+; CHECK-NEXT: xor.b64 %rd103, %rd119, %rd5;
+; CHECK-NEXT: sub.cc.s64 %rd104, %rd102, %rd5;
+; CHECK-NEXT: subc.cc.s64 %rd105, %rd103, %rd5;
+; CHECK-NEXT: st.param.v2.b64 [func_retval0+0], {%rd104, %rd105};
+; CHECK-NEXT: ret;
+ %div = sdiv i128 %lhs, %rhs
+ ret i128 %div
+}
+
+define i128 @udiv_i128(i128 %lhs, i128 %rhs) {
+; CHECK-LABEL: udiv_i128(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<17>;
+; CHECK-NEXT: .reg .b32 %r<20>;
+; CHECK-NEXT: .reg .b64 %rd<105>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0: // %_udiv-special-cases
+; CHECK-NEXT: ld.param.v2.u64 {%rd41, %rd42}, [udiv_i128_param_0];
+; CHECK-NEXT: ld.param.v2.u64 {%rd43, %rd44}, [udiv_i128_param_1];
+; CHECK-NEXT: or.b64 %rd45, %rd43, %rd44;
+; CHECK-NEXT: setp.eq.s64 %p1, %rd45, 0;
+; CHECK-NEXT: or.b64 %rd46, %rd41, %rd42;
+; CHECK-NEXT: setp.eq.s64 %p2, %rd46, 0;
+; CHECK-NEXT: or.pred %p3, %p1, %p2;
+; CHECK-NEXT: setp.ne.s64 %p4, %rd44, 0;
+; CHECK-NEXT: clz.b64 %r1, %rd44;
+; CHECK-NEXT: cvt.u64.u32 %rd47, %r1;
+; CHECK-NEXT: clz.b64 %r2, %rd43;
+; CHECK-NEXT: cvt.u64.u32 %rd48, %r2;
+; CHECK-NEXT: add.s64 %rd49, %rd48, 64;
+; CHECK-NEXT: selp.b64 %rd50, %rd47, %rd49, %p4;
+; CHECK-NEXT: setp.ne.s64 %p5, %rd42, 0;
+; CHECK-NEXT: clz.b64 %r3, %rd42;
+; CHECK-NEXT: cvt.u64.u32 %rd51, %r3;
+; CHECK-NEXT: clz.b64 %r4, %rd41;
+; CHECK-NEXT: cvt.u64.u32 %rd52, %r4;
+; CHECK-NEXT: add.s64 %rd53, %rd52, 64;
+; CHECK-NEXT: selp.b64 %rd54, %rd51, %rd53, %p5;
+; CHECK-NEXT: mov.u64 %rd95, 0;
+; CHECK-NEXT: sub.cc.s64 %rd5, %rd50, %rd54;
+; CHECK-NEXT: subc.cc.s64 %rd6, %rd95, 0;
+; CHECK-NEXT: setp.eq.s64 %p6, %rd6, 0;
+; CHECK-NEXT: setp.ne.s64 %p7, %rd6, 0;
+; CHECK-NEXT: selp.u32 %r5, -1, 0, %p7;
+; CHECK-NEXT: setp.gt.u64 %p8, %rd5, 127;
+; CHECK-NEXT: selp.u32 %r6, -1, 0, %p8;
+; CHECK-NEXT: selp.b32 %r7, %r6, %r5, %p6;
+; CHECK-NEXT: and.b32 %r8, %r7, 1;
+; CHECK-NEXT: setp.eq.b32 %p9, %r8, 1;
+; CHECK-NEXT: or.pred %p10, %p3, %p9;
+; CHECK-NEXT: xor.b64 %rd56, %rd5, 127;
+; CHECK-NEXT: or.b64 %rd57, %rd56, %rd6;
+; CHECK-NEXT: setp.eq.s64 %p11, %rd57, 0;
+; CHECK-NEXT: selp.b64 %rd104, 0, %rd42, %p10;
+; CHECK-NEXT: selp.b64 %rd103, 0, %rd41, %p10;
+; CHECK-NEXT: or.pred %p12, %p10, %p11;
+; CHECK-NEXT: @%p12 bra $L__BB5_5;
+; CHECK-NEXT: // %bb.3: // %udiv-bb1
+; CHECK-NEXT: add.cc.s64 %rd97, %rd5, 1;
+; CHECK-NEXT: addc.cc.s64 %rd98, %rd6, 0;
+; CHECK-NEXT: or.b64 %rd60, %rd97, %rd98;
+; CHECK-NEXT: setp.eq.s64 %p13, %rd60, 0;
+; CHECK-NEXT: cvt.u32.u64 %r9, %rd5;
+; CHECK-NEXT: mov.b32 %r10, 127;
+; CHECK-NEXT: sub.s32 %r11, %r10, %r9;
+; CHECK-NEXT: shl.b64 %rd61, %rd42, %r11;
+; CHECK-NEXT: mov.b32 %r12, 64;
+; CHECK-NEXT: sub.s32 %r13, %r12, %r11;
+; CHECK-NEXT: shr.u64 %rd62, %rd41, %r13;
+; CHECK-NEXT: or.b64 %rd63, %rd61, %rd62;
+; CHECK-NEXT: mov.b32 %r14, 63;
+; CHECK-NEXT: sub.s32 %r15, %r14, %r9;
+; CHECK-NEXT: shl.b64 %rd64, %rd41, %r15;
+; CHECK-NEXT: setp.gt.s32 %p14, %r11, 63;
+; CHECK-NEXT: selp.b64 %rd102, %rd64, %rd63, %p14;
+; CHECK-NEXT: shl.b64 %rd101, %rd41, %r11;
+; CHECK-NEXT: mov.u64 %rd92, %rd95;
+; CHECK-NEXT: @%p13 bra $L__BB5_4;
+; CHECK-NEXT: // %bb.1: // %udiv-preheader
+; CHECK-NEXT: cvt.u32.u64 %r16, %rd97;
+; CHECK-NEXT: shr.u64 %rd67, %rd41, %r16;
+; CHECK-NEXT: sub.s32 %r18, %r12, %r16;
+; CHECK-NEXT: shl.b64 %rd68, %rd42, %r18;
+; CHECK-NEXT: or.b64 %rd69, %rd67, %rd68;
+; CHECK-NEXT: add.s32 %r19, %r16, -64;
+; CHECK-NEXT: shr.u64 %rd70, %rd42, %r19;
+; CHECK-NEXT: setp.gt.s32 %p15, %r16, 63;
+; CHECK-NEXT: selp.b64 %rd99, %rd70, %rd69, %p15;
+; CHECK-NEXT: shr.u64 %rd100, %rd42, %r16;
+; CHECK-NEXT: add.cc.s64 %rd33, %rd43, -1;
+; CHECK-NEXT: addc.cc.s64 %rd34, %rd44, -1;
+; CHECK-NEXT: mov.u64 %rd92, 0;
+; CHECK-NEXT: mov.u64 %rd95, %rd92;
+; CHECK-NEXT: $L__BB5_2: // %udiv-do-while
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: shr.u64 %rd71, %rd99, 63;
+; CHECK-NEXT: shl.b64 %rd72, %rd100, 1;
+; CHECK-NEXT: or.b64 %rd73, %rd72, %rd71;
+; CHECK-NEXT: shl.b64 %rd74, %rd99, 1;
+; CHECK-NEXT: shr.u64 %rd75, %rd102, 63;
+; CHECK-NEXT: or.b64 %rd76, %rd74, %rd75;
+; CHECK-NEXT: shr.u64 %rd77, %rd101, 63;
+; CHECK-NEXT: shl.b64 %rd78, %rd102, 1;
+; CHECK-NEXT: or.b64 %rd79, %rd78, %rd77;
+; CHECK-NEXT: shl.b64 %rd80, %rd101, 1;
+; CHECK-NEXT: or.b64 %rd101, %rd95, %rd80;
+; CHECK-NEXT: or.b64 %rd102, %rd92, %rd79;
+; CHECK-NEXT: sub.cc.s64 %rd81, %rd33, %rd76;
+; CHECK-NEXT: subc.cc.s64 %rd82, %rd34, %rd73;
+; CHECK-NEXT: shr.s64 %rd83, %rd82, 63;
+; CHECK-NEXT: and.b64 %rd95, %rd83, 1;
+; CHECK-NEXT: and.b64 %rd84, %rd83, %rd43;
+; CHECK-NEXT: and.b64 %rd85, %rd83, %rd44;
+; CHECK-NEXT: sub.cc.s64 %rd99, %rd76, %rd84;
+; CHECK-NEXT: subc.cc.s64 %rd100, %rd73, %rd85;
+; CHECK-NEXT: add.cc.s64 %rd97, %rd97, -1;
+; CHECK-NEXT: addc.cc.s64 %rd98, %rd98, -1;
+; CHECK-NEXT: or.b64 %rd86, %rd97, %rd98;
+; CHECK-NEXT: setp.eq.s64 %p16, %rd86, 0;
+; CHECK-NEXT: @%p16 bra $L__BB5_4;
+; CHECK-NEXT: bra.uni $L__BB5_2;
+; CHECK-NEXT: $L__BB5_4: // %udiv-loop-exit
+; CHECK-NEXT: shr.u64 %rd87, %rd101, 63;
+; CHECK-NEXT: shl.b64 %rd88, %rd102, 1;
+; CHECK-NEXT: or.b64 %rd89, %rd88, %rd87;
+; CHECK-NEXT: shl.b64 %rd90, %rd101, 1;
+; CHECK-NEXT: or.b64 %rd103, %rd95, %rd90;
+; CHECK-NEXT: or.b64 %rd104, %rd92, %rd89;
+; CHECK-NEXT: $L__BB5_5: // %udiv-end
+; CHECK-NEXT: st.param.v2.b64 [func_retval0+0], {%rd103, %rd104};
+; CHECK-NEXT: ret;
+ %div = udiv i128 %lhs, %rhs
+ ret i128 %div
+}
+
+define i128 @sdiv_i128_pow2k(i128 %lhs) {
+; CHECK-LABEL: sdiv_i128_pow2k(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<11>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [sdiv_i128_pow2k_param_0];
+; CHECK-NEXT: shr.s64 %rd3, %rd2, 63;
+; CHECK-NEXT: shr.u64 %rd4, %rd3, 31;
+; CHECK-NEXT: add.cc.s64 %rd5, %rd1, %rd4;
+; CHECK-NEXT: addc.cc.s64 %rd6, %rd2, 0;
+; CHECK-NEXT: shl.b64 %rd7, %rd6, 31;
+; CHECK-NEXT: shr.u64 %rd8, %rd5, 33;
+; CHECK-NEXT: or.b64 %rd9, %rd8, %rd7;
+; CHECK-NEXT: shr.s64 %rd10, %rd6, 33;
+; CHECK-NEXT: st.param.v2.b64 [func_retval0+0], {%rd9, %rd10};
+; CHECK-NEXT: ret;
+ %div = sdiv i128 %lhs, 8589934592
+ ret i128 %div
+}
+
+define i128 @udiv_i128_pow2k(i128 %lhs) {
+; CHECK-LABEL: udiv_i128_pow2k(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [udiv_i128_pow2k_param_0];
+; CHECK-NEXT: shl.b64 %rd3, %rd2, 31;
+; CHECK-NEXT: shr.u64 %rd4, %rd1, 33;
+; CHECK-NEXT: or.b64 %rd5, %rd4, %rd3;
+; CHECK-NEXT: shr.u64 %rd6, %rd2, 33;
+; CHECK-NEXT: st.param.v2.b64 [func_retval0+0], {%rd5, %rd6};
+; CHECK-NEXT: ret;
+ %div = udiv i128 %lhs, 8589934592
+ ret i128 %div
+}
+
+define i128 @add_i128(i128 %lhs, i128 %rhs) {
+; CHECK-LABEL: add_i128(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [add_i128_param_0];
+; CHECK-NEXT: ld.param.v2.u64 {%rd3, %rd4}, [add_i128_param_1];
+; CHECK-NEXT: add.cc.s64 %rd5, %rd1, %rd3;
+; CHECK-NEXT: addc.cc.s64 %rd6, %rd2, %rd4;
+; CHECK-NEXT: st.param.v2.b64 [func_retval0+0], {%rd5, %rd6};
+; CHECK-NEXT: ret;
+ %result = add i128 %lhs, %rhs
+ ret i128 %result
+}
diff --git a/llvm/test/CodeGen/NVPTX/libcall-fulfilled.ll b/llvm/test/CodeGen/NVPTX/libcall-fulfilled.ll
deleted file mode 100644
index e7b2140b92200..0000000000000
--- a/llvm/test/CodeGen/NVPTX/libcall-fulfilled.ll
+++ /dev/null
@@ -1,47 +0,0 @@
-; RUN: llc < %s -march=nvptx64 2>&1 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %}
-
-; Allow to make libcalls that are defined in the current module
-
-declare ptr @malloc(i64)
-declare void @free(ptr)
-
-; Underlying libcall declaration
-; CHECK: .visible .func (.param .align 16 .b8 func_retval0[16]) __umodti3
-
-define i128 @remainder(i128, i128) {
-bb0:
- ; CHECK: { // callseq 0, 0
- ; CHECK: call.uni (retval0),
- ; CHECK-NEXT: __umodti3,
- ; CHECK-NEXT: (
- ; CHECK-NEXT: param0,
- ; CHECK-NEXT: param1
- ; CHECK-NEXT: );
- ; CHECK-NEXT: ld.param.v2.b64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [retval0+0];
- ; CHECK-NEXT: } // callseq 0
- %a = urem i128 %0, %1
- br label %bb1
-
-bb1:
- ; CHECK-NEXT: st.param.v2.b64 [func_retval0+0], {%[[REG0]], %[[REG1]]};
- ; CHECK-NEXT: ret;
- ret i128 %a
-}
-
-; Underlying libcall definition
-; CHECK: .visible .func (.param .align 16 .b8 func_retval0[16]) __umodti3(
-define i128 @__umodti3(i128, i128) {
- ret i128 0
-}
-
-define void @malloc_then_free() {
-; CHECK: call.uni (retval0),
-; CHECK: malloc,
-; CHECK: call.uni
-; CHECK: free,
- %a = call ptr @malloc(i64 4)
- store i8 0, ptr %a
- call void @free(ptr %a)
- ret void
-}
diff --git a/llvm/test/CodeGen/NVPTX/libcall-instruction.ll b/llvm/test/CodeGen/NVPTX/libcall-instruction.ll
deleted file mode 100644
index 33bd9a4c8e381..0000000000000
--- a/llvm/test/CodeGen/NVPTX/libcall-instruction.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: not --crash llc < %s -march=nvptx 2>&1 | FileCheck %s
-; used to panic on failed assertion and now fails with an "Undefined external symbol"
-
-; CHECK: LLVM ERROR: Undefined external symbol "__umodti3"
-define hidden i128 @remainder(i128, i128) {
- %3 = urem i128 %0, %1
- ret i128 %3
-}
diff --git a/llvm/test/CodeGen/NVPTX/libcall-intrinsic.ll b/llvm/test/CodeGen/NVPTX/libcall-intrinsic.ll
deleted file mode 100644
index f5f465cbc994e..0000000000000
--- a/llvm/test/CodeGen/NVPTX/libcall-intrinsic.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: not --crash llc < %s -march=nvptx 2>&1 | FileCheck %s
-; used to seqfault and now fails with an "Undefined external symbol"
-
-; CHECK: LLVM ERROR: Undefined external symbol "__powidf2"
-define double @powi(double, i32) {
- %a = call double @llvm.powi.f64.i32(double %0, i32 %1)
- ret double %a
-}
-
-declare double @llvm.powi.f64.i32(double, i32) nounwind readnone
More information about the llvm-commits
mailing list