[llvm] [NVPTX] deprecate nvvm.rotate.* intrinsics, cleanup funnel-shift handling (PR #107655)
Alex MacLean via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 23 13:39:03 PDT 2024
================
@@ -315,35 +269,180 @@ define i64 @rotr64(i64 %a, i64 %n) {
define i64 @rotr64_imm(i64 %a) {
; SM20-LABEL: rotr64_imm(
; SM20: {
-; SM20-NEXT: .reg .b64 %rd<3>;
+; SM20-NEXT: .reg .b64 %rd<5>;
; SM20-EMPTY:
; SM20-NEXT: // %bb.0:
; SM20-NEXT: ld.param.u64 %rd1, [rotr64_imm_param_0];
-; SM20-NEXT: {
-; SM20-NEXT: .reg .b64 %lhs;
-; SM20-NEXT: .reg .b64 %rhs;
-; SM20-NEXT: shl.b64 %lhs, %rd1, 62;
-; SM20-NEXT: shr.b64 %rhs, %rd1, 2;
-; SM20-NEXT: add.u64 %rd2, %lhs, %rhs;
-; SM20-NEXT: }
-; SM20-NEXT: st.param.b64 [func_retval0+0], %rd2;
+; SM20-NEXT: shl.b64 %rd2, %rd1, 62;
+; SM20-NEXT: shr.u64 %rd3, %rd1, 2;
+; SM20-NEXT: or.b64 %rd4, %rd3, %rd2;
+; SM20-NEXT: st.param.b64 [func_retval0+0], %rd4;
; SM20-NEXT: ret;
;
; SM35-LABEL: rotr64_imm(
; SM35: {
-; SM35-NEXT: .reg .b64 %rd<3>;
+; SM35-NEXT: .reg .b64 %rd<5>;
; SM35-EMPTY:
; SM35-NEXT: // %bb.0:
; SM35-NEXT: ld.param.u64 %rd1, [rotr64_imm_param_0];
-; SM35-NEXT: {
-; SM35-NEXT: .reg .b64 %lhs;
-; SM35-NEXT: .reg .b64 %rhs;
-; SM35-NEXT: shl.b64 %lhs, %rd1, 62;
-; SM35-NEXT: shr.b64 %rhs, %rd1, 2;
-; SM35-NEXT: add.u64 %rd2, %lhs, %rhs;
-; SM35-NEXT: }
-; SM35-NEXT: st.param.b64 [func_retval0+0], %rd2;
+; SM35-NEXT: shl.b64 %rd2, %rd1, 62;
+; SM35-NEXT: shr.u64 %rd3, %rd1, 2;
+; SM35-NEXT: or.b64 %rd4, %rd3, %rd2;
+; SM35-NEXT: st.param.b64 [func_retval0+0], %rd4;
; SM35-NEXT: ret;
%val = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 66)
ret i64 %val
}
+
+define i32 @funnel_shift_right_32(i32 %a, i32 %b, i32 %c) {
+; SM20-LABEL: funnel_shift_right_32(
+; SM20: {
+; SM20-NEXT: .reg .b32 %r<11>;
+; SM20-EMPTY:
+; SM20-NEXT: // %bb.0:
+; SM20-NEXT: ld.param.u32 %r1, [funnel_shift_right_32_param_0];
+; SM20-NEXT: ld.param.u32 %r2, [funnel_shift_right_32_param_2];
+; SM20-NEXT: and.b32 %r3, %r2, 31;
+; SM20-NEXT: ld.param.u32 %r4, [funnel_shift_right_32_param_1];
+; SM20-NEXT: shr.u32 %r5, %r4, %r3;
+; SM20-NEXT: shl.b32 %r6, %r1, 1;
+; SM20-NEXT: not.b32 %r7, %r2;
+; SM20-NEXT: and.b32 %r8, %r7, 31;
+; SM20-NEXT: shl.b32 %r9, %r6, %r8;
+; SM20-NEXT: or.b32 %r10, %r9, %r5;
+; SM20-NEXT: st.param.b32 [func_retval0+0], %r10;
+; SM20-NEXT: ret;
+;
+; SM35-LABEL: funnel_shift_right_32(
+; SM35: {
+; SM35-NEXT: .reg .b32 %r<5>;
+; SM35-EMPTY:
+; SM35-NEXT: // %bb.0:
+; SM35-NEXT: ld.param.u32 %r1, [funnel_shift_right_32_param_0];
+; SM35-NEXT: ld.param.u32 %r2, [funnel_shift_right_32_param_1];
+; SM35-NEXT: ld.param.u32 %r3, [funnel_shift_right_32_param_2];
+; SM35-NEXT: shf.r.wrap.b32 %r4, %r1, %r2, %r3;
+; SM35-NEXT: st.param.b32 [func_retval0+0], %r4;
+; SM35-NEXT: ret;
+ %val = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
+ ret i32 %val
+}
+
+define i32 @funnel_shift_left_32(i32 %a, i32 %b, i32 %c) {
+; SM20-LABEL: funnel_shift_left_32(
+; SM20: {
+; SM20-NEXT: .reg .b32 %r<11>;
+; SM20-EMPTY:
+; SM20-NEXT: // %bb.0:
+; SM20-NEXT: ld.param.u32 %r1, [funnel_shift_left_32_param_0];
+; SM20-NEXT: ld.param.u32 %r2, [funnel_shift_left_32_param_2];
+; SM20-NEXT: and.b32 %r3, %r2, 31;
+; SM20-NEXT: shl.b32 %r4, %r1, %r3;
+; SM20-NEXT: ld.param.u32 %r5, [funnel_shift_left_32_param_1];
+; SM20-NEXT: shr.u32 %r6, %r5, 1;
+; SM20-NEXT: not.b32 %r7, %r2;
+; SM20-NEXT: and.b32 %r8, %r7, 31;
+; SM20-NEXT: shr.u32 %r9, %r6, %r8;
+; SM20-NEXT: or.b32 %r10, %r4, %r9;
+; SM20-NEXT: st.param.b32 [func_retval0+0], %r10;
+; SM20-NEXT: ret;
+;
+; SM35-LABEL: funnel_shift_left_32(
+; SM35: {
+; SM35-NEXT: .reg .b32 %r<5>;
+; SM35-EMPTY:
+; SM35-NEXT: // %bb.0:
+; SM35-NEXT: ld.param.u32 %r1, [funnel_shift_left_32_param_0];
+; SM35-NEXT: ld.param.u32 %r2, [funnel_shift_left_32_param_1];
+; SM35-NEXT: ld.param.u32 %r3, [funnel_shift_left_32_param_2];
+; SM35-NEXT: shf.l.wrap.b32 %r4, %r1, %r2, %r3;
+; SM35-NEXT: st.param.b32 [func_retval0+0], %r4;
+; SM35-NEXT: ret;
+ %val = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
+ ret i32 %val
+}
+
+define i64 @funnel_shift_right_64(i64 %a, i64 %b, i64 %c) {
+; SM20-LABEL: funnel_shift_right_64(
+; SM20: {
+; SM20-NEXT: .reg .b32 %r<5>;
+; SM20-NEXT: .reg .b64 %rd<7>;
+; SM20-EMPTY:
+; SM20-NEXT: // %bb.0:
+; SM20-NEXT: ld.param.u64 %rd1, [funnel_shift_right_64_param_0];
+; SM20-NEXT: ld.param.u32 %r1, [funnel_shift_right_64_param_2];
+; SM20-NEXT: and.b32 %r2, %r1, 63;
+; SM20-NEXT: ld.param.u64 %rd2, [funnel_shift_right_64_param_1];
+; SM20-NEXT: shr.u64 %rd3, %rd2, %r2;
+; SM20-NEXT: shl.b64 %rd4, %rd1, 1;
+; SM20-NEXT: not.b32 %r3, %r1;
+; SM20-NEXT: and.b32 %r4, %r3, 63;
+; SM20-NEXT: shl.b64 %rd5, %rd4, %r4;
+; SM20-NEXT: or.b64 %rd6, %rd5, %rd3;
+; SM20-NEXT: st.param.b64 [func_retval0+0], %rd6;
+; SM20-NEXT: ret;
+;
+; SM35-LABEL: funnel_shift_right_64(
+; SM35: {
+; SM35-NEXT: .reg .b32 %r<5>;
+; SM35-NEXT: .reg .b64 %rd<7>;
+; SM35-EMPTY:
+; SM35-NEXT: // %bb.0:
+; SM35-NEXT: ld.param.u64 %rd1, [funnel_shift_right_64_param_0];
+; SM35-NEXT: ld.param.u32 %r1, [funnel_shift_right_64_param_2];
+; SM35-NEXT: and.b32 %r2, %r1, 63;
+; SM35-NEXT: ld.param.u64 %rd2, [funnel_shift_right_64_param_1];
+; SM35-NEXT: shr.u64 %rd3, %rd2, %r2;
+; SM35-NEXT: shl.b64 %rd4, %rd1, 1;
+; SM35-NEXT: not.b32 %r3, %r1;
+; SM35-NEXT: and.b32 %r4, %r3, 63;
+; SM35-NEXT: shl.b64 %rd5, %rd4, %r4;
+; SM35-NEXT: or.b64 %rd6, %rd5, %rd3;
+; SM35-NEXT: st.param.b64 [func_retval0+0], %rd6;
+; SM35-NEXT: ret;
+ %val = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c)
+ ret i64 %val
+}
+
+define i64 @funnel_shift_left_64(i64 %a, i64 %b, i64 %c) {
+; SM20-LABEL: funnel_shift_left_64(
+; SM20: {
+; SM20-NEXT: .reg .b32 %r<5>;
+; SM20-NEXT: .reg .b64 %rd<7>;
+; SM20-EMPTY:
+; SM20-NEXT: // %bb.0:
+; SM20-NEXT: ld.param.u64 %rd1, [funnel_shift_left_64_param_0];
+; SM20-NEXT: ld.param.u32 %r1, [funnel_shift_left_64_param_2];
+; SM20-NEXT: and.b32 %r2, %r1, 63;
+; SM20-NEXT: shl.b64 %rd2, %rd1, %r2;
+; SM20-NEXT: ld.param.u64 %rd3, [funnel_shift_left_64_param_1];
+; SM20-NEXT: shr.u64 %rd4, %rd3, 1;
+; SM20-NEXT: not.b32 %r3, %r1;
+; SM20-NEXT: and.b32 %r4, %r3, 63;
+; SM20-NEXT: shr.u64 %rd5, %rd4, %r4;
+; SM20-NEXT: or.b64 %rd6, %rd2, %rd5;
+; SM20-NEXT: st.param.b64 [func_retval0+0], %rd6;
+; SM20-NEXT: ret;
+;
+; SM35-LABEL: funnel_shift_left_64(
+; SM35: {
+; SM35-NEXT: .reg .b32 %r<5>;
+; SM35-NEXT: .reg .b64 %rd<7>;
+; SM35-EMPTY:
+; SM35-NEXT: // %bb.0:
+; SM35-NEXT: ld.param.u64 %rd1, [funnel_shift_left_64_param_0];
+; SM35-NEXT: ld.param.u32 %r1, [funnel_shift_left_64_param_2];
+; SM35-NEXT: and.b32 %r2, %r1, 63;
+; SM35-NEXT: shl.b64 %rd2, %rd1, %r2;
+; SM35-NEXT: ld.param.u64 %rd3, [funnel_shift_left_64_param_1];
+; SM35-NEXT: shr.u64 %rd4, %rd3, 1;
+; SM35-NEXT: not.b32 %r3, %r1;
+; SM35-NEXT: and.b32 %r4, %r3, 63;
+; SM35-NEXT: shr.u64 %rd5, %rd4, %r4;
+; SM35-NEXT: or.b64 %rd6, %rd2, %rd5;
----------------
AlexMaclean wrote:
See above.
https://github.com/llvm/llvm-project/pull/107655
More information about the llvm-commits mailing list