[llvm] [NVPTX] deprecate nvvm.rotate.* intrinsics, cleanup funnel-shift handling (PR #107655)

Artem Belevich via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 23 13:10:58 PDT 2024


================
@@ -315,35 +269,180 @@ define i64 @rotr64(i64 %a, i64 %n) {
 define i64 @rotr64_imm(i64 %a) {
 ; SM20-LABEL: rotr64_imm(
 ; SM20:       {
-; SM20-NEXT:    .reg .b64 %rd<3>;
+; SM20-NEXT:    .reg .b64 %rd<5>;
 ; SM20-EMPTY:
 ; SM20-NEXT:  // %bb.0:
 ; SM20-NEXT:    ld.param.u64 %rd1, [rotr64_imm_param_0];
-; SM20-NEXT:    {
-; SM20-NEXT:    .reg .b64 %lhs;
-; SM20-NEXT:    .reg .b64 %rhs;
-; SM20-NEXT:    shl.b64 %lhs, %rd1, 62;
-; SM20-NEXT:    shr.b64 %rhs, %rd1, 2;
-; SM20-NEXT:    add.u64 %rd2, %lhs, %rhs;
-; SM20-NEXT:    }
-; SM20-NEXT:    st.param.b64 [func_retval0+0], %rd2;
+; SM20-NEXT:    shl.b64 %rd2, %rd1, 62;
+; SM20-NEXT:    shr.u64 %rd3, %rd1, 2;
+; SM20-NEXT:    or.b64 %rd4, %rd3, %rd2;
+; SM20-NEXT:    st.param.b64 [func_retval0+0], %rd4;
 ; SM20-NEXT:    ret;
 ;
 ; SM35-LABEL: rotr64_imm(
 ; SM35:       {
-; SM35-NEXT:    .reg .b64 %rd<3>;
+; SM35-NEXT:    .reg .b64 %rd<5>;
 ; SM35-EMPTY:
 ; SM35-NEXT:  // %bb.0:
 ; SM35-NEXT:    ld.param.u64 %rd1, [rotr64_imm_param_0];
-; SM35-NEXT:    {
-; SM35-NEXT:    .reg .b64 %lhs;
-; SM35-NEXT:    .reg .b64 %rhs;
-; SM35-NEXT:    shl.b64 %lhs, %rd1, 62;
-; SM35-NEXT:    shr.b64 %rhs, %rd1, 2;
-; SM35-NEXT:    add.u64 %rd2, %lhs, %rhs;
-; SM35-NEXT:    }
-; SM35-NEXT:    st.param.b64 [func_retval0+0], %rd2;
+; SM35-NEXT:    shl.b64 %rd2, %rd1, 62;
+; SM35-NEXT:    shr.u64 %rd3, %rd1, 2;
+; SM35-NEXT:    or.b64 %rd4, %rd3, %rd2;
+; SM35-NEXT:    st.param.b64 [func_retval0+0], %rd4;
 ; SM35-NEXT:    ret;
   %val = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 66)
   ret i64 %val
 }
+
+define i32 @funnel_shift_right_32(i32 %a, i32 %b, i32 %c) {
+; SM20-LABEL: funnel_shift_right_32(
+; SM20:       {
+; SM20-NEXT:    .reg .b32 %r<11>;
+; SM20-EMPTY:
+; SM20-NEXT:  // %bb.0:
+; SM20-NEXT:    ld.param.u32 %r1, [funnel_shift_right_32_param_0];
+; SM20-NEXT:    ld.param.u32 %r2, [funnel_shift_right_32_param_2];
+; SM20-NEXT:    and.b32 %r3, %r2, 31;
+; SM20-NEXT:    ld.param.u32 %r4, [funnel_shift_right_32_param_1];
+; SM20-NEXT:    shr.u32 %r5, %r4, %r3;
+; SM20-NEXT:    shl.b32 %r6, %r1, 1;
+; SM20-NEXT:    not.b32 %r7, %r2;
+; SM20-NEXT:    and.b32 %r8, %r7, 31;
+; SM20-NEXT:    shl.b32 %r9, %r6, %r8;
+; SM20-NEXT:    or.b32 %r10, %r9, %r5;
+; SM20-NEXT:    st.param.b32 [func_retval0+0], %r10;
+; SM20-NEXT:    ret;
+;
+; SM35-LABEL: funnel_shift_right_32(
+; SM35:       {
+; SM35-NEXT:    .reg .b32 %r<5>;
+; SM35-EMPTY:
+; SM35-NEXT:  // %bb.0:
+; SM35-NEXT:    ld.param.u32 %r1, [funnel_shift_right_32_param_0];
+; SM35-NEXT:    ld.param.u32 %r2, [funnel_shift_right_32_param_1];
+; SM35-NEXT:    ld.param.u32 %r3, [funnel_shift_right_32_param_2];
+; SM35-NEXT:    shf.r.wrap.b32 %r4, %r1, %r2, %r3;
+; SM35-NEXT:    st.param.b32 [func_retval0+0], %r4;
+; SM35-NEXT:    ret;
+  %val = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
+  ret i32 %val
+}
+
+define i32 @funnel_shift_left_32(i32 %a, i32 %b, i32 %c) {
+; SM20-LABEL: funnel_shift_left_32(
+; SM20:       {
+; SM20-NEXT:    .reg .b32 %r<11>;
+; SM20-EMPTY:
+; SM20-NEXT:  // %bb.0:
+; SM20-NEXT:    ld.param.u32 %r1, [funnel_shift_left_32_param_0];
+; SM20-NEXT:    ld.param.u32 %r2, [funnel_shift_left_32_param_2];
+; SM20-NEXT:    and.b32 %r3, %r2, 31;
+; SM20-NEXT:    shl.b32 %r4, %r1, %r3;
+; SM20-NEXT:    ld.param.u32 %r5, [funnel_shift_left_32_param_1];
+; SM20-NEXT:    shr.u32 %r6, %r5, 1;
+; SM20-NEXT:    not.b32 %r7, %r2;
+; SM20-NEXT:    and.b32 %r8, %r7, 31;
+; SM20-NEXT:    shr.u32 %r9, %r6, %r8;
+; SM20-NEXT:    or.b32 %r10, %r4, %r9;
+; SM20-NEXT:    st.param.b32 [func_retval0+0], %r10;
+; SM20-NEXT:    ret;
+;
+; SM35-LABEL: funnel_shift_left_32(
+; SM35:       {
+; SM35-NEXT:    .reg .b32 %r<5>;
+; SM35-EMPTY:
+; SM35-NEXT:  // %bb.0:
+; SM35-NEXT:    ld.param.u32 %r1, [funnel_shift_left_32_param_0];
+; SM35-NEXT:    ld.param.u32 %r2, [funnel_shift_left_32_param_1];
+; SM35-NEXT:    ld.param.u32 %r3, [funnel_shift_left_32_param_2];
+; SM35-NEXT:    shf.l.wrap.b32 %r4, %r1, %r2, %r3;
+; SM35-NEXT:    st.param.b32 [func_retval0+0], %r4;
+; SM35-NEXT:    ret;
+  %val = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
+  ret i32 %val
+}
+
+define i64 @funnel_shift_right_64(i64 %a, i64 %b, i64 %c) {
+; SM20-LABEL: funnel_shift_right_64(
+; SM20:       {
+; SM20-NEXT:    .reg .b32 %r<5>;
+; SM20-NEXT:    .reg .b64 %rd<7>;
+; SM20-EMPTY:
+; SM20-NEXT:  // %bb.0:
+; SM20-NEXT:    ld.param.u64 %rd1, [funnel_shift_right_64_param_0];
+; SM20-NEXT:    ld.param.u32 %r1, [funnel_shift_right_64_param_2];
+; SM20-NEXT:    and.b32 %r2, %r1, 63;
+; SM20-NEXT:    ld.param.u64 %rd2, [funnel_shift_right_64_param_1];
+; SM20-NEXT:    shr.u64 %rd3, %rd2, %r2;
+; SM20-NEXT:    shl.b64 %rd4, %rd1, 1;
+; SM20-NEXT:    not.b32 %r3, %r1;
+; SM20-NEXT:    and.b32 %r4, %r3, 63;
+; SM20-NEXT:    shl.b64 %rd5, %rd4, %r4;
+; SM20-NEXT:    or.b64 %rd6, %rd5, %rd3;
+; SM20-NEXT:    st.param.b64 [func_retval0+0], %rd6;
+; SM20-NEXT:    ret;
+;
+; SM35-LABEL: funnel_shift_right_64(
+; SM35:       {
+; SM35-NEXT:    .reg .b32 %r<5>;
+; SM35-NEXT:    .reg .b64 %rd<7>;
+; SM35-EMPTY:
+; SM35-NEXT:  // %bb.0:
+; SM35-NEXT:    ld.param.u64 %rd1, [funnel_shift_right_64_param_0];
+; SM35-NEXT:    ld.param.u32 %r1, [funnel_shift_right_64_param_2];
+; SM35-NEXT:    and.b32 %r2, %r1, 63;
+; SM35-NEXT:    ld.param.u64 %rd2, [funnel_shift_right_64_param_1];
+; SM35-NEXT:    shr.u64 %rd3, %rd2, %r2;
+; SM35-NEXT:    shl.b64 %rd4, %rd1, 1;
+; SM35-NEXT:    not.b32 %r3, %r1;
+; SM35-NEXT:    and.b32 %r4, %r3, 63;
+; SM35-NEXT:    shl.b64 %rd5, %rd4, %r4;
+; SM35-NEXT:    or.b64 %rd6, %rd5, %rd3;
+; SM35-NEXT:    st.param.b64 [func_retval0+0], %rd6;
+; SM35-NEXT:    ret;
+  %val = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c)
+  ret i64 %val
+}
+
+define i64 @funnel_shift_left_64(i64 %a, i64 %b, i64 %c) {
+; SM20-LABEL: funnel_shift_left_64(
+; SM20:       {
+; SM20-NEXT:    .reg .b32 %r<5>;
+; SM20-NEXT:    .reg .b64 %rd<7>;
+; SM20-EMPTY:
+; SM20-NEXT:  // %bb.0:
+; SM20-NEXT:    ld.param.u64 %rd1, [funnel_shift_left_64_param_0];
+; SM20-NEXT:    ld.param.u32 %r1, [funnel_shift_left_64_param_2];
+; SM20-NEXT:    and.b32 %r2, %r1, 63;
+; SM20-NEXT:    shl.b64 %rd2, %rd1, %r2;
+; SM20-NEXT:    ld.param.u64 %rd3, [funnel_shift_left_64_param_1];
+; SM20-NEXT:    shr.u64 %rd4, %rd3, 1;
+; SM20-NEXT:    not.b32 %r3, %r1;
+; SM20-NEXT:    and.b32 %r4, %r3, 63;
+; SM20-NEXT:    shr.u64 %rd5, %rd4, %r4;
+; SM20-NEXT:    or.b64 %rd6, %rd2, %rd5;
+; SM20-NEXT:    st.param.b64 [func_retval0+0], %rd6;
+; SM20-NEXT:    ret;
+;
+; SM35-LABEL: funnel_shift_left_64(
+; SM35:       {
+; SM35-NEXT:    .reg .b32 %r<5>;
+; SM35-NEXT:    .reg .b64 %rd<7>;
+; SM35-EMPTY:
+; SM35-NEXT:  // %bb.0:
+; SM35-NEXT:    ld.param.u64 %rd1, [funnel_shift_left_64_param_0];
+; SM35-NEXT:    ld.param.u32 %r1, [funnel_shift_left_64_param_2];
+; SM35-NEXT:    and.b32 %r2, %r1, 63;
+; SM35-NEXT:    shl.b64 %rd2, %rd1, %r2;
+; SM35-NEXT:    ld.param.u64 %rd3, [funnel_shift_left_64_param_1];
+; SM35-NEXT:    shr.u64 %rd4, %rd3, 1;
+; SM35-NEXT:    not.b32 %r3, %r1;
+; SM35-NEXT:    and.b32 %r4, %r3, 63;
+; SM35-NEXT:    shr.u64 %rd5, %rd4, %r4;
+; SM35-NEXT:    or.b64 %rd6, %rd2, %rd5;
----------------
Artem-B wrote:

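This also does not use funnel shifts: the SM35 output for `funnel_shift_left_64` above is still the generic `shl.b64` / `shr.u64` / `or.b64` expansion, with no `shf` instruction.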

https://github.com/llvm/llvm-project/pull/107655


More information about the llvm-commits mailing list