[llvm] [NVPTX] deprecate nvvm.rotate.* intrinsics, cleanup funnel-shift handling (PR #107655)
Artem Belevich via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 23 13:10:59 PDT 2024
================
@@ -50,45 +53,36 @@ define i32 @rotate32(i32 %a, i32 %b) {
define i64 @rotate64(i64 %a, i32 %b) {
; SM20-LABEL: rotate64(
; SM20: {
-; SM20-NEXT: .reg .b32 %r<2>;
-; SM20-NEXT: .reg .b64 %rd<3>;
+; SM20-NEXT: .reg .b32 %r<5>;
+; SM20-NEXT: .reg .b64 %rd<5>;
; SM20-EMPTY:
; SM20-NEXT: // %bb.0:
; SM20-NEXT: ld.param.u64 %rd1, [rotate64_param_0];
; SM20-NEXT: ld.param.u32 %r1, [rotate64_param_1];
-; SM20-NEXT: {
-; SM20-NEXT: .reg .b64 %lhs;
-; SM20-NEXT: .reg .b64 %rhs;
-; SM20-NEXT: .reg .u32 %amt2;
-; SM20-NEXT: and.b32 %amt2, %r1, 63;
-; SM20-NEXT: shl.b64 %lhs, %rd1, %amt2;
-; SM20-NEXT: sub.u32 %amt2, 64, %amt2;
-; SM20-NEXT: shr.b64 %rhs, %rd1, %amt2;
-; SM20-NEXT: add.u64 %rd2, %lhs, %rhs;
-; SM20-NEXT: }
-; SM20-NEXT: st.param.b64 [func_retval0+0], %rd2;
+; SM20-NEXT: and.b32 %r2, %r1, 63;
+; SM20-NEXT: shl.b64 %rd2, %rd1, %r2;
+; SM20-NEXT: neg.s32 %r3, %r1;
+; SM20-NEXT: and.b32 %r4, %r3, 63;
+; SM20-NEXT: shr.u64 %rd3, %rd1, %r4;
+; SM20-NEXT: or.b64 %rd4, %rd2, %rd3;
+; SM20-NEXT: st.param.b64 [func_retval0+0], %rd4;
; SM20-NEXT: ret;
;
; SM35-LABEL: rotate64(
; SM35: {
-; SM35-NEXT: .reg .b32 %r<6>;
-; SM35-NEXT: .reg .b64 %rd<3>;
+; SM35-NEXT: .reg .b32 %r<5>;
+; SM35-NEXT: .reg .b64 %rd<5>;
; SM35-EMPTY:
; SM35-NEXT: // %bb.0:
; SM35-NEXT: ld.param.u64 %rd1, [rotate64_param_0];
-; SM35-NEXT: {
-; SM35-NEXT: .reg .b32 %dummy;
-; SM35-NEXT: mov.b64 {%dummy,%r1}, %rd1;
-; SM35-NEXT: }
-; SM35-NEXT: {
-; SM35-NEXT: .reg .b32 %dummy;
-; SM35-NEXT: mov.b64 {%r2,%dummy}, %rd1;
-; SM35-NEXT: }
-; SM35-NEXT: ld.param.u32 %r3, [rotate64_param_1];
-; SM35-NEXT: shf.l.wrap.b32 %r4, %r2, %r1, %r3;
-; SM35-NEXT: shf.l.wrap.b32 %r5, %r1, %r2, %r3;
-; SM35-NEXT: mov.b64 %rd2, {%r5, %r4};
-; SM35-NEXT: st.param.b64 [func_retval0+0], %rd2;
+; SM35-NEXT: ld.param.u32 %r1, [rotate64_param_1];
+; SM35-NEXT: and.b32 %r2, %r1, 63;
+; SM35-NEXT: shl.b64 %rd2, %rd1, %r2;
+; SM35-NEXT: neg.s32 %r3, %r1;
+; SM35-NEXT: and.b32 %r4, %r3, 63;
+; SM35-NEXT: shr.u64 %rd3, %rd1, %r4;
+; SM35-NEXT: or.b64 %rd4, %rd2, %rd3;
----------------
Artem-B wrote:
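This looks like a regression for SM35. Previously we'd generate a couple of 32-bit funnel shifts (`shf.l.wrap.b32`); now we emit the same generic `and`/`shl`/`neg`/`and`/`shr`/`or` sequence as SM20.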
https://github.com/llvm/llvm-project/pull/107655
More information about the llvm-commits mailing list