[llvm] [NVPTX] Add support for clamped funnel shift intrinsics (PR #113228)
Artem Belevich via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 22 09:55:11 PDT 2024
================
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -march=nvptx -mcpu=sm_61 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_61 | FileCheck %s
+
+target triple = "nvptx-nvidia-cuda"
+
+declare i32 @llvm.nvvm.fshr.clamp.i32(i32, i32, i32)
+declare i32 @llvm.nvvm.fshl.clamp.i32(i32, i32, i32)
+
+define i32 @fshr_clamp_r(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: fshr_clamp_r(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u32 %r1, [fshr_clamp_r_param_0];
+; CHECK-NEXT: ld.param.u32 %r2, [fshr_clamp_r_param_1];
+; CHECK-NEXT: ld.param.u32 %r3, [fshr_clamp_r_param_2];
+; CHECK-NEXT: shf.r.clamp.b32 %r4, %r2, %r1, %r3;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT: ret;
+ %call = call i32 @llvm.nvvm.fshr.clamp.i32(i32 %a, i32 %b, i32 %c)
+ ret i32 %call
+}
+
+define i32 @fshl_clamp_r(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: fshl_clamp_r(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u32 %r1, [fshl_clamp_r_param_0];
+; CHECK-NEXT: ld.param.u32 %r2, [fshl_clamp_r_param_1];
+; CHECK-NEXT: ld.param.u32 %r3, [fshl_clamp_r_param_2];
+; CHECK-NEXT: shf.l.clamp.b32 %r4, %r2, %r1, %r3;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT: ret;
+ %call = call i32 @llvm.nvvm.fshl.clamp.i32(i32 %a, i32 %b, i32 %c)
+ ret i32 %call
+}
+
+define i32 @fshr_clamp_i(i32 %a, i32 %b, i32 %c) {
----------------
Artem-B wrote:
`%c` is also unused here.
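
A minimal sketch of what the fix might look like, assuming `@fshr_clamp_i` is meant to exercise the immediate-shift form: drop the unused `%c` parameter and pass a constant shift amount instead (the value 3 below is a hypothetical choice, not from the patch):

```llvm
; Hypothetical fixed test: shift amount is an immediate, so no third parameter.
define i32 @fshr_clamp_i(i32 %a, i32 %b) {
  %call = call i32 @llvm.nvvm.fshr.clamp.i32(i32 %a, i32 %b, i32 3)
  ret i32 %call
}
```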
https://github.com/llvm/llvm-project/pull/113228