[llvm] [NVPTX] Add support for clamped funnel shift intrinsics (PR #113228)

Mon Oct 21 22:31:09 PDT 2024

================
@@ -384,6 +384,34 @@ define float @test_sqrt_rn_f_ftz(float %a) #0 {
   ret float %ret
 }
 
+; CHECK-LABEL: @test_fshl_clamp_1
+define i32 @test_fshl_clamp_1(i32 %a, i32 %b, i32 %c) {
+; CHECK: call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 3)
+  %call = call i32 @llvm.nvvm.fshl.clamp.i32(i32 %a, i32 %b, i32 3)
+  ret i32 %call
+}
+
+; CHECK-LABEL: @test_fshl_clamp_2
+define i32 @test_fshl_clamp_2(i32 %a, i32 %b, i32 %c) {
+; CHECK: ret i32 %b
+  %call = call i32 @llvm.nvvm.fshl.clamp.i32(i32 %a, i32 %b, i32 300)
+  ret i32 %call
+}
+
+; CHECK-LABEL: @test_fshr_clamp_1
+define i32 @test_fshr_clamp_1(i32 %a, i32 %b, i32 %c) {
+; CHECK: call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 29)
+  %call = call i32 @llvm.nvvm.fshr.clamp.i32(i32 %a, i32 %b, i32 3)
+  ret i32 %call
+}
+
+; CHECK-LABEL: @test_fshr_clamp_2
+define i32 @test_fshr_clamp_2(i32 %a, i32 %b, i32 %c) {
+; CHECK: ret i32 %b
----------------
AlexMaclean wrote:

You're correct. I've fixed this.

> On a side note, it would also help if the input arguments would carry intended input bit positions, so it's easier to see the argument mixup we ran into in the previous funnel shift patch.

I don't understand what you mean. Could you please say more?

https://github.com/llvm/llvm-project/pull/113228