[llvm] [NVPTX] Add support for clamped funnel shift intrinsics (PR #113228)

Mon Oct 21 15:54:19 PDT 2024

================
@@ -384,6 +384,34 @@ define float @test_sqrt_rn_f_ftz(float %a) #0 {
   ret float %ret
 }
 
+; CHECK-LABEL: @test_fshl_clamp_1
+define i32 @test_fshl_clamp_1(i32 %a, i32 %b, i32 %c) {
+; CHECK: call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 3)
+  %call = call i32 @llvm.nvvm.fshl.clamp.i32(i32 %a, i32 %b, i32 3)
+  ret i32 %call
+}
+
+; CHECK-LABEL: @test_fshl_clamp_2
+define i32 @test_fshl_clamp_2(i32 %a, i32 %b, i32 %c) {
+; CHECK: ret i32 %b
+  %call = call i32 @llvm.nvvm.fshl.clamp.i32(i32 %a, i32 %b, i32 300)
+  ret i32 %call
+}
+
+; CHECK-LABEL: @test_fshr_clamp_1
+define i32 @test_fshr_clamp_1(i32 %a, i32 %b, i32 %c) {
+; CHECK: call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 29)
+  %call = call i32 @llvm.nvvm.fshr.clamp.i32(i32 %a, i32 %b, i32 3)
+  ret i32 %call
+}
+
+; CHECK-LABEL: @test_fshr_clamp_2
+define i32 @test_fshr_clamp_2(i32 %a, i32 %b, i32 %c) {
+; CHECK: ret i32 %b
----------------
Artem-B wrote:

Shouldn't we return `%a`?

> (%a is the most significant bits of the wide value), the combined value is shifted
> right, and the least significant bits are extracted to produce a result

So, if we shift `{%a, %b} >> 300`, the clamp makes it the same as `{%a, %b} >> 32`, and the least significant 32 bits of that are just `%a`.

On a side note, it would also help if the input argument names conveyed their intended bit positions, so that it is easier to spot the kind of argument mix-up we ran into in the previous funnel shift patch.

https://github.com/llvm/llvm-project/pull/113228