[llvm] [NVPTX] Add intrinsic support for specialized prmt variants (PR #140951)

Durgadoss R via llvm-commits llvm-commits at lists.llvm.org
Thu May 22 06:24:20 PDT 2025


================
@@ -0,0 +1,113 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -verify-machineinstrs | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -verify-machineinstrs | %ptxas-verify %}
+
+target triple = "nvptx64-nvidia-cuda"
+
+define i32 @test_prmt_basic(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: test_prmt_basic(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [test_prmt_basic_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [test_prmt_basic_param_1];
+; CHECK-NEXT:    ld.param.b32 %r3, [test_prmt_basic_param_2];
+; CHECK-NEXT:    prmt.b32 %r4, %r1, %r2, %r3;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT:    ret;
+    %val = call i32 @llvm.nvvm.prmt(i32 %a, i32 %b, i32 %c)
+    ret i32 %val
+}
+
+define i32 @test_prmt_f4e(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: test_prmt_f4e(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [test_prmt_f4e_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [test_prmt_f4e_param_1];
+; CHECK-NEXT:    ld.param.b32 %r3, [test_prmt_f4e_param_2];
+; CHECK-NEXT:    prmt.b32.f4e %r4, %r1, %r2, %r3;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT:    ret;
+  %val = call i32 @llvm.nvvm.prmt.f4e(i32 %a, i32 %b, i32 %c)
+  ret i32 %val
+}
+
+define i32 @test_prmt_b4e(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: test_prmt_b4e(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [test_prmt_b4e_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [test_prmt_b4e_param_1];
+; CHECK-NEXT:    ld.param.b32 %r3, [test_prmt_b4e_param_2];
+; CHECK-NEXT:    prmt.b32.b4e %r4, %r1, %r2, %r3;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT:    ret;
+  %val = call i32 @llvm.nvvm.prmt.b4e(i32 %a, i32 %b, i32 %c)
+  ret i32 %val
+}
+
+define i32 @test_prmt_rc8(i32 %a, i32 %b) {
----------------
durga4github wrote:

Do we want to name the second argument as `%c` itself?


https://github.com/llvm/llvm-project/pull/140951


More information about the llvm-commits mailing list