[llvm] [NVPTX] Add intrinsic support for specialized prmt variants (PR #140951)
Durgadoss R via llvm-commits
llvm-commits at lists.llvm.org
Thu May 22 06:24:20 PDT 2025
================
@@ -0,0 +1,113 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -verify-machineinstrs | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -verify-machineinstrs | %ptxas-verify %}
+
+target triple = "nvptx64-nvidia-cuda"
+
+; Generic form: a call to @llvm.nvvm.prmt with three i32 operands is expected
+; to lower to a single `prmt.b32` (no mode suffix) whose source operands are
+; the three loaded parameters, in order. The CHECK lines are autogenerated
+; (see the NOTE at the top of the file); regenerate them instead of editing.
+define i32 @test_prmt_basic(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: test_prmt_basic(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [test_prmt_basic_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [test_prmt_basic_param_1];
+; CHECK-NEXT: ld.param.b32 %r3, [test_prmt_basic_param_2];
+; CHECK-NEXT: prmt.b32 %r4, %r1, %r2, %r3;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT: ret;
+ %val = call i32 @llvm.nvvm.prmt(i32 %a, i32 %b, i32 %c)
+ ret i32 %val
+}
+
+; f4e variant: a call to @llvm.nvvm.prmt.f4e with three i32 operands is
+; expected to lower to a single `prmt.b32.f4e` whose source operands are the
+; three loaded parameters, in order. The CHECK lines are autogenerated
+; (see the NOTE at the top of the file); regenerate them instead of editing.
+define i32 @test_prmt_f4e(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: test_prmt_f4e(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [test_prmt_f4e_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [test_prmt_f4e_param_1];
+; CHECK-NEXT: ld.param.b32 %r3, [test_prmt_f4e_param_2];
+; CHECK-NEXT: prmt.b32.f4e %r4, %r1, %r2, %r3;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT: ret;
+ %val = call i32 @llvm.nvvm.prmt.f4e(i32 %a, i32 %b, i32 %c)
+ ret i32 %val
+}
+
+; b4e variant: a call to @llvm.nvvm.prmt.b4e with three i32 operands is
+; expected to lower to a single `prmt.b32.b4e` whose source operands are the
+; three loaded parameters, in order. The CHECK lines are autogenerated
+; (see the NOTE at the top of the file); regenerate them instead of editing.
+define i32 @test_prmt_b4e(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: test_prmt_b4e(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [test_prmt_b4e_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [test_prmt_b4e_param_1];
+; CHECK-NEXT: ld.param.b32 %r3, [test_prmt_b4e_param_2];
+; CHECK-NEXT: prmt.b32.b4e %r4, %r1, %r2, %r3;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT: ret;
+ %val = call i32 @llvm.nvvm.prmt.b4e(i32 %a, i32 %b, i32 %c)
+ ret i32 %val
+}
+
+define i32 @test_prmt_rc8(i32 %a, i32 %b) {
----------------
durga4github wrote:
Should the second argument simply be named `%c`?
https://github.com/llvm/llvm-project/pull/140951
More information about the llvm-commits mailing list