[llvm] [NVPTX] support immediate values in st.param instructions (PR #91523)

Artem Belevich via llvm-commits llvm-commits at lists.llvm.org
Mon May 13 15:52:58 PDT 2024


================
@@ -0,0 +1,1356 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; The same FileCheck assertions are matched against llc output for both the
+; nvptx64 and nvptx targets. When the ptxas feature is available, the output
+; of both targets is additionally piped through %ptxas-verify.
+; RUN: llc < %s -march=nvptx64 | FileCheck %s
+; RUN: llc < %s -march=nvptx | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx -verify-machineinstrs | %ptxas-verify %}
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -verify-machineinstrs | %ptxas-verify %}
+
+target triple = "nvptx64-nvidia-cuda"
+
+; Aggregate types passed by value to the callees in this test. %struct.A mixes
+; an i8 with an i16; the remaining types are homogeneous 2- and 4-element
+; structs of a single scalar type.
+%struct.A = type { i8, i16 }
+%struct.char2 = type { i8, i8 }
+%struct.char4 = type { i8, i8, i8, i8 }
+%struct.short2 = type { i16, i16 }
+%struct.short4 = type { i16, i16, i16, i16 }
+%struct.int2 = type { i32, i32 }
+%struct.int4 = type { i32, i32, i32, i32 }
+%struct.longlong2 = type { i64, i64 }
+%struct.float2 = type { float, float }
+%struct.float4 = type { float, float, float, float }
+%struct.double2 = type { double, double }
+
+; Passes the constant %struct.A { i8 1, i16 2 } by value to @call_i8_i16.
+; The expected PTX stores each field separately with st.param, using an
+; immediate operand for both: 1 at param0+0 (b8) and 2 at param0+2 (b16).
+define void @st_param_i8_i16() {
+; CHECK-LABEL: st_param_i8_i16(
+; CHECK:       {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    { // callseq 0, 0
+; CHECK-NEXT:    .param .align 2 .b8 param0[4];
+; CHECK-NEXT:    st.param.b8 [param0+0], 1;
+; CHECK-NEXT:    st.param.b16 [param0+2], 2;
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_i8_i16,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 0
+; CHECK-NEXT:    ret;
+  call void @call_i8_i16(%struct.A { i8 1, i16 2 })
+  ret void
+}
+
+; Passes the constant i32 3 to @call_i32. The expected PTX stores it with
+; st.param.b32 using the immediate 3 as the source operand.
+define void @st_param_i32() {
+; CHECK-LABEL: st_param_i32(
+; CHECK:       {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    { // callseq 1, 0
+; CHECK-NEXT:    .param .b32 param0;
+; CHECK-NEXT:    st.param.b32 [param0+0], 3;
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_i32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 1
+; CHECK-NEXT:    ret;
+  call void @call_i32(i32 3)
+  ret void
+}
+
+; Passes the constant i64 4 to @call_i64. The expected PTX stores it with
+; st.param.b64 using the immediate 4 as the source operand.
+define void @st_param_i64() {
+; CHECK-LABEL: st_param_i64(
+; CHECK:       {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    { // callseq 2, 0
+; CHECK-NEXT:    .param .b64 param0;
+; CHECK-NEXT:    st.param.b64 [param0+0], 4;
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_i64,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 2
+; CHECK-NEXT:    ret;
+  call void @call_i64(i64 4)
+  ret void
+}
+
+; Passes the constant float 5.0 to @call_f32. The expected PTX stores it with
+; st.param.f32 using the hexadecimal float immediate 0f40A00000.
+define void @st_param_f32() {
+; CHECK-LABEL: st_param_f32(
+; CHECK:       {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    { // callseq 3, 0
+; CHECK-NEXT:    .param .b32 param0;
+; CHECK-NEXT:    st.param.f32 [param0+0], 0f40A00000;
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_f32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 3
+; CHECK-NEXT:    ret;
+  call void @call_f32(float 5.0)
+  ret void
+}
+
+; Passes the constant double 6.0 to @call_f64. The expected PTX stores it with
+; st.param.f64 using the hexadecimal double immediate 0d4018000000000000.
+define void @st_param_f64() {
+; CHECK-LABEL: st_param_f64(
+; CHECK:       {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    { // callseq 4, 0
+; CHECK-NEXT:    .param .b64 param0;
+; CHECK-NEXT:    st.param.f64 [param0+0], 0d4018000000000000;
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_f64,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 4
+; CHECK-NEXT:    ret;
+  call void @call_f64(double 6.0)
+  ret void
+}
+
+; Callees for the %struct.A and scalar tests above. They are declared only;
+; no definition appears in the visible part of this test.
+declare void @call_i8_i16(%struct.A)
+declare void @call_i32(i32)
+declare void @call_i64(i64)
+declare void @call_f32(float)
+declare void @call_f64(double)
+
+; Calls @call_v2_i8 three times with a %struct.char2 argument whose fields are
+; (constant, constant), (constant, %val) and (%val, constant). The expected PTX
+; uses one st.param.v2.b8 per call, with immediates for the constant fields and
+; %rs1 (loaded once from the function parameter) for %val.
+define void @st_param_v2_i8(i8 %val) {
+; CHECK-LABEL: st_param_v2_i8(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u8 %rs1, [st_param_v2_i8_param_0];
+; CHECK-NEXT:    { // callseq 5, 0
+; CHECK-NEXT:    .param .align 2 .b8 param0[2];
+; CHECK-NEXT:    st.param.v2.b8 [param0+0], {1, 2};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 5
+; CHECK-NEXT:    { // callseq 6, 0
+; CHECK-NEXT:    .param .align 2 .b8 param0[2];
+; CHECK-NEXT:    st.param.v2.b8 [param0+0], {1, %rs1};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 6
+; CHECK-NEXT:    { // callseq 7, 0
+; CHECK-NEXT:    .param .align 2 .b8 param0[2];
+; CHECK-NEXT:    st.param.v2.b8 [param0+0], {%rs1, 2};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 7
+; CHECK-NEXT:    ret;
+  ; Both fields constant.
+  call void @call_v2_i8(%struct.char2 { i8 1, i8 2 })
+  ; Field 0 constant, field 1 taken from %val.
+  %struct.ir0 = insertvalue %struct.char2 poison, i8 1, 0
+  %struct.ir1 = insertvalue %struct.char2 %struct.ir0, i8 %val, 1
+  call void @call_v2_i8(%struct.char2 %struct.ir1)
+  ; Field 0 taken from %val, field 1 constant.
+  %struct.ri0 = insertvalue %struct.char2 poison, i8 %val, 0
+  %struct.ri1 = insertvalue %struct.char2 %struct.ri0, i8 2, 1
+  call void @call_v2_i8(%struct.char2 %struct.ri1)
+  ret void
+}
+
+; Calls @call_v2_i16 three times with a %struct.short2 argument whose fields are
+; (constant, constant), (constant, %val) and (%val, constant). The expected PTX
+; uses one st.param.v2.b16 per call, with immediates for the constant fields
+; and %rs1 (loaded once from the function parameter) for %val.
+define void @st_param_v2_i16(i16 %val) {
+; CHECK-LABEL: st_param_v2_i16(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u16 %rs1, [st_param_v2_i16_param_0];
+; CHECK-NEXT:    { // callseq 8, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v2.b16 [param0+0], {1, 2};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_i16,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 8
+; CHECK-NEXT:    { // callseq 9, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v2.b16 [param0+0], {1, %rs1};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_i16,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 9
+; CHECK-NEXT:    { // callseq 10, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v2.b16 [param0+0], {%rs1, 2};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_i16,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 10
+; CHECK-NEXT:    ret;
+  ; Both fields constant.
+  call void @call_v2_i16(%struct.short2 { i16 1, i16 2 })
+  ; Field 0 constant, field 1 taken from %val.
+  %struct.ir0 = insertvalue %struct.short2 poison, i16 1, 0
+  %struct.ir1 = insertvalue %struct.short2 %struct.ir0, i16 %val, 1
+  call void @call_v2_i16(%struct.short2 %struct.ir1)
+  ; Field 0 taken from %val, field 1 constant.
+  %struct.ri0 = insertvalue %struct.short2 poison, i16 %val, 0
+  %struct.ri1 = insertvalue %struct.short2 %struct.ri0, i16 2, 1
+  call void @call_v2_i16(%struct.short2 %struct.ri1)
+  ret void
+}
+
+; Calls @call_v2_i32 three times with a %struct.int2 argument whose fields are
+; (constant, constant), (constant, %val) and (%val, constant). The expected PTX
+; uses one st.param.v2.b32 per call, with immediates for the constant fields
+; and %r1 (loaded once from the function parameter) for %val.
+define void @st_param_v2_i32(i32 %val) {
+; CHECK-LABEL: st_param_v2_i32(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [st_param_v2_i32_param_0];
+; CHECK-NEXT:    { // callseq 11, 0
+; CHECK-NEXT:    .param .align 8 .b8 param0[8];
+; CHECK-NEXT:    st.param.v2.b32 [param0+0], {1, 2};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_i32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 11
+; CHECK-NEXT:    { // callseq 12, 0
+; CHECK-NEXT:    .param .align 8 .b8 param0[8];
+; CHECK-NEXT:    st.param.v2.b32 [param0+0], {1, %r1};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_i32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 12
+; CHECK-NEXT:    { // callseq 13, 0
+; CHECK-NEXT:    .param .align 8 .b8 param0[8];
+; CHECK-NEXT:    st.param.v2.b32 [param0+0], {%r1, 2};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_i32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 13
+; CHECK-NEXT:    ret;
+  ; Both fields constant.
+  call void @call_v2_i32(%struct.int2 { i32 1, i32 2 })
+  ; Field 0 constant, field 1 taken from %val.
+  %struct.ir0 = insertvalue %struct.int2 poison, i32 1, 0
+  %struct.ir1 = insertvalue %struct.int2 %struct.ir0, i32 %val, 1
+  call void @call_v2_i32(%struct.int2 %struct.ir1)
+  ; Field 0 taken from %val, field 1 constant.
+  %struct.ri0 = insertvalue %struct.int2 poison, i32 %val, 0
+  %struct.ri1 = insertvalue %struct.int2 %struct.ri0, i32 2, 1
+  call void @call_v2_i32(%struct.int2 %struct.ri1)
+  ret void
+}
+
+; Calls @call_v2_i64 three times with a %struct.longlong2 argument whose fields
+; are (constant, constant), (constant, %val) and (%val, constant). The expected
+; PTX uses one st.param.v2.b64 per call, with immediates for the constant
+; fields and %rd1 (loaded once from the function parameter) for %val.
+define void @st_param_v2_i64(i64 %val) {
+; CHECK-LABEL: st_param_v2_i64(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u64 %rd1, [st_param_v2_i64_param_0];
+; CHECK-NEXT:    { // callseq 14, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v2.b64 [param0+0], {1, 2};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_i64,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 14
+; CHECK-NEXT:    { // callseq 15, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v2.b64 [param0+0], {1, %rd1};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_i64,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 15
+; CHECK-NEXT:    { // callseq 16, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v2.b64 [param0+0], {%rd1, 2};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_i64,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 16
+; CHECK-NEXT:    ret;
+  ; Both fields constant.
+  call void @call_v2_i64(%struct.longlong2 { i64 1, i64 2 })
+  ; Field 0 constant, field 1 taken from %val.
+  %struct.ir0 = insertvalue %struct.longlong2 poison, i64 1, 0
+  %struct.ir1 = insertvalue %struct.longlong2 %struct.ir0, i64 %val, 1
+  call void @call_v2_i64(%struct.longlong2 %struct.ir1)
+  ; Field 0 taken from %val, field 1 constant.
+  %struct.ri0 = insertvalue %struct.longlong2 poison, i64 %val, 0
+  %struct.ri1 = insertvalue %struct.longlong2 %struct.ri0, i64 2, 1
+  call void @call_v2_i64(%struct.longlong2 %struct.ri1)
+  ret void
+}
+
+; Calls @call_v2_f32 three times with a %struct.float2 argument whose fields are
+; (constant, constant), (constant, %val) and (%val, constant). The expected PTX
+; uses one st.param.v2.f32 per call, with hexadecimal float immediates
+; (0f3F800000 for 1.0, 0f40000000 for 2.0) for the constant fields and %f1
+; (loaded once from the function parameter) for %val.
+define void @st_param_v2_f32(float %val) {
+; CHECK-LABEL: st_param_v2_f32(
+; CHECK:       {
+; CHECK-NEXT:    .reg .f32 %f<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.f32 %f1, [st_param_v2_f32_param_0];
+; CHECK-NEXT:    { // callseq 17, 0
+; CHECK-NEXT:    .param .align 8 .b8 param0[8];
+; CHECK-NEXT:    st.param.v2.f32 [param0+0], {0f3F800000, 0f40000000};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_f32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 17
+; CHECK-NEXT:    { // callseq 18, 0
+; CHECK-NEXT:    .param .align 8 .b8 param0[8];
+; CHECK-NEXT:    st.param.v2.f32 [param0+0], {0f3F800000, %f1};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_f32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 18
+; CHECK-NEXT:    { // callseq 19, 0
+; CHECK-NEXT:    .param .align 8 .b8 param0[8];
+; CHECK-NEXT:    st.param.v2.f32 [param0+0], {%f1, 0f40000000};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_f32,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 19
+; CHECK-NEXT:    ret;
+  ; Both fields constant.
+  call void @call_v2_f32(%struct.float2 { float 1.0, float 2.0 })
+  ; Field 0 constant, field 1 taken from %val.
+  %struct.ir0 = insertvalue %struct.float2 poison, float 1.0, 0
+  %struct.ir1 = insertvalue %struct.float2 %struct.ir0, float %val, 1
+  call void @call_v2_f32(%struct.float2 %struct.ir1)
+  ; Field 0 taken from %val, field 1 constant.
+  %struct.ri0 = insertvalue %struct.float2 poison, float %val, 0
+  %struct.ri1 = insertvalue %struct.float2 %struct.ri0, float 2.0, 1
+  call void @call_v2_f32(%struct.float2 %struct.ri1)
+  ret void
+}
+
+; Calls @call_v2_f64 three times with a %struct.double2 argument whose fields
+; are (constant, constant), (constant, %val) and (%val, constant). The expected
+; PTX uses one st.param.v2.f64 per call, with hexadecimal double immediates
+; (0d3FF0000000000000 for 1.0, 0d4000000000000000 for 2.0) for the constant
+; fields and %fd1 (loaded once from the function parameter) for %val.
+define void @st_param_v2_f64(double %val) {
+; CHECK-LABEL: st_param_v2_f64(
+; CHECK:       {
+; CHECK-NEXT:    .reg .f64 %fd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.f64 %fd1, [st_param_v2_f64_param_0];
+; CHECK-NEXT:    { // callseq 20, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v2.f64 [param0+0], {0d3FF0000000000000, 0d4000000000000000};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_f64,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 20
+; CHECK-NEXT:    { // callseq 21, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v2.f64 [param0+0], {0d3FF0000000000000, %fd1};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_f64,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 21
+; CHECK-NEXT:    { // callseq 22, 0
+; CHECK-NEXT:    .param .align 16 .b8 param0[16];
+; CHECK-NEXT:    st.param.v2.f64 [param0+0], {%fd1, 0d4000000000000000};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v2_f64,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 22
+; CHECK-NEXT:    ret;
+  ; Both fields constant.
+  call void @call_v2_f64(%struct.double2 { double 1.0, double 2.0 })
+  ; Field 0 constant, field 1 taken from %val.
+  %struct.ir0 = insertvalue %struct.double2 poison, double 1.0, 0
+  %struct.ir1 = insertvalue %struct.double2 %struct.ir0, double %val, 1
+  call void @call_v2_f64(%struct.double2 %struct.ir1)
+  ; Field 0 taken from %val, field 1 constant.
+  %struct.ri0 = insertvalue %struct.double2 poison, double %val, 0
+  %struct.ri1 = insertvalue %struct.double2 %struct.ri0, double 2.0, 1
+  call void @call_v2_f64(%struct.double2 %struct.ri1)
+  ret void
+}
+
+; Callees for the two-element struct tests above, one per element type. They
+; are declared only; no definition appears in the visible part of this test.
+declare void @call_v2_i8(%struct.char2)
+declare void @call_v2_i16(%struct.short2)
+declare void @call_v2_i32(%struct.int2)
+declare void @call_v2_i64(%struct.longlong2)
+declare void @call_v2_f32(%struct.float2)
+declare void @call_v2_f64(%struct.double2)
+
+define void @st_param_v4_i8(i8 %a, i8 %b, i8 %c, i8 %d) {
+; CHECK-LABEL: st_param_v4_i8(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u8 %rs1, [st_param_v4_i8_param_0];
+; CHECK-NEXT:    { // callseq 23, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v4.b8 [param0+0], {1, 2, 3, 4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 23
+; CHECK-NEXT:    ld.param.u8 %rs2, [st_param_v4_i8_param_1];
+; CHECK-NEXT:    ld.param.u8 %rs3, [st_param_v4_i8_param_2];
+; CHECK-NEXT:    ld.param.u8 %rs4, [st_param_v4_i8_param_3];
+; CHECK-NEXT:    { // callseq 24, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v4.b8 [param0+0], {1, %rs2, %rs3, %rs4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 24
+; CHECK-NEXT:    { // callseq 25, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v4.b8 [param0+0], {%rs1, 2, %rs3, %rs4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 25
+; CHECK-NEXT:    { // callseq 26, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v4.b8 [param0+0], {%rs1, %rs2, 3, %rs4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 26
+; CHECK-NEXT:    { // callseq 27, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v4.b8 [param0+0], {%rs1, %rs2, %rs3, 4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 27
+; CHECK-NEXT:    { // callseq 28, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v4.b8 [param0+0], {1, 2, %rs3, %rs4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 28
+; CHECK-NEXT:    { // callseq 29, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v4.b8 [param0+0], {1, %rs2, 3, %rs4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 29
+; CHECK-NEXT:    { // callseq 30, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v4.b8 [param0+0], {1, %rs2, %rs3, 4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 30
+; CHECK-NEXT:    { // callseq 31, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v4.b8 [param0+0], {%rs1, 2, 3, %rs4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 31
+; CHECK-NEXT:    { // callseq 32, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v4.b8 [param0+0], {%rs1, 2, %rs3, 4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 32
+; CHECK-NEXT:    { // callseq 33, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v4.b8 [param0+0], {%rs1, %rs2, 3, 4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 33
+; CHECK-NEXT:    { // callseq 34, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v4.b8 [param0+0], {1, 2, 3, %rs4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 34
+; CHECK-NEXT:    { // callseq 35, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v4.b8 [param0+0], {1, 2, %rs3, 4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 35
+; CHECK-NEXT:    { // callseq 36, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v4.b8 [param0+0], {1, %rs2, 3, 4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 36
+; CHECK-NEXT:    { // callseq 37, 0
+; CHECK-NEXT:    .param .align 4 .b8 param0[4];
+; CHECK-NEXT:    st.param.v4.b8 [param0+0], {%rs1, 2, 3, 4};
+; CHECK-NEXT:    call.uni
+; CHECK-NEXT:    call_v4_i8,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0
+; CHECK-NEXT:    );
+; CHECK-NEXT:    } // callseq 37
+; CHECK-NEXT:    ret;
+  call void @call_v4_i8(%struct.char4 { i8 1, i8 2, i8 3, i8 4 })
+
+  %struct.irrr0 = insertvalue %struct.char4 poison, i8 1, 0
----------------
Artem-B wrote:

It would be nice to split each call into a separate test function -- much easier to check IR vs the generated PTX.

https://github.com/llvm/llvm-project/pull/91523


More information about the llvm-commits mailing list