[llvm] [NVPTX] Improve lowering of v4i8 (PR #67866)

Artem Belevich via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 6 14:06:32 PDT 2023

@@ -0,0 +1,1248 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; ## Support i16x2 instructions
+; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -mattr=+ptx80 \
+; RUN:          -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
+; RUN: | FileCheck -allow-deprecated-dag-overlap %s
+; RUN: %if ptxas %{                                                           \
+; RUN:   llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 \
+; RUN:          -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
+; RUN:   | %ptxas-verify -arch=sm_90                                          \
+; RUN: %}
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+define <4 x i8> @test_ret_const() #0 {
+; CHECK-LABEL: test_ret_const(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<3>;
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    mov.u32 %r1, 67305985;
+; CHECK-NEXT:    st.param.b32 [func_retval0+0], %r1;
+; CHECK-NEXT:    ret;
+  ret <4 x i8> <i8 1, i8 2, i8 3, i8 4>
+define i8 @test_extract_0(<4 x i8> %a) #0 {
+; CHECK-LABEL: test_extract_0(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<3>;
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u32 %r1, [test_extract_0_param_0];
Artem-B wrote:

If you're referring to `test_extract_0_param_0`: that is the name the generated PTX uses for the function's parameter (`<function>_param_<index>`), so yes, the CHECK line is expected to match it.


