[llvm] [NVPTX] Make i16x2 a native type and add supported vec instructions (PR #65432)

Wed Sep 6 11:52:27 PDT 2023

================
@@ -0,0 +1,523 @@
+; ## Support i16x2 instructions
+; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -mattr=+ptx80 -asm-verbose=false \
+; RUN:          -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
+; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-I16x2 %s
+; ## When ptxas is available, assemble the sm_90 output for the same arch it was generated for
+; RUN: %if ptxas %{                                                           \
+; RUN:   llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -asm-verbose=false \
+; RUN:          -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
+; RUN:   | %ptxas-verify -arch=sm_90                                          \
+; RUN: %}
+; ## No support for i16x2 instructions
+; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
+; RUN:          -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
+; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-NOI16x2 %s
+; RUN: %if ptxas %{                                                           \
+; RUN:   llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
+; RUN:          -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
+; RUN:   | %ptxas-verify -arch=sm_53                                          \
+; RUN: %}
+
+; Module-level data layout string shared by every function in this test.
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+; Returning the constant <i16 1, i16 2> is expected to materialize a single
+; packed 32-bit immediate: (2 << 16) | 1 = 131073.
+; CHECK-LABEL: test_ret_const(
+; CHECK:     mov.u32         [[R:%r[0-9]+]], 131073;
+; CHECK:     st.param.b32    [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define <2 x i16> @test_ret_const() #0 {
+  ret <2 x i16> <i16 1, i16 2>
+}
+
+; Constant-index extract of element 0: the checks expect the packed argument
+; to be split with mov.b32 and the first half widened for the return.
+; CHECK-LABEL: test_extract_0(
+; CHECK:      ld.param.u32   [[A:%r[0-9]+]], [test_extract_0_param_0];
+; CHECK:      mov.b32        {[[RS:%rs[0-9]+]], tmp}, [[A]];
+; CHECK:      cvt.u32.u16    [[R:%r[0-9]+]], [[RS]];
+; CHECK:      st.param.b32    [func_retval0+0], [[R]];
+; CHECK:      ret;
+define i16 @test_extract_0(<2 x i16> %a) #0 {
+  %elt0 = extractelement <2 x i16> %a, i32 0
+  ret i16 %elt0
+}
+
+; Constant-index extract of element 1: the checks expect the packed argument
+; to be split with mov.b32 and the second half widened for the return.
+; CHECK-LABEL: test_extract_1(
+; CHECK:      ld.param.u32   [[A:%r[0-9]+]], [test_extract_1_param_0];
+; CHECK:      mov.b32        {tmp, [[RS:%rs[0-9]+]]}, [[A]];
+; CHECK:      cvt.u32.u16    [[R:%r[0-9]+]], [[RS]];
+; CHECK:      st.param.b32    [func_retval0+0], [[R]];
+; CHECK:      ret;
+define i16 @test_extract_1(<2 x i16> %a) #0 {
+  %elt1 = extractelement <2 x i16> %a, i32 1
+  ret i16 %elt1
+}
+
+; Variable-index extract: the checks expect both halves to be unpacked and the
+; result chosen with selp on the predicate (index == 0).
+; CHECK-LABEL: test_extract_i(
+; CHECK-DAG:  ld.param.u32    [[A:%r[0-9]+]], [test_extract_i_param_0];
+; CHECK-DAG:  ld.param.u64    [[IDX:%rd[0-9]+]], [test_extract_i_param_1];
+; CHECK-DAG:  setp.eq.s64     [[PRED:%p[0-9]+]], [[IDX]], 0;
+; CHECK-DAG:  mov.b32         {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[A]];
+; CHECK:      selp.b16        [[RS:%rs[0-9]+]], [[E0]], [[E1]], [[PRED]];
+; CHECK:      cvt.u32.u16     [[R:%r[0-9]+]], [[RS]];
+; CHECK:      st.param.b32    [func_retval0+0], [[R]];
+; CHECK:      ret;
+define i16 @test_extract_i(<2 x i16> %a, i64 %idx) #0 {
+  %picked = extractelement <2 x i16> %a, i64 %idx
+  ret i16 %picked
+}
+
+; CHECK-LABEL: test_add(
+; CHECK-DAG:  ld.param.u32    [[A:%r[0-9]+]], [test_add_param_0];
+; CHECK-DAG:  ld.param.u32    [[B:%r[0-9]+]], [test_add_param_1];
+;
+; CHECK-I16x2-NEXT:  add.s16x2   [[R:%r[0-9]+]], [[A]], [[B]];
+;
+;	CHECK-NOI16x2-DAG: mov.b32 	{[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [[A]];
----------------
Artem-B wrote:

Is the indentation here intentional? The check prefix itself provides a sufficient clue without the extra spaces.

Speaking of the CHECK prefixes, I'd rename them:
CHECK->COMMON
CHECK-I16x2 -> I16x2
CHECK-NOI16x2 -> NO-I16x2

https://github.com/llvm/llvm-project/pull/65432