[llvm] [NVPTX] Make i16x2 a native type and add supported vec instructions (PR #65432)

Wed Sep 6 15:29:27 PDT 2023

@@ -0,0 +1,523 @@
+; ## Support i16x2 instructions
+; ## (sm_90 with PTX 8.0; output is matched with the COMMON and I16x2 prefixes)
+; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -mattr=+ptx80 -asm-verbose=false \
+; RUN:          -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
+; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes COMMON,I16x2 %s
+; ## When ptxas is available, assemble the generated PTX as well. The -arch
+; ## given to ptxas has to agree with the -mcpu the PTX was generated for.
+; RUN: %if ptxas %{                                                           \
+; RUN:   llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -asm-verbose=false \
+; RUN:          -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
+; RUN:   | %ptxas-verify -arch=sm_90                                          \
+; RUN: %}
+; ## No support for i16x2 instructions
+; ## (sm_53; output is matched with the COMMON and NO-I16x2 prefixes)
+; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
+; RUN:          -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
+; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes COMMON,NO-I16x2 %s
+; RUN: %if ptxas %{                                                           \
+; RUN:   llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
+; RUN:          -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
+; RUN:   | %ptxas-verify -arch=sm_53                                          \
+; RUN: %}
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+; Returning a constant <2 x i16> is expected to materialize both lanes as a
+; single packed 32-bit immediate: <1, 2> -> (2 << 16) | 1 = 131073.
+; COMMON-LABEL: test_ret_const(
+; COMMON:     mov.u32         [[R:%r[0-9]+]], 131073;
+; COMMON:     st.param.b32    [func_retval0+0], [[R]];
+; COMMON-NEXT: ret;
+define <2 x i16> @test_ret_const() #0 {
+  ret <2 x i16> <i16 1, i16 2>
+}
+; Extracting lane 0 with a constant index: the packed 32-bit argument is
+; split with mov.b32 and only the first 16-bit half is kept; the i16 result
+; is then widened to 32 bits before being stored to the return slot.
+; COMMON-LABEL: test_extract_0(
+; COMMON:      ld.param.u32   [[A:%r[0-9]+]], [test_extract_0_param_0];
+; COMMON:      mov.b32        {[[RS:%rs[0-9]+]], tmp}, [[A]];
+; COMMON:      cvt.u32.u16    [[R:%r[0-9]+]], [[RS]];
+; COMMON:      st.param.b32    [func_retval0+0], [[R]];
+; COMMON:      ret;
+define i16 @test_extract_0(<2 x i16> %a) #0 {
+  %e = extractelement <2 x i16> %a, i32 0
+  ret i16 %e
+}
+; Extracting lane 1 with a constant index: same shape as the lane-0 case,
+; except that the second 16-bit half of the mov.b32 unpack is the one kept.
+; COMMON-LABEL: test_extract_1(
+; COMMON:      ld.param.u32   [[A:%r[0-9]+]], [test_extract_1_param_0];
+; COMMON:      mov.b32        {tmp, [[RS:%rs[0-9]+]]}, [[A]];
+; COMMON:      cvt.u32.u16    [[R:%r[0-9]+]], [[RS]];
+; COMMON:      st.param.b32    [func_retval0+0], [[R]];
+; COMMON:      ret;
+define i16 @test_extract_1(<2 x i16> %a) #0 {
+  %e = extractelement <2 x i16> %a, i32 1
+  ret i16 %e
+}
+; Extracting with a runtime index: both lanes are unpacked, the index is
+; compared against 0, and selp.b16 picks lane 0 when the predicate holds and
+; lane 1 otherwise. The loads, compare and unpack may appear in any order,
+; hence the -DAG directives.
+; COMMON-LABEL: test_extract_i(
+; COMMON-DAG:  ld.param.u32    [[A:%r[0-9]+]], [test_extract_i_param_0];
+; COMMON-DAG:  ld.param.u64    [[IDX:%rd[0-9]+]], [test_extract_i_param_1];
+; COMMON-DAG:  setp.eq.s64     [[PRED:%p[0-9]+]], [[IDX]], 0;
+; COMMON-DAG:  mov.b32         {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[A]];
+; COMMON:      selp.b16        [[RS:%rs[0-9]+]], [[E0]], [[E1]], [[PRED]];
+; COMMON:      cvt.u32.u16     [[R:%r[0-9]+]], [[RS]];
+; COMMON:      st.param.b32    [func_retval0+0], [[R]];
+; COMMON:      ret;
+define i16 @test_extract_i(<2 x i16> %a, i64 %idx) #0 {
+  %e = extractelement <2 x i16> %a, i64 %idx
+  ret i16 %e
+}
+; COMMON-LABEL: test_add(
+; COMMON-DAG:  ld.param.u32    [[A:%r[0-9]+]], [test_add_param_0];
+; COMMON-DAG:  ld.param.u32    [[B:%r[0-9]+]], [test_add_param_1];
+; I16x2-NEXT:  add.s16x2   [[R:%r[0-9]+]], [[A]], [[B]];
+;	NO-I16x2-DAG: mov.b32 	{[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [[A]];
ThomasRaoux wrote:

Oops, somehow my editor was misleading me. Should be good now.


