[llvm] [NVPTX] Make i16x2 a native type and add supported vec instructions (PR #65432)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 6 14:16:48 PDT 2023
================
@@ -0,0 +1,523 @@
+; The test is compiled twice: once for sm_90 with PTX 8.0 enabled and once for
+; sm_53. Each configuration has one FileCheck run and, when ptxas is
+; available, one ptxas verification run. The -arch given to ptxas matches the
+; -mcpu that produced the PTX.
+; ## Support i16x2 instructions
+; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -mattr=+ptx80 -asm-verbose=false \
+; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
+; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-I16x2 %s
+; NOTE(review): the ptxas run below omits -mattr=+ptx80, unlike the FileCheck
+; run above - confirm whether that is intentional.
+; RUN: %if ptxas %{ \
+; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -asm-verbose=false \
+; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
+; RUN: | %ptxas-verify -arch=sm_90 \
+; RUN: %}
+; ## No support for i16x2 instructions
+; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
+; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
+; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-NOI16x2 %s
+; RUN: %if ptxas %{ \
+; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
+; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
+; RUN: | %ptxas-verify -arch=sm_53 \
+; RUN: %}
+
+; Module-level data layout string; it applies to every function in this file.
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+; Returning the constant <1, 2> materializes both lanes as a single 32-bit
+; immediate: (2 << 16) | 1 = 131073.
+; CHECK-LABEL: test_ret_const(
+; CHECK: mov.u32 [[R:%r[0-9]+]], 131073;
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK-NEXT: ret;
+define <2 x i16> @test_ret_const() #0 {
+ ret <2 x i16> <i16 1, i16 2>
+}
+
+; Constant-index extract of lane 0: the 32-bit parameter is unpacked with
+; mov.b32, the first 16-bit half is kept (the second goes to tmp), and the
+; result is widened with cvt.u32.u16 before being stored as the return value.
+; CHECK-LABEL: test_extract_0(
+; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_extract_0_param_0];
+; CHECK: mov.b32 {[[RS:%rs[0-9]+]], tmp}, [[A]];
+; CHECK: cvt.u32.u16 [[R:%r[0-9]+]], [[RS]];
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define i16 @test_extract_0(<2 x i16> %a) #0 {
+ %e = extractelement <2 x i16> %a, i32 0
+ ret i16 %e
+}
+
+; Constant-index extract of lane 1: same sequence as the lane-0 case, except
+; that mov.b32 keeps the second 16-bit half and sends the first to tmp.
+; CHECK-LABEL: test_extract_1(
+; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_extract_1_param_0];
+; CHECK: mov.b32 {tmp, [[RS:%rs[0-9]+]]}, [[A]];
+; CHECK: cvt.u32.u16 [[R:%r[0-9]+]], [[RS]];
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define i16 @test_extract_1(<2 x i16> %a) #0 {
+ %e = extractelement <2 x i16> %a, i32 1
+ ret i16 %e
+}
+
+; Variable-index extract: both 16-bit halves are unpacked with mov.b32, the
+; 64-bit index is compared against 0, and selp.b16 picks the first half when
+; the index equals 0 and the second half otherwise.
+; CHECK-LABEL: test_extract_i(
+; CHECK-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_extract_i_param_0];
+; CHECK-DAG: ld.param.u64 [[IDX:%rd[0-9]+]], [test_extract_i_param_1];
+; CHECK-DAG: setp.eq.s64 [[PRED:%p[0-9]+]], [[IDX]], 0;
+; CHECK-DAG: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[A]];
+; CHECK: selp.b16 [[RS:%rs[0-9]+]], [[E0]], [[E1]], [[PRED]];
+; CHECK: cvt.u32.u16 [[R:%r[0-9]+]], [[RS]];
+; CHECK: st.param.b32 [func_retval0+0], [[R]];
+; CHECK: ret;
+define i16 @test_extract_i(<2 x i16> %a, i64 %idx) #0 {
+ %e = extractelement <2 x i16> %a, i64 %idx
+ ret i16 %e
+}
+
+; CHECK-LABEL: test_add(
+; CHECK-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_add_param_0];
+; CHECK-DAG: ld.param.u32 [[B:%r[0-9]+]], [test_add_param_1];
+;
+; CHECK-I16x2-NEXT: add.s16x2 [[R:%r[0-9]+]], [[A]], [[B]];
+;
+; CHECK-NOI16x2-DAG: mov.b32 {[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [[A]];
----------------
ThomasRaoux wrote:
Removed the indentation and changed the prefix names
https://github.com/llvm/llvm-project/pull/65432
More information about the llvm-commits
mailing list