[llvm] [NVPTX] Don't use stack memory when bitcasting to/from v2i8 (PR #113928)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 29 07:12:23 PDT 2024
================
@@ -0,0 +1,36 @@
+; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -mattr=+ptx80 -asm-verbose=false \
+; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
+; RUN: | FileCheck %s
+; RUN: %if ptxas %{ \
+; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -asm-verbose=false \
+; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
+; RUN: | %ptxas-verify -arch=sm_90 \
+; RUN: %}
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+; Expected PTX for test_trunc_2xi8: the 32-bit parameter is split into two
+; 16-bit registers, which are then recombined into one 16-bit value with a
+; shl/and/or sequence before being widened and stored as the return value.
+; CHECK-LABEL: test_trunc_2xi8(
+; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [test_trunc_2xi8_param_0];
+; CHECK: mov.b32 {[[RS1:%rs[0-9]+]], [[RS2:%rs[0-9]+]]}, [[R1]];
+; CHECK: shl.b16 [[RS3:%rs[0-9]+]], [[RS2]], 8;
+; CHECK: and.b16 [[RS4:%rs[0-9]+]], [[RS1]], 255;
+; CHECK: or.b16 [[RS5:%rs[0-9]+]], [[RS4]], [[RS3]];
+; CHECK: cvt.u32.u16 [[R2:%r[0-9]+]], [[RS5]];
+; CHECK: st.param.b32 [func_retval0], [[R2]];
+define i16 @test_trunc_2xi8(<2 x i16> %a) #0 {
+ %trunc = trunc <2 x i16> %a to <2 x i8>
----------------
peterbell10 wrote:
I was having issues with having v2i8 in the function signature, but it seems to be working now, so I've removed them.
https://github.com/llvm/llvm-project/pull/113928
More information about the llvm-commits
mailing list