[llvm] [NVPTX] Don't use stack memory when bitcasting to/from v2i8 (PR #113928)

Justin Fargnoli via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 28 23:13:23 PDT 2024


================
@@ -0,0 +1,36 @@
+; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -mattr=+ptx80 -asm-verbose=false \
+; RUN:          -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
+; RUN: | FileCheck  %s
+; RUN: %if ptxas %{                                                           \
+; RUN:   llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -asm-verbose=false \
+; RUN:          -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
+; RUN:   | %ptxas-verify -arch=sm_90                                          \
+; RUN: %}
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+; CHECK-LABEL: test_trunc_2xi8(
+; CHECK:      ld.param.u32 [[R1:%r[0-9]+]], [test_trunc_2xi8_param_0];
+; CHECK:      mov.b32 {[[RS1:%rs[0-9]+]], [[RS2:%rs[0-9]+]]}, [[R1]];
+; CHECK:      shl.b16  [[RS3:%rs[0-9]+]], [[RS2]], 8;
+; CHECK:      and.b16  [[RS4:%rs[0-9]+]], [[RS1]], 255;
+; CHECK:      or.b16   [[RS5:%rs[0-9]+]], [[RS4]], [[RS3]]
+; CHECK:      cvt.u32.u16  [[R2:%r[0-9]+]], [[RS5]]
+; CHECK:      st.param.b32  [func_retval0], [[R2]];
+define i16 @test_trunc_2xi8(<2 x i16> %a) #0 {
+  %trunc = trunc <2 x i16> %a to <2 x i8>
----------------
justinfargnoli wrote:

Why include the `trunc` in this test and the `zext` in the test below? 

https://github.com/llvm/llvm-project/pull/113928


More information about the llvm-commits mailing list