[llvm] [AMDGPU][True16][CodeGen] readfirstlane for vgpr16 copy to sgpr32 (PR #118037)
Brox Chen via llvm-commits
llvm-commits at lists.llvm.org
Mon May 5 11:49:59 PDT 2025
================
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck %s
+
+; expect readfirstlane to pick the 32bit register
+define amdgpu_gs i32 @vgpr16_copyto_sgpr(ptr addrspace(3) %a, i32 %b, ptr addrspace(1) %out) {
+; CHECK-LABEL: vgpr16_copyto_sgpr:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: ds_load_2addr_b32 v[0:1], v0 offset1:1
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; CHECK-NEXT: v_readfirstlane_b32 s0, v0
+; CHECK-NEXT: s_and_b32 s0, 0xffff, s0
+; CHECK-NEXT: s_mul_i32 s0, s0, 5
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-NEXT: s_cmp_lg_u32 s0, 2
+; CHECK-NEXT: s_cbranch_scc1 .LBB0_2
+; CHECK-NEXT: ; %bb.1: ; %a1
+; CHECK-NEXT: s_mov_b32 s0, 1
+; CHECK-NEXT: s_branch .LBB0_3
+; CHECK-NEXT: .LBB0_2: ; %a2
+; CHECK-NEXT: s_mov_b32 s0, 2
+; CHECK-NEXT: s_branch .LBB0_3
+; CHECK-NEXT: .LBB0_3:
+entry:
+ %1 = load <4 x float>, ptr addrspace(3) poison, align 4
+ %2 = extractelement <4 x float> %1, i32 0
+ %3 = fptrunc float %2 to half
+ %4 = bitcast half %3 to i16
+ %5 = zext i16 %4 to i32
+ %6 = add i32 %5, 1
+ %7 = mul i32 %6, 5
+ %8 = icmp eq i32 %7, 7
----------------
broxigarchen wrote:
updated
https://github.com/llvm/llvm-project/pull/118037
More information about the llvm-commits mailing list