[llvm] [AMDGPU] Insert readfirstlane in the function returns in sgpr. (PR #135326)

Pankaj Dwivedi via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 21 04:29:58 PDT 2025


================
@@ -72,3 +72,76 @@ define amdgpu_ps ptr @uniform_v_to_s_ptr(ptr inreg %x) {
   %ptr = inttoptr i32 %int to ptr
   ret ptr %ptr
 }
+
+define amdgpu_ps half @uniform_v_to_s_f16(half inreg %a, half inreg %b) {
+; GFX11-LABEL: uniform_v_to_s_f16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_max_f16_e64 v0, s0, s1
+; GFX11-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
+; GFX11-NEXT:    ; return to shader part epilog
+  %max = call half @llvm.maximum.f16(half %a, half %b)
+  ret half %max
+}
+
+define amdgpu_ps float @uniform_v_to_s_v2f16(<2 x half> inreg %a, <2 x half> inreg %b) {
+; GFX11-LABEL: uniform_v_to_s_v2f16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_pk_max_f16 v0, s0, s1
+; GFX11-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s1
+; GFX11-NEXT:    s_lshr_b32 s2, s1, 16
+; GFX11-NEXT:    s_lshr_b32 s0, s0, 16
+; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
+; GFX11-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v1, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX11-NEXT:    ; return to shader part epilog
+  %max = call <2 x half> @llvm.maximum.f16(<2 x half> %a, <2 x half> %b)
----------------
PankajDwivedi-25 wrote:

For fp16 and float, the bitcast results in a VGPR.

https://github.com/llvm/llvm-project/pull/135326


More information about the llvm-commits mailing list