[llvm] select v_sat_pk from 2 i16 (PR #121124)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 25 21:51:17 PST 2024
================
@@ -587,5 +587,44 @@ define <2 x i16> @vec_smin_smax(<2 x i16> %src) {
%src.clamp = call <2 x i16> @llvm.smax.v2i16(<2 x i16> %src.min, <2 x i16> <i16 0, i16 0>)
ret <2 x i16> %src.clamp
}
+; Clamps each i16 input to [0, 255] (smax with 0, then smin with 255) and
+; combines the results into one i16: %src0's clamp masked with 255, OR'd with
+; %src1's clamp shifted left by 8.
+; Of the check prefixes below, only GISEL-GFX9 expects a single
+; v_sat_pk_u8_i16; SDAG-VI and GISEL-GFX11 expect separate max/min or med3
+; instructions followed by a shift and an OR.
+define i16 @basic_smax_smin_bitop(i16 %src0, i16 %src1) {
+; SDAG-VI-LABEL: basic_smax_smin_bitop:
+; SDAG-VI: ; %bb.0:
+; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-VI-NEXT: v_max_i16_e32 v0, 0, v0
+; SDAG-VI-NEXT: v_max_i16_e32 v1, 0, v1
+; SDAG-VI-NEXT: v_mov_b32_e32 v2, 0xff
+; SDAG-VI-NEXT: v_min_i16_e32 v0, 0xff, v0
+; SDAG-VI-NEXT: v_min_i16_sdwa v1, v1, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; SDAG-VI-NEXT: v_or_b32_e32 v0, v0, v1
+; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX9-LABEL: basic_smax_smin_bitop:
+; GISEL-GFX9: ; %bb.0:
+; GISEL-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GISEL-GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GISEL-GFX9-NEXT: v_sat_pk_u8_i16_e32 v0, v0
+; GISEL-GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX11-LABEL: basic_smax_smin_bitop:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff
+; GISEL-GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
+; GISEL-GFX11-NEXT: v_or_b32_e32 v0, v0, v1
+; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31]
+
+ %src0.max = call i16 @llvm.smax.i16(i16 %src0, i16 0)
+ %src0.clamp = call i16 @llvm.smin.i16(i16 %src0.max, i16 255)
+ %src1.max = call i16 @llvm.smax.i16(i16 %src1, i16 0)
+ %src1.clamp = call i16 @llvm.smin.i16(i16 %src1.max, i16 255)
+ %src0.and = and i16 %src0.clamp, 255
+ %src1.shl = shl i16 %src1.clamp, 8
+ %or = or i16 %src0.and, %src1.shl
+ ret i16 %or
+}
----------------
Shoreshen wrote:
Hi, it seems that the AMDGPU backend doesn't support v2i8.
When returning v2i8, it will return two i16 values, and because of the existence of the other i16, one of the med instructions will remain.
https://github.com/llvm/llvm-project/pull/121124
More information about the llvm-commits
mailing list