[llvm] fd85ffb - [AMDGPU] Handle min/max in isNarrowingProfitable (#140206)
via llvm-commits
llvm-commits at lists.llvm.org
Fri May 16 01:16:49 PDT 2025
Author: Pierre van Houtryve
Date: 2025-05-16T10:16:44+02:00
New Revision: fd85ffb4c43be80c179145a382d78a7c9ca3c1b7
URL: https://github.com/llvm/llvm-project/commit/fd85ffb4c43be80c179145a382d78a7c9ca3c1b7
DIFF: https://github.com/llvm/llvm-project/commit/fd85ffb4c43be80c179145a382d78a7c9ca3c1b7.diff
LOG: [AMDGPU] Handle min/max in isNarrowingProfitable (#140206)
This introduces a slight regression in some cases, but it will even out once
we disable the promotion in CodeGenPrepare (CGP).
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/test/CodeGen/AMDGPU/min.ll
llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 8b93ed342c64a..7ed055e8da2b6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1042,6 +1042,10 @@ bool AMDGPUTargetLowering::isNarrowingProfitable(SDNode *N, EVT SrcVT,
case ISD::MUL:
case ISD::SETCC:
case ISD::SELECT:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
if (Subtarget->has16BitInsts() &&
(!DestVT.isVector() || !Subtarget->hasVOP3PInsts())) {
// Don't narrow back down to i16 if promoted to i32 already.
diff --git a/llvm/test/CodeGen/AMDGPU/min.ll b/llvm/test/CodeGen/AMDGPU/min.ll
index d2f4f54cefe78..201b85c745c18 100644
--- a/llvm/test/CodeGen/AMDGPU/min.ll
+++ b/llvm/test/CodeGen/AMDGPU/min.ll
@@ -649,36 +649,35 @@ define amdgpu_kernel void @s_test_imin_sle_v4i8(ptr addrspace(1) %out, [8 x i32]
;
; GFX9-LABEL: s_test_imin_sle_v4i8:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_load_dword s3, s[8:9], 0x4c
; GFX9-NEXT: s_load_dword s2, s[8:9], 0x28
+; GFX9-NEXT: s_load_dword s3, s[8:9], 0x4c
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_lshr_b32 s5, s2, 16
-; GFX9-NEXT: s_lshr_b32 s8, s3, 16
-; GFX9-NEXT: s_ashr_i32 s9, s3, 24
-; GFX9-NEXT: s_ashr_i32 s6, s2, 24
-; GFX9-NEXT: s_bfe_i32 s8, s8, 0x80000
-; GFX9-NEXT: v_mov_b32_e32 v1, s9
-; GFX9-NEXT: s_bfe_i32 s5, s5, 0x80000
+; GFX9-NEXT: s_sext_i32_i16 s5, s2
; GFX9-NEXT: s_sext_i32_i16 s7, s3
-; GFX9-NEXT: v_min_i16_e32 v1, s6, v1
-; GFX9-NEXT: v_mov_b32_e32 v2, s8
-; GFX9-NEXT: s_sext_i32_i16 s4, s2
-; GFX9-NEXT: s_lshr_b32 s7, s7, 8
-; GFX9-NEXT: v_lshlrev_b32_e32 v1, 8, v1
-; GFX9-NEXT: v_min_i16_e32 v2, s5, v2
-; GFX9-NEXT: s_lshr_b32 s4, s4, 8
-; GFX9-NEXT: s_bfe_i32 s3, s3, 0x80000
-; GFX9-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; GFX9-NEXT: v_mov_b32_e32 v2, s7
-; GFX9-NEXT: s_bfe_i32 s2, s2, 0x80000
-; GFX9-NEXT: v_min_i16_e32 v2, s4, v2
-; GFX9-NEXT: v_mov_b32_e32 v3, s3
-; GFX9-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; GFX9-NEXT: v_min_i16_e32 v3, s2, v3
-; GFX9-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; GFX9-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX9-NEXT: s_ashr_i32 s7, s7, 8
+; GFX9-NEXT: s_ashr_i32 s5, s5, 8
+; GFX9-NEXT: s_ashr_i32 s4, s2, 24
+; GFX9-NEXT: s_ashr_i32 s6, s3, 24
+; GFX9-NEXT: s_min_i32 s5, s5, s7
+; GFX9-NEXT: s_sext_i32_i8 s7, s3
+; GFX9-NEXT: s_sext_i32_i8 s8, s2
+; GFX9-NEXT: s_bfe_i32 s3, s3, 0x80010
+; GFX9-NEXT: s_bfe_i32 s2, s2, 0x80010
+; GFX9-NEXT: s_min_i32 s7, s8, s7
+; GFX9-NEXT: s_min_i32 s4, s4, s6
+; GFX9-NEXT: s_min_i32 s2, s2, s3
+; GFX9-NEXT: s_lshl_b32 s5, s5, 8
+; GFX9-NEXT: s_and_b32 s7, s7, 0xff
+; GFX9-NEXT: s_lshl_b32 s4, s4, 8
+; GFX9-NEXT: s_and_b32 s2, s2, 0xff
+; GFX9-NEXT: s_or_b32 s5, s7, s5
+; GFX9-NEXT: s_or_b32 s2, s2, s4
+; GFX9-NEXT: s_and_b32 s5, s5, 0xffff
+; GFX9-NEXT: s_lshl_b32 s2, s2, 16
+; GFX9-NEXT: s_or_b32 s2, s5, s2
+; GFX9-NEXT: v_mov_b32_e32 v1, s2
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-NEXT: s_endpgm
;
@@ -688,111 +687,70 @@ define amdgpu_kernel void @s_test_imin_sle_v4i8(ptr addrspace(1) %out, [8 x i32]
; GFX10-NEXT: s_load_dword s2, s[8:9], 0x28
; GFX10-NEXT: s_load_dword s3, s[8:9], 0x4c
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
+; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_sext_i32_i16 s4, s2
+; GFX10-NEXT: s_sext_i32_i16 s5, s2
; GFX10-NEXT: s_sext_i32_i16 s7, s3
-; GFX10-NEXT: s_ashr_i32 s6, s2, 24
-; GFX10-NEXT: s_ashr_i32 s9, s3, 24
-; GFX10-NEXT: s_lshr_b32 s4, s4, 8
-; GFX10-NEXT: s_lshr_b32 s7, s7, 8
-; GFX10-NEXT: v_min_i16 v0, s6, s9
-; GFX10-NEXT: v_min_i16 v1, s4, s7
-; GFX10-NEXT: s_lshr_b32 s5, s2, 16
-; GFX10-NEXT: s_lshr_b32 s8, s3, 16
-; GFX10-NEXT: s_bfe_i32 s2, s2, 0x80000
-; GFX10-NEXT: s_bfe_i32 s5, s5, 0x80000
-; GFX10-NEXT: s_bfe_i32 s4, s8, 0x80000
-; GFX10-NEXT: s_bfe_i32 s3, s3, 0x80000
-; GFX10-NEXT: v_min_i16 v2, s5, s4
-; GFX10-NEXT: v_lshlrev_b32_e32 v0, 8, v0
-; GFX10-NEXT: v_lshlrev_b32_e32 v1, 8, v1
-; GFX10-NEXT: v_min_i16 v3, s2, s3
-; GFX10-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; GFX10-NEXT: v_mov_b32_e32 v2, 0
-; GFX10-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; GFX10-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX10-NEXT: global_store_dword v2, v0, s[0:1]
+; GFX10-NEXT: s_ashr_i32 s4, s2, 24
+; GFX10-NEXT: s_ashr_i32 s6, s3, 24
+; GFX10-NEXT: s_sext_i32_i8 s8, s3
+; GFX10-NEXT: s_sext_i32_i8 s9, s2
+; GFX10-NEXT: s_bfe_i32 s3, s3, 0x80010
+; GFX10-NEXT: s_bfe_i32 s2, s2, 0x80010
+; GFX10-NEXT: s_ashr_i32 s7, s7, 8
+; GFX10-NEXT: s_ashr_i32 s5, s5, 8
+; GFX10-NEXT: s_min_i32 s8, s9, s8
+; GFX10-NEXT: s_min_i32 s4, s4, s6
+; GFX10-NEXT: s_min_i32 s2, s2, s3
+; GFX10-NEXT: s_min_i32 s3, s5, s7
+; GFX10-NEXT: s_and_b32 s5, s8, 0xff
+; GFX10-NEXT: s_lshl_b32 s4, s4, 8
+; GFX10-NEXT: s_lshl_b32 s3, s3, 8
+; GFX10-NEXT: s_and_b32 s2, s2, 0xff
+; GFX10-NEXT: s_or_b32 s3, s5, s3
+; GFX10-NEXT: s_or_b32 s2, s2, s4
+; GFX10-NEXT: s_and_b32 s3, s3, 0xffff
+; GFX10-NEXT: s_lshl_b32 s2, s2, 16
+; GFX10-NEXT: s_or_b32 s2, s3, s2
+; GFX10-NEXT: v_mov_b32_e32 v1, s2
+; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10-NEXT: s_endpgm
;
-; GFX11-TRUE16-LABEL: s_test_imin_sle_v4i8:
-; GFX11-TRUE16: ; %bb.0:
-; GFX11-TRUE16-NEXT: s_clause 0x1
-; GFX11-TRUE16-NEXT: s_load_b32 s0, s[4:5], 0x28
-; GFX11-TRUE16-NEXT: s_load_b32 s1, s[4:5], 0x4c
-; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-TRUE16-NEXT: s_sext_i32_i16 s2, s0
-; GFX11-TRUE16-NEXT: s_lshr_b32 s3, s0, 16
-; GFX11-TRUE16-NEXT: s_sext_i32_i16 s7, s1
-; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s1, 16
-; GFX11-TRUE16-NEXT: s_ashr_i32 s6, s0, 24
-; GFX11-TRUE16-NEXT: s_ashr_i32 s9, s1, 24
-; GFX11-TRUE16-NEXT: s_lshr_b32 s2, s2, 8
-; GFX11-TRUE16-NEXT: s_bfe_i32 s3, s3, 0x80000
-; GFX11-TRUE16-NEXT: s_bfe_i32 s0, s0, 0x80000
-; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s7, 8
-; GFX11-TRUE16-NEXT: s_bfe_i32 s8, s8, 0x80000
-; GFX11-TRUE16-NEXT: s_bfe_i32 s1, s1, 0x80000
-; GFX11-TRUE16-NEXT: v_min_i16 v0.l, s6, s9
-; GFX11-TRUE16-NEXT: v_min_i16 v1.l, s3, s8
-; GFX11-TRUE16-NEXT: v_min_i16 v2.l, s2, s7
-; GFX11-TRUE16-NEXT: v_min_i16 v3.l, s0, s1
-; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 8, v0
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1
-; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 8, v2
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xff, v3
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v1, v0
-; GFX11-TRUE16-NEXT: v_or_b32_e32 v1, v3, v2
-; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, 0
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, v1, v0
-; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-TRUE16-NEXT: global_store_b32 v2, v0, s[0:1]
-; GFX11-TRUE16-NEXT: s_endpgm
-;
-; GFX11-FAKE16-LABEL: s_test_imin_sle_v4i8:
-; GFX11-FAKE16: ; %bb.0:
-; GFX11-FAKE16-NEXT: s_clause 0x1
-; GFX11-FAKE16-NEXT: s_load_b32 s0, s[4:5], 0x28
-; GFX11-FAKE16-NEXT: s_load_b32 s1, s[4:5], 0x4c
-; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-FAKE16-NEXT: s_sext_i32_i16 s2, s0
-; GFX11-FAKE16-NEXT: s_lshr_b32 s3, s0, 16
-; GFX11-FAKE16-NEXT: s_sext_i32_i16 s7, s1
-; GFX11-FAKE16-NEXT: s_lshr_b32 s8, s1, 16
-; GFX11-FAKE16-NEXT: s_ashr_i32 s6, s0, 24
-; GFX11-FAKE16-NEXT: s_bfe_i32 s0, s0, 0x80000
-; GFX11-FAKE16-NEXT: s_ashr_i32 s9, s1, 24
-; GFX11-FAKE16-NEXT: s_bfe_i32 s1, s1, 0x80000
-; GFX11-FAKE16-NEXT: s_lshr_b32 s2, s2, 8
-; GFX11-FAKE16-NEXT: s_bfe_i32 s3, s3, 0x80000
-; GFX11-FAKE16-NEXT: s_lshr_b32 s7, s7, 8
-; GFX11-FAKE16-NEXT: s_bfe_i32 s8, s8, 0x80000
-; GFX11-FAKE16-NEXT: v_min_i16 v0, s6, s9
-; GFX11-FAKE16-NEXT: v_min_i16 v1, s0, s1
-; GFX11-FAKE16-NEXT: v_min_i16 v2, s3, s8
-; GFX11-FAKE16-NEXT: v_min_i16 v3, s2, s7
-; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 8, v0
-; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
-; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
-; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 8, v3
-; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v2, v0
-; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v1, v3
-; GFX11-FAKE16-NEXT: v_mov_b32_e32 v2, 0
-; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v1, v0
-; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-FAKE16-NEXT: global_store_b32 v2, v0, s[0:1]
-; GFX11-FAKE16-NEXT: s_endpgm
+; GFX11-LABEL: s_test_imin_sle_v4i8:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x2
+; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x28
+; GFX11-NEXT: s_load_b32 s3, s[4:5], 0x4c
+; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_sext_i32_i16 s5, s2
+; GFX11-NEXT: s_sext_i32_i16 s7, s3
+; GFX11-NEXT: s_ashr_i32 s4, s2, 24
+; GFX11-NEXT: s_ashr_i32 s6, s3, 24
+; GFX11-NEXT: s_sext_i32_i8 s8, s3
+; GFX11-NEXT: s_sext_i32_i8 s9, s2
+; GFX11-NEXT: s_bfe_i32 s3, s3, 0x80010
+; GFX11-NEXT: s_bfe_i32 s2, s2, 0x80010
+; GFX11-NEXT: s_ashr_i32 s7, s7, 8
+; GFX11-NEXT: s_ashr_i32 s5, s5, 8
+; GFX11-NEXT: s_min_i32 s8, s9, s8
+; GFX11-NEXT: s_min_i32 s4, s4, s6
+; GFX11-NEXT: s_min_i32 s2, s2, s3
+; GFX11-NEXT: s_min_i32 s3, s5, s7
+; GFX11-NEXT: s_and_b32 s5, s8, 0xff
+; GFX11-NEXT: s_lshl_b32 s4, s4, 8
+; GFX11-NEXT: s_lshl_b32 s3, s3, 8
+; GFX11-NEXT: s_and_b32 s2, s2, 0xff
+; GFX11-NEXT: s_or_b32 s3, s5, s3
+; GFX11-NEXT: s_or_b32 s2, s2, s4
+; GFX11-NEXT: s_and_b32 s3, s3, 0xffff
+; GFX11-NEXT: s_lshl_b32 s2, s2, 16
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT: s_or_b32 s2, s3, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s2
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_endpgm
%cmp = icmp sle <4 x i8> %a, %b
%val = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %b
store <4 x i8> %val, ptr addrspace(1) %out
diff --git a/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll b/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll
index 86fc0ace2c43f..6ab3022a91cd7 100644
--- a/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll
@@ -162,10 +162,11 @@ define amdgpu_kernel void @basic_smax_smin_sgpr(ptr addrspace(1) %out, i32 inreg
; SDAG-VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0xff
; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-VI-NEXT: v_max_i16_e64 v1, s2, 0
-; SDAG-VI-NEXT: v_max_i16_e64 v2, s3, 0
-; SDAG-VI-NEXT: v_min_i16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; SDAG-VI-NEXT: v_min_i16_e32 v1, 0xff, v1
+; SDAG-VI-NEXT: s_sext_i32_i16 s2, s2
+; SDAG-VI-NEXT: s_sext_i32_i16 s3, s3
+; SDAG-VI-NEXT: v_med3_i32 v1, s2, 0, v0
+; SDAG-VI-NEXT: v_med3_i32 v0, s3, 0, v0
+; SDAG-VI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; SDAG-VI-NEXT: v_or_b32_e32 v2, v1, v0
; SDAG-VI-NEXT: v_mov_b32_e32 v0, s0
; SDAG-VI-NEXT: v_mov_b32_e32 v1, s1
@@ -689,12 +690,12 @@ define amdgpu_kernel void @vec_smax_smin_sgpr(ptr addrspace(1) %out, <2 x i16> i
; SDAG-VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; SDAG-VI-NEXT: v_mov_b32_e32 v0, 0xff
; SDAG-VI-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-VI-NEXT: s_lshr_b32 s3, s2, 16
-; SDAG-VI-NEXT: v_max_i16_e64 v1, s2, 0
-; SDAG-VI-NEXT: v_max_i16_e64 v2, s3, 0
-; SDAG-VI-NEXT: v_min_i16_e32 v1, 0xff, v1
-; SDAG-VI-NEXT: v_min_i16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; SDAG-VI-NEXT: v_or_b32_e32 v2, v1, v0
+; SDAG-VI-NEXT: s_ashr_i32 s3, s2, 16
+; SDAG-VI-NEXT: s_sext_i32_i16 s2, s2
+; SDAG-VI-NEXT: v_med3_i32 v1, s2, 0, v0
+; SDAG-VI-NEXT: v_med3_i32 v0, s3, 0, v0
+; SDAG-VI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; SDAG-VI-NEXT: v_or_b32_sdwa v2, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; SDAG-VI-NEXT: v_mov_b32_e32 v0, s0
; SDAG-VI-NEXT: v_mov_b32_e32 v1, s1
; SDAG-VI-NEXT: flat_store_dword v[0:1], v2
More information about the llvm-commits
mailing list