[llvm] remove duplicated move (PR #126942)
Brox Chen via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 12 08:34:58 PST 2025
https://github.com/broxigarchen created https://github.com/llvm/llvm-project/pull/126942
None
>From de9cc93e88c00d16976f819f1887ed0b0bf881e6 Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Tue, 11 Feb 2025 19:56:53 -0500
Subject: [PATCH] remove duplicated move
---
llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 18 +++---
llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll | 71 ++++++++++-----------
2 files changed, 42 insertions(+), 47 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 5e825e7259a95..8eb525bf38738 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -267,9 +267,9 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
(build_vector (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
(f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
(f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers))))), f16:$elt1),
- (v2f16 (mixlo_inst $src0_modifiers, $src0,
- $src1_modifiers, $src1,
- $src2_modifiers, $src2,
+ (v2f16 (mixlo_inst $src0_modifiers, (REG_SEQUENCE VGPR_32, $src0, lo16, (i16 (IMPLICIT_DEF)), hi16),
+ $src1_modifiers, (REG_SEQUENCE VGPR_32, $src1, lo16, (i16 (IMPLICIT_DEF)), hi16),
+ $src2_modifiers, (REG_SEQUENCE VGPR_32, $src2, lo16, (i16 (IMPLICIT_DEF)), hi16),
DSTCLAMP.NONE,
(REG_SEQUENCE VGPR_32, (f16 (IMPLICIT_DEF)), lo16, $elt1, hi16)))
>;
@@ -278,9 +278,9 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
(build_vector f16:$elt0, (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
(f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
(f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers)))))),
- (v2f16 (mixhi_inst $src0_modifiers, $src0,
- $src1_modifiers, $src1,
- $src2_modifiers, $src2,
+ (v2f16 (mixhi_inst $src0_modifiers, (REG_SEQUENCE VGPR_32, $src0, lo16, (i16 (IMPLICIT_DEF)), hi16),
+ $src1_modifiers, (REG_SEQUENCE VGPR_32, $src1, lo16, (i16 (IMPLICIT_DEF)), hi16),
+ $src2_modifiers, (REG_SEQUENCE VGPR_32, $src2, lo16, (i16 (IMPLICIT_DEF)), hi16),
DSTCLAMP.NONE,
(REG_SEQUENCE VGPR_32, $elt0, lo16, (f16 (IMPLICIT_DEF)), hi16)))
>;
@@ -291,9 +291,9 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
(AMDGPUclamp (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
(f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
(f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers))))))),
- (v2f16 (mixhi_inst $src0_modifiers, $src0,
- $src1_modifiers, $src1,
- $src2_modifiers, $src2,
+ (v2f16 (mixhi_inst $src0_modifiers, (REG_SEQUENCE VGPR_32, $src0, lo16, (i16 (IMPLICIT_DEF)), hi16),
+ $src1_modifiers, (REG_SEQUENCE VGPR_32, $src1, lo16, (i16 (IMPLICIT_DEF)), hi16),
+ $src2_modifiers, (REG_SEQUENCE VGPR_32, $src2, lo16, (i16 (IMPLICIT_DEF)), hi16),
DSTCLAMP.ENABLE,
(REG_SEQUENCE VGPR_32, $elt0, lo16, (f16 (IMPLICIT_DEF)), hi16)))
>;
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
index dd4ad0a70b254..3395b62172780 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
@@ -424,10 +424,9 @@ define <2 x half> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half
; SDAG-GFX1100-TRUE16: ; %bb.0:
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v0.h, v3.l
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v0, v4, v1, v2 op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v2f32:
@@ -546,11 +545,10 @@ define <3 x half> @v_mad_mix_v3f32(<3 x half> %src0, <3 x half> %src1, <3 x half
; SDAG-GFX1100-TRUE16: ; %bb.0:
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v7.l, v0.l
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v0.h, v6.l
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v0, v7, v2, v4 op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v0, v0, v2, v4 op_sel_hi:[1,1,1]
; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v3f32:
@@ -716,14 +714,12 @@ define <4 x half> @v_mad_mix_v4f32(<4 x half> %src0, <4 x half> %src1, <4 x half
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v8.l, v0.l
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v0.h, v6.l
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v1.h, v7.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v6.l, v1.l
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v0, v8, v2, v4 op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v1, v6, v3, v5 op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v0, v0, v2, v4 op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v4f32:
@@ -1073,16 +1069,14 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v5.l, v2.l
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v2.h, v6.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v2, v3, v5, v4 op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: v_pack_b32_f16 v1, v0.l, 0
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v2.h, v6.l
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v2, v0, v2, v4 op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: v_pack_b32_f16 v1, v0.h, 0
; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v0, v2, v2 clamp
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v1, v1, v1 clamp
; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
@@ -1287,19 +1281,17 @@ define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %s
; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v4f32_clamp_postcvt:
; SDAG-GFX1100-TRUE16: ; %bb.0:
; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v7, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v2.h, v6.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v0.h, v7.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v6.l, v0.l
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v7.l, v2.l
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v0, v6, v7, v4 op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v0.h, v6.l
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v1.h, v7.l
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v0, v0, v2, v4 op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v1, v2, v2 clamp
; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v0, v0, v0 clamp
+; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v1, v1, v1 clamp
; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v4f32_clamp_postcvt:
@@ -1545,12 +1537,13 @@ define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_lo(<2 x half> %src0, <2 x half>
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v5.l, v2.l
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v3, v3, v4, v5 op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: v_max_f16_e64 v3.l, v3.l, v3.l clamp
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
+; SDAG-GFX1100-TRUE16-NEXT: v_max_f16_e64 v1.l, v3.l, v3.l clamp
; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b32_e32 v0, v3
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixhi_f16 v1, v0, v4, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b32_e32 v0, v1
; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
@@ -1715,11 +1708,13 @@ define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_hi(<2 x half> %src0, <2 x half>
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v5.l, v2.l
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v3, v3, v4, v5 op_sel_hi:[1,1,1]
-; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
-; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; SDAG-GFX1100-TRUE16-NEXT: v_mov_b32_e32 v0, v3
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
+; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixhi_f16 v1, v0, v4, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
+; SDAG-GFX1100-TRUE16-NEXT: v_mov_b32_e32 v0, v1
; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi:
More information about the llvm-commits
mailing list