[llvm] AMDGPU/GlobalISel: Add test for fma_mix with source from unmerge (PR #102129)
Petar Avramovic via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 6 05:07:44 PDT 2024
https://github.com/petar-avramovic created https://github.com/llvm/llvm-project/pull/102129
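When selecting fma_mix with an operand that comes from G_UNMERGE_VALUES,
there is a bug where the folded register is always operand 0 of the
G_UNMERGE_VALUES, regardless of which of its results is actually used.
Source modifiers are selected correctly.
The cause is that isExtractHiElt returns the G_UNMERGE_VALUES that defines
the source register but does not specify which of its operands is the source.
The added test records the current (incorrect) output: elements 1 and 3 of
the loaded <4 x half> are the high halves of v2 and v3, yet both sources of
the selected instruction are v2.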
From 517f5e040b990627872bb512776d37311d524105 Mon Sep 17 00:00:00 2001
From: Petar Avramovic <Petar.Avramovic at amd.com>
Date: Tue, 6 Aug 2024 13:45:17 +0200
Subject: [PATCH] AMDGPU/GlobalISel: Add test for fma_mix with source from
unmerge
When selecting fma_mix with an operand that comes from G_UNMERGE_VALUES,
there is a bug where the folded register is always operand 0 of the
G_UNMERGE_VALUES. Source modifiers are selected correctly.
isExtractHiElt returns the G_UNMERGE_VALUES that defines the source register
but does not specify which of its operands is the source.
---
.../GlobalISel/combine-fma-add-ext-fma.ll | 37 +++++++++++++++++++
1 file changed, 37 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll
index b9846a6a555dc..e910c2eca2ced 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll
@@ -441,6 +441,43 @@ define amdgpu_vs <4 x float> @test_v4f16_v4f32_add_ext_fma_mul_rhs(<4 x float> %
ret <4 x float> %d
}
+define amdgpu_ps float @test_matching_source_from_unmerge(ptr addrspace(3) %aptr, float %b) {
+; GFX9-DENORM-LABEL: test_matching_source_from_unmerge:
+; GFX9-DENORM: ; %bb.0: ; %.entry
+; GFX9-DENORM-NEXT: ds_read_b64 v[2:3], v0
+; GFX9-DENORM-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v2, v2, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; GFX9-DENORM-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: test_matching_source_from_unmerge:
+; GFX10: ; %bb.0: ; %.entry
+; GFX10-NEXT: ds_read_b64 v[2:3], v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: v_fma_mix_f32 v0, v2, v2, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; GFX10-NEXT: ; return to shader part epilog
+;
+; GFX10-CONTRACT-LABEL: test_matching_source_from_unmerge:
+; GFX10-CONTRACT: ; %bb.0: ; %.entry
+; GFX10-CONTRACT-NEXT: ds_read_b64 v[2:3], v0
+; GFX10-CONTRACT-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v0, v2, v2, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; GFX10-CONTRACT-NEXT: ; return to shader part epilog
+;
+; GFX10-DENORM-LABEL: test_matching_source_from_unmerge:
+; GFX10-DENORM: ; %bb.0: ; %.entry
+; GFX10-DENORM-NEXT: ds_read_b64 v[2:3], v0
+; GFX10-DENORM-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, v2, v2, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; GFX10-DENORM-NEXT: ; return to shader part epilog
+.entry:
+ %a = load <4 x half>, ptr addrspace(3) %aptr, align 16
+ %a_f32 = fpext <4 x half> %a to <4 x float>
+ %.a3_f32 = extractelement <4 x float> %a_f32, i64 3
+ %.a1_f32 = extractelement <4 x float> %a_f32, i64 1
+ %res = call float @llvm.fmuladd.f32(float %.a1_f32, float %.a3_f32, float %b)
+ ret float %res
+}
+
declare float @llvm.fmuladd.f32(float, float, float) #0
declare half @llvm.fmuladd.f16(half, half, half) #0
declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #0
More information about the llvm-commits
mailing list