[llvm] AMDGPU/GlobalISel: Add test for fma_mix with source from unmerge (PR #102129)

Petar Avramovic via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 6 05:07:44 PDT 2024


https://github.com/petar-avramovic created https://github.com/llvm/llvm-project/pull/102129

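When selecting fma_mix with an operand that comes from G_UNMERGE_VALUES,
there is a bug: the folded register is always operand 0 of the
G_UNMERGE_VALUES, regardless of which result the source actually is.
Source modifiers are selected correctly.
isExtractHiElt returns the G_UNMERGE_VALUES that defines the source
register, but does not specify which of its operands that register is.
The test added here records the current output, in which both
multiplicands are read from v2 even though element 3 of the loaded
vector lives in v3.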

>From 517f5e040b990627872bb512776d37311d524105 Mon Sep 17 00:00:00 2001
From: Petar Avramovic <Petar.Avramovic at amd.com>
Date: Tue, 6 Aug 2024 13:45:17 +0200
Subject: [PATCH] AMDGPU/GlobalISel: Add test for fma_mix with source from
 unmerge

When selecting fma_mix with an operand that comes from G_UNMERGE_VALUES,
there is a bug where the folded register is always operand 0 of the
G_UNMERGE_VALUES. Source modifiers are selected correctly.
isExtractHiElt returns the G_UNMERGE_VALUES that defines the source
register, but does not specify which of its operands that register is.
---
 .../GlobalISel/combine-fma-add-ext-fma.ll     | 37 +++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll
index b9846a6a555dc..e910c2eca2ced 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll
@@ -441,6 +441,43 @@ define amdgpu_vs <4 x float> @test_v4f16_v4f32_add_ext_fma_mul_rhs(<4 x float> %
     ret <4 x float> %d
 }
 
+define amdgpu_ps float @test_matching_source_from_unmerge(ptr addrspace(3) %aptr, float %b) {
+; GFX9-DENORM-LABEL: test_matching_source_from_unmerge:
+; GFX9-DENORM:       ; %bb.0: ; %.entry
+; GFX9-DENORM-NEXT:    ds_read_b64 v[2:3], v0
+; GFX9-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-DENORM-NEXT:    v_mad_mix_f32 v0, v2, v2, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; GFX9-DENORM-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: test_matching_source_from_unmerge:
+; GFX10:       ; %bb.0: ; %.entry
+; GFX10-NEXT:    ds_read_b64 v[2:3], v0
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-NEXT:    v_fma_mix_f32 v0, v2, v2, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX10-CONTRACT-LABEL: test_matching_source_from_unmerge:
+; GFX10-CONTRACT:       ; %bb.0: ; %.entry
+; GFX10-CONTRACT-NEXT:    ds_read_b64 v[2:3], v0
+; GFX10-CONTRACT-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-CONTRACT-NEXT:    v_fma_mix_f32 v0, v2, v2, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; GFX10-CONTRACT-NEXT:    ; return to shader part epilog
+;
+; GFX10-DENORM-LABEL: test_matching_source_from_unmerge:
+; GFX10-DENORM:       ; %bb.0: ; %.entry
+; GFX10-DENORM-NEXT:    ds_read_b64 v[2:3], v0
+; GFX10-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-DENORM-NEXT:    v_fma_mix_f32 v0, v2, v2, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; GFX10-DENORM-NEXT:    ; return to shader part epilog
+.entry:
+    %a = load <4 x half>, ptr addrspace(3) %aptr, align 16
+    %a_f32 = fpext <4 x half> %a to <4 x float>
+    %.a3_f32 = extractelement <4 x float> %a_f32, i64 3
+    %.a1_f32 = extractelement <4 x float> %a_f32, i64 1
+    %res = call float @llvm.fmuladd.f32(float %.a1_f32, float %.a3_f32, float %b)
+    ret float %res
+}
+
 declare float @llvm.fmuladd.f32(float, float, float) #0
 declare half @llvm.fmuladd.f16(half, half, half) #0
 declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #0



More information about the llvm-commits mailing list