[llvm] a2d086a - [AMDGPU] Fix FMA combine (#119217)

Tue Dec 10 01:11:22 PST 2024

Author: Piotr Sobczak
Date: 2024-12-10T10:11:19+01:00
New Revision: a2d086af2cdac8c22685551d4d3d0928e40e1a0f

URL: https://github.com/llvm/llvm-project/commit/a2d086af2cdac8c22685551d4d3d0928e40e1a0f
DIFF: https://github.com/llvm/llvm-project/commit/a2d086af2cdac8c22685551d4d3d0928e40e1a0f.diff

LOG: [AMDGPU] Fix FMA combine (#119217)

Update the check in the FMA combine to check dot10-insts instead of
dot7-insts.

The target of the combine, v_dot2_f32_f16, is available only if
dot10-insts target feature is enabled.

The issue probably dates back to the change that split out dot10-insts
out of dot7-insts.

As far as I can see, this does not affect any current targets, but if a
future target has dot7-insts, but not dot10-insts that would cause a
crash ("cannot select") for the input ir in the test.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/test/CodeGen/AMDGPU/fdot2.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index f89fe8faa600ba..8dfebd36a962e1 100644

--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -14696,7 +14696,7 @@ SDValue SITargetLowering::performFMACombine(SDNode *N,
   EVT VT = N->getValueType(0);
   SDLoc SL(N);
 
-  if (!Subtarget->hasDot7Insts() || VT != MVT::f32)
+  if (!Subtarget->hasDot10Insts() || VT != MVT::f32)
     return SDValue();
 
   // FMA((F32)S0.x, (F32)S1. x, FMA((F32)S0.y, (F32)S1.y, (F32)z)) ->

diff  --git a/llvm/test/CodeGen/AMDGPU/fdot2.ll b/llvm/test/CodeGen/AMDGPU/fdot2.ll
index 695042d44d87b6..776816d6aa0e38 100644
--- a/llvm/test/CodeGen/AMDGPU/fdot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdot2.ll
@@ -5,6 +5,7 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX906
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -denormal-fp-math=preserve-sign -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX906-CONTRACT
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -denormal-fp-math=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX906-DENORM-CONTRACT
+; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -mattr="+dot7-insts,-dot10-insts" -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX906-DOT10-DISABLED
 ; (fadd (fmul S1.x, S2.x), (fadd (fmul (S1.y, S2.y), z))) -> (fdot2 S1, S2, z)
 
 ; Tests to make sure fdot2 is not generated when vector elements of dot-product expressions
@@ -21,6 +22,7 @@
 
 ; GFX906-CONTRACT: v_mac_f16_e32
 ; GFX906-DENORM-CONTRACT: v_fma_f16
+; GFX906-DOT10-DISABLED: v_fma_f16
 define amdgpu_kernel void @dotproduct_f16(ptr addrspace(1) %src1,
                                           ptr addrspace(1) %src2,
                                           ptr addrspace(1) nocapture %dst) {
@@ -44,8 +46,11 @@ entry:
 }
 
 
-; We only want to generate fdot2 if vector element of dot product is converted from f16 to f32
-; and the vectors are of type <2 x half>
+; We only want to generate fdot2 if:
+; - vector element of dot product is converted from f16 to f32, and
+; - the vectors are of type <2 x half>, and
+; - "dot10-insts" is enabled
+
 ; GCN-LABEL: {{^}}dotproduct_f16_f32
 ; GFX900: v_mad_mix_f32
 ; GFX900: v_mad_mix_f32
@@ -59,6 +64,7 @@ entry:
 ; GFX906-CONTRACT: v_dot2_f32_f16
 
 ; GFX906-DENORM-CONTRACT: v_dot2_f32_f16
+; GFX906-DOT10-DISABLED: v_fma_mix_f32
 define amdgpu_kernel void @dotproduct_f16_f32(ptr addrspace(1) %src1,
                                               ptr addrspace(1) %src2,
                                               ptr addrspace(1) nocapture %dst) {
@@ -85,8 +91,11 @@ entry:
   ret void
 }
 
-; We only want to generate fdot2 if vector element of dot product is converted from f16 to f32
-; and the vectors are of type <2 x half>
+; We only want to generate fdot2 if:
+; - vector element of dot product is converted from f16 to f32, and
+; - the vectors are of type <2 x half>, and
+; - "dot10-insts" is enabled
+
 ; GCN-LABEL: {{^}}dotproduct_
diff vecorder
 ; GFX900: v_mad_mix_f32
 ; GFX900: v_mad_mix_f32
@@ -99,6 +108,7 @@ entry:
 
 ; GFX906-CONTRACT: v_dot2_f32_f16
 ; GFX906-DENORM-CONTRACT: v_dot2_f32_f16
+; GFX906-DOT10-DISABLED: v_fma_mix_f32
 define amdgpu_kernel void @dotproduct_
diff vecorder(ptr addrspace(1) %src1,
                                                    ptr addrspace(1) %src2,
                                                    ptr addrspace(1) nocapture %dst) {
@@ -136,6 +146,7 @@ entry:
 
 ; GFX906-CONTRACT: v_fma_mix_f32
 ; GFX906-DENORM-CONTRACT: v_fma_mix_f32
+; GFX906-DOT10-DISABLED: v_fma_mix_f32
 define amdgpu_kernel void @dotproduct_v4f16(ptr addrspace(1) %src1,
                                             ptr addrspace(1) %src2,
                                             ptr addrspace(1) nocapture %dst) {
@@ -173,6 +184,7 @@ entry:
 
 ; GFX906-CONTRACT: v_fma_mix_f32
 ; GFX906-DENORM-CONTRACT: v_fma_mix_f32
+; GFX906-DOT10-DISABLED: v_fma_mix_f32
 define amdgpu_kernel void @NotAdotproduct(ptr addrspace(1) %src1,
                                           ptr addrspace(1) %src2,
                                           ptr addrspace(1) nocapture %dst) {
@@ -210,6 +222,7 @@ entry:
 
 ; GFX906-CONTRACT: v_fma_mix_f32
 ; GFX906-DENORM-CONTRACT: v_fma_mix_f32
+; GFX906-DOT10-DISABLED: v_fma_mix_f32
 define amdgpu_kernel void @Diff_Idx_NotAdotproduct(ptr addrspace(1) %src1,
                                                    ptr addrspace(1) %src2,
                                                    ptr addrspace(1) nocapture %dst) {