[llvm] 82d22a1 - [AMDGPU] Fixed folding of inline imm into dot w/o opsel (#73589)

via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 28 00:50:46 PST 2023


Author: Stanislav Mekhanoshin
Date: 2023-11-28T00:50:41-08:00
New Revision: 82d22a1bb407ffe08dc9a272c1d5c627b4609587

URL: https://github.com/llvm/llvm-project/commit/82d22a1bb407ffe08dc9a272c1d5c627b4609587
DIFF: https://github.com/llvm/llvm-project/commit/82d22a1bb407ffe08dc9a272c1d5c627b4609587.diff

LOG: [AMDGPU] Fixed folding of inline imm into dot w/o opsel (#73589)

A splat packed constant can be folded as an inline immediate but it
shall use opsel. On gfx940 this code path can be skipped due to HW bug
workaround and then it may be folded w/o opsel which is a bug. Fixed.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 0ec0370e21dfc16..5f8e32c812292b2 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -205,9 +205,11 @@ bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const {
   const uint64_t TSFlags = MI->getDesc().TSFlags;
   if (Fold.isImm()) {
     if (TSFlags & SIInstrFlags::IsPacked && !(TSFlags & SIInstrFlags::IsMAI) &&
-        (!ST->hasDOTOpSelHazard() || !(TSFlags & SIInstrFlags::IsDOT)) &&
         AMDGPU::isFoldableLiteralV216(Fold.ImmToFold,
                                       ST->hasInv2PiInlineImm())) {
+      if (ST->hasDOTOpSelHazard() && (TSFlags & SIInstrFlags::IsDOT))
+        return false; // Prevent further folding of this operand without opsel.
+
       // Set op_sel/op_sel_hi on this operand or bail out if op_sel is
       // already set.
       unsigned Opcode = MI->getOpcode();

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll
index 490ce706455cd6f..04c84df859b27fa 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll
@@ -1,6 +1,6 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX9,GFX906
-; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX9,GFX940
-; RUN: llc -march=amdgcn -mcpu=gfx940 -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX9,GFX940
+; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX9,GFX940-SDAG
+; RUN: llc -march=amdgcn -mcpu=gfx940 -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX9,GFX940-GISEL
 ; RUN: llc -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
 ; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
 
@@ -43,7 +43,10 @@ entry:
 
 ; GCN-LABEL: {{^}}test_llvm_amdgcn_udot2_op_sel:
 ; GFX906: v_dot2_u32_u16 v{{[0-9]+}}, 1, v{{[0-9]+}}, s{{[0-9]+}} op_sel:[0,1,0] op_sel_hi:[0,0,1]{{$}}
-; GFX940: v_dot2_u32_u16 v{{[0-9]+}}, 1, v{{[0-9]+}}, s{{[0-9]+}}{{$}}
+; GFX940-SDAG: s_mov_b32 [[K:s[0-9]+]], 0x10001
+; GFX940-SDAG: v_dot2_u32_u16 v{{[0-9]+}}, [[K]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
+; GFX940-GISEL: v_mov_b32_e32 [[K:v[0-9]+]], 0x10001
+; GFX940-GISEL: v_dot2_u32_u16 v{{[0-9]+}}, [[K]], v{{[0-9]+}}, s{{[0-9]+}}{{$}}
 ; GFX10:  v_dot2_u32_u16 v{{[0-9]+}}, 1, v{{[0-9]+}}, s{{[0-9]+}} op_sel:[0,1,0] op_sel_hi:[0,0,1]{{$}}
 define amdgpu_kernel void @test_llvm_amdgcn_udot2_op_sel(
     ptr addrspace(1) %r,


        


More information about the llvm-commits mailing list