[llvm] 1e5359c - [AMDGPU] Treat KIMM32 and KIMM16 operand types as noninlinable

Mirko Brkusanin via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 11 09:55:56 PDT 2023


Author: Mirko Brkusanin
Date: 2023-08-11T18:46:39+02:00
New Revision: 1e5359c6bad4d19ec6c6237ceb37d14b68e058cb

URL: https://github.com/llvm/llvm-project/commit/1e5359c6bad4d19ec6c6237ceb37d14b68e058cb
DIFF: https://github.com/llvm/llvm-project/commit/1e5359c6bad4d19ec6c6237ceb37d14b68e058cb.diff

LOG: [AMDGPU] Treat KIMM32 and KIMM16 operand types as noninlinable

While they are represent 32/16 bit immediate values they are already
included in encoding of the instructions that use them and are not true
literals. FMAMK and FMAAK instructions that use them are marked with fixed
size so getInstSizeInBytes will not increase the size for these operands.

We also add tests whose logic relies on KIMM16 and KIMM32 being considered
not inlinable.

Differential Revision: https://reviews.llvm.org/D157624

Added: 
    llvm/test/CodeGen/AMDGPU/si-fold-kimm.mir

Modified: 
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/lib/Target/AMDGPU/VOP2Instructions.td
    llvm/test/CodeGen/AMDGPU/code-size-estimate.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 8e875f87021065..b0e033a1f3875e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3906,6 +3906,7 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
   }
   case AMDGPU::OPERAND_KIMM32:
   case AMDGPU::OPERAND_KIMM16:
+    return false;
   case AMDGPU::OPERAND_INPUT_MODS:
   case MCOI::OPERAND_IMMEDIATE:
     // Always embedded in the instruction for free.
@@ -5264,9 +5265,8 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
             return false;
           SGPRsUsed.insert(SGPR);
         }
-      } else if (InstDesc.operands()[i].OperandType == AMDGPU::OPERAND_KIMM32 ||
-                 (AMDGPU::isSISrcOperand(InstDesc, i) &&
-                  !isInlineConstant(Op, InstDesc.operands()[i]))) {
+      } else if (AMDGPU::isSISrcOperand(InstDesc, i) &&
+                 !isInlineConstant(Op, InstDesc.operands()[i])) {
         if (!LiteralLimit--)
           return false;
         if (--ConstantBusLimit <= 0)

diff  --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 481a162748e6ba..83df7a3cca4cbe 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -931,7 +931,7 @@ let SubtargetPredicate = isGFX11Plus in {
   } // End isCommutable = 1
 } // End SubtargetPredicate = isGFX11Plus
 
-let FPDPRounding = 1, isReMaterializable = 1 in {
+let FPDPRounding = 1, isReMaterializable = 1, FixedSize = 1 in {
 let SubtargetPredicate = isGFX10Plus, OtherPredicates = [NotHasTrue16BitInsts] in {
 def V_FMAMK_F16 : VOP2_Pseudo <"v_fmamk_f16", VOP_MADMK_F16, [], "">;
 }
@@ -947,7 +947,7 @@ let SubtargetPredicate = HasTrue16BitInsts in {
 def V_FMAAK_F16_t16 : VOP2_Pseudo <"v_fmaak_f16_t16", VOP_MADAK_F16_t16, [], "">;
 }
 } // End isCommutable = 1
-} // End FPDPRounding  = 1, isReMaterializable = 1
+} // End FPDPRounding  = 1, isReMaterializable = 1, FixedSize = 1
 
 let Constraints = "$vdst = $src2",
     DisableEncoding="$src2",
@@ -1089,12 +1089,12 @@ let AddedComplexity = 30 in {
   }
 } // End AddedComplexity = 30
 
-let SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1 in {
+let SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1, FixedSize = 1 in {
 def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">, VOPD_Component<0x2, "v_fmamk_f32">;
 
 let isCommutable = 1 in
 def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">, VOPD_Component<0x1, "v_fmaak_f32">;
-}
+} // End SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1, FixedSize = 1
 
 let SubtargetPredicate = HasPkFmacF16Inst in {
 defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>;

diff  --git a/llvm/test/CodeGen/AMDGPU/code-size-estimate.ll b/llvm/test/CodeGen/AMDGPU/code-size-estimate.ll
index c37edcbcd292c9..7e76569606ef99 100644
--- a/llvm/test/CodeGen/AMDGPU/code-size-estimate.ll
+++ b/llvm/test/CodeGen/AMDGPU/code-size-estimate.ll
@@ -149,6 +149,7 @@ define float @v_mul_f32_vop3_src_mods_literal(float %x, float %y) {
 
 ; GFX9: codeLenInByte = 24
 ; GFX10: codeLenInByte = 20
+; GFX11: codeLenInByte = 20
 
 define float @v_mul_f32_vop2_frame_index(float %x) {
 ; GFX9-LABEL: v_mul_f32_vop2_frame_index:

diff  --git a/llvm/test/CodeGen/AMDGPU/si-fold-kimm.mir b/llvm/test/CodeGen/AMDGPU/si-fold-kimm.mir
new file mode 100644
index 00000000000000..cfff5e621be535
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-fold-kimm.mir
@@ -0,0 +1,85 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=si-fold-operands -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s
+
+# Do not fold noninlinable constants in instructions like fmaak and fmamk next
+# to kimm16 and kimm32 operands
+
+---
+name: fmaak_f16
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: fmaak_f16
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 14950, implicit $exec
+    ; GCN-NEXT: [[V_FMAAK_F16_:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAAK_F16 [[COPY]], [[V_MOV_B32_e32_]], 11878, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr0 = COPY [[V_FMAAK_F16_]]
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = V_MOV_B32_e32 14950, implicit $exec
+    %2:vgpr_32 = contract nofpexcept V_FMAAK_F16 %0, %1, 11878, implicit $mode, implicit $exec
+    $vgpr0 = COPY %2
+...
+
+---
+name: fmamk_f16
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: fmamk_f16
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 14950, implicit $exec
+    ; GCN-NEXT: [[V_FMAMK_F16_:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAMK_F16 [[COPY]], 11878, [[V_MOV_B32_e32_]], implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr0 = COPY [[V_FMAMK_F16_]]
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = V_MOV_B32_e32 14950, implicit $exec
+    %2:vgpr_32 = contract nofpexcept V_FMAMK_F16 %0, 11878, %1, implicit $mode, implicit $exec
+    $vgpr0 = COPY %2
+...
+
+---
+name: fmaak_f32
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: fmaak_f32
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 979789416, implicit $exec
+    ; GCN-NEXT: [[V_FMAAK_F32_:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAAK_F32 [[COPY]], [[V_MOV_B32_e32_]], 778462824, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr0 = COPY [[V_FMAAK_F32_]]
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = V_MOV_B32_e32 979789416, implicit $exec
+    %2:vgpr_32 = contract nofpexcept V_FMAAK_F32 %0, %1, 778462824, implicit $mode, implicit $exec
+    $vgpr0 = COPY %2
+...
+
+---
+name: fmamk_f32
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: fmamk_f32
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 979789416, implicit $exec
+    ; GCN-NEXT: [[V_FMAMK_F32_:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAMK_F32 [[COPY]], 778462824, [[V_MOV_B32_e32_]], implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr0 = COPY [[V_FMAMK_F32_]]
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = V_MOV_B32_e32 979789416, implicit $exec
+    %2:vgpr_32 = contract nofpexcept V_FMAMK_F32 %0, 778462824, %1, implicit $mode, implicit $exec
+    $vgpr0 = COPY %2
+...


        


More information about the llvm-commits mailing list