[llvm-branch-commits] [llvm] release/19.x: [AMDGPU] Disable inline constants for pseudo scalar transcendentals (#104395) (PR #105472)

Mon Aug 26 00:12:15 PDT 2024

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/105472

>From 43b455b2d2e5107e19d7d47e77ba513d1f9f5e2f Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson at amd.com>
Date: Sat, 17 Aug 2024 16:52:38 +0900
Subject: [PATCH] [AMDGPU] Disable inline constants for pseudo scalar
 transcendentals (#104395)

Prevent operand folding from folding inline constants into pseudo scalar
transcendental f16 instructions.
Literal constants may still be folded into these instructions.

(cherry picked from commit fc6300a5f7ef430e4ec86d16be0b146de7fbd16b)
---
 llvm/lib/Target/AMDGPU/GCNSubtarget.h         |   6 +
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        |   4 +
 llvm/lib/Target/AMDGPU/SIInstrInfo.h          |   8 ++
 .../AMDGPU/pseudo-scalar-transcendental.mir   | 120 ++++++++++++++++++
 4 files changed, 138 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.mir

diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index def89c785b8552..902f51ae358d59 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1289,6 +1289,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   /// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
   bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
 
+  /// \returns true if F16 pseudo scalar transcendental instructions cannot
+  /// take inline constants on this subtarget, i.e. its generation is GFX12.
+  bool hasNoF16PseudoScalarTransInlineConstants() const {
+    return getGeneration() == GFX12;
+  }
+
   /// \returns The maximum number of instructions that can be enclosed in an
   /// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
   /// instruction.
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 463737f645d459..27b8c1b17422af 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5768,6 +5768,10 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
           return false;
       }
     }
+  } else if (ST.hasNoF16PseudoScalarTransInlineConstants() && !MO->isReg() &&
+             isF16PseudoScalarTrans(MI.getOpcode()) &&
+             isInlineConstant(*MO, OpInfo)) {
+    return false;
   }
 
   if (MO->isReg()) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 1712dfe8d406cc..91855fb14f6f37 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -946,6 +946,14 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
            Opcode == AMDGPU::DS_GWS_BARRIER;
   }
 
+  static bool isF16PseudoScalarTrans(unsigned Opcode) { // V_S_*_F16_e64 only
+    return Opcode == AMDGPU::V_S_EXP_F16_e64 ||
+           Opcode == AMDGPU::V_S_LOG_F16_e64 ||
+           Opcode == AMDGPU::V_S_RCP_F16_e64 ||
+           Opcode == AMDGPU::V_S_RSQ_F16_e64 ||
+           Opcode == AMDGPU::V_S_SQRT_F16_e64;
+  }
+
   static bool doesNotReadTiedSource(const MachineInstr &MI) {
     return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
   }
diff --git a/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.mir b/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.mir
new file mode 100644
index 00000000000000..17bed38bd046d7
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.mir
@@ -0,0 +1,120 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -run-pass=si-fold-operands -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s
+
+# Do not fold inline constants (15360 = 0x3C00 = f16 1.0) into f16 pseudo
+# scalar transcendentals, but do fold literals (16960 = 0x4240 = f16 3.125).
+
+---
+name: exp_f16_imm
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: exp_f16_imm
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
+    ; GCN-NEXT: [[V_S_EXP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_EXP_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
+    %0:sgpr_32 = S_MOV_B32 15360
+    %1:sgpr_32 = V_S_EXP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: exp_f16_literal
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: exp_f16_literal
+    ; GCN: [[V_S_EXP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_EXP_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
+    %0:sgpr_32 = S_MOV_B32 16960
+    %1:sgpr_32 = V_S_EXP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: log_f16_imm
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: log_f16_imm
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
+    ; GCN-NEXT: [[V_S_LOG_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_LOG_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
+    %0:sgpr_32 = S_MOV_B32 15360
+    %1:sgpr_32 = V_S_LOG_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: log_f16_literal
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: log_f16_literal
+    ; GCN: [[V_S_LOG_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_LOG_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
+    %0:sgpr_32 = S_MOV_B32 16960
+    %1:sgpr_32 = V_S_LOG_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: rcp_f16_imm
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: rcp_f16_imm
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
+    ; GCN-NEXT: [[V_S_RCP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RCP_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
+    %0:sgpr_32 = S_MOV_B32 15360
+    %1:sgpr_32 = V_S_RCP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: rcp_f16_literal
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: rcp_f16_literal
+    ; GCN: [[V_S_RCP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RCP_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
+    %0:sgpr_32 = S_MOV_B32 16960
+    %1:sgpr_32 = V_S_RCP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: rsq_f16_imm
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: rsq_f16_imm
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
+    ; GCN-NEXT: [[V_S_RSQ_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RSQ_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
+    %0:sgpr_32 = S_MOV_B32 15360
+    %1:sgpr_32 = V_S_RSQ_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: rsq_f16_literal
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: rsq_f16_literal
+    ; GCN: [[V_S_RSQ_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RSQ_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
+    %0:sgpr_32 = S_MOV_B32 16960
+    %1:sgpr_32 = V_S_RSQ_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: sqrt_f16_imm
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: sqrt_f16_imm
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
+    ; GCN-NEXT: [[V_S_SQRT_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_SQRT_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
+    %0:sgpr_32 = S_MOV_B32 15360
+    %1:sgpr_32 = V_S_SQRT_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: sqrt_f16_literal
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: sqrt_f16_literal
+    ; GCN: [[V_S_SQRT_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_SQRT_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
+    %0:sgpr_32 = S_MOV_B32 16960
+    %1:sgpr_32 = V_S_SQRT_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...