[llvm] [AMDGPU] Disable inline constants for pseudo scalar transcendentals (PR #104395)
Carl Ritson via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 16 03:11:25 PDT 2024
https://github.com/perlfu updated https://github.com/llvm/llvm-project/pull/104395
>From 03b786e6a5c0df11ff6398a97abe085b384a3c4b Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson at amd.com>
Date: Thu, 8 Aug 2024 15:26:38 +0900
Subject: [PATCH 1/4] [AMDGPU] Disable inline constants for pseudo scalar
transcendentals
Prevent operand folding from inlining constants into pseudo scalar
transcendental f16 instructions.
However, still allow literal constants.
---
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 6 +
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 12 ++
.../AMDGPU/pseudo-scalar-transcendental.mir | 120 ++++++++++++++++++
3 files changed, 138 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.mir
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index def89c785b8552..902f51ae358d59 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1289,6 +1289,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
+ /// \returns true if inline constants are not supported for F16 pseudo
+ /// scalar transcendentals.
+ bool hasNoF16PseudoScalarTransInlineConstants() const {
+ return getGeneration() == GFX12;
+ }
+
/// \returns The maximum number of instructions that can be enclosed in an
/// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
/// instruction.
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 9147242046ceda..9ee65e390317a8 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5776,6 +5776,18 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
return false;
}
}
+ } else if (isVOP3(MI) && ST.hasNoF16PseudoScalarTransInlineConstants() &&
+ !MO->isReg() && isInlineConstant(*MO, OpInfo)) {
+ switch (MI.getOpcode()) {
+ case AMDGPU::V_S_EXP_F16_e64:
+ case AMDGPU::V_S_LOG_F16_e64:
+ case AMDGPU::V_S_RCP_F16_e64:
+ case AMDGPU::V_S_RSQ_F16_e64:
+ case AMDGPU::V_S_SQRT_F16_e64:
+ return false;
+ default:
+ break;
+ }
}
if (MO->isReg()) {
diff --git a/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.mir b/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.mir
new file mode 100644
index 00000000000000..17bed38bd046d7
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.mir
@@ -0,0 +1,120 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -run-pass=si-fold-operands -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s
+
+# Do not use inline constants for f16 pseudo scalar transcendentals.
+# But allow literal constants.
+
+---
+name: exp_f16_imm
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: exp_f16_imm
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
+ ; GCN-NEXT: [[V_S_EXP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_EXP_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
+ %0:sgpr_32 = S_MOV_B32 15360
+ %1:sgpr_32 = V_S_EXP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: exp_f16_literal
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: exp_f16_literal
+ ; GCN: [[V_S_EXP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_EXP_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
+ %0:sgpr_32 = S_MOV_B32 16960
+ %1:sgpr_32 = V_S_EXP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: log_f16_imm
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: log_f16_imm
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
+ ; GCN-NEXT: [[V_S_LOG_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_LOG_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
+ %0:sgpr_32 = S_MOV_B32 15360
+ %1:sgpr_32 = V_S_LOG_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: log_f16_literal
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: log_f16_literal
+ ; GCN: [[V_S_LOG_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_LOG_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
+ %0:sgpr_32 = S_MOV_B32 16960
+ %1:sgpr_32 = V_S_LOG_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: rcp_f16_imm
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: rcp_f16_imm
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
+ ; GCN-NEXT: [[V_S_RCP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RCP_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
+ %0:sgpr_32 = S_MOV_B32 15360
+ %1:sgpr_32 = V_S_RCP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: rcp_f16_literal
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: rcp_f16_literal
+ ; GCN: [[V_S_RCP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RCP_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
+ %0:sgpr_32 = S_MOV_B32 16960
+ %1:sgpr_32 = V_S_RCP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: rsq_f16_imm
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: rsq_f16_imm
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
+ ; GCN-NEXT: [[V_S_RSQ_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RSQ_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
+ %0:sgpr_32 = S_MOV_B32 15360
+ %1:sgpr_32 = V_S_RSQ_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: rsq_f16_literal
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: rsq_f16_literal
+ ; GCN: [[V_S_RSQ_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RSQ_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
+ %0:sgpr_32 = S_MOV_B32 16960
+ %1:sgpr_32 = V_S_RSQ_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: sqrt_f16_imm
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: sqrt_f16_imm
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
+ ; GCN-NEXT: [[V_S_SQRT_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_SQRT_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
+ %0:sgpr_32 = S_MOV_B32 15360
+ %1:sgpr_32 = V_S_SQRT_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: sqrt_f16_literal
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: sqrt_f16_literal
+ ; GCN: [[V_S_SQRT_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_SQRT_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
+ %0:sgpr_32 = S_MOV_B32 16960
+ %1:sgpr_32 = V_S_SQRT_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
>From 75d7065555c6df2faddd852bbc600035c9f75d95 Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson at amd.com>
Date: Thu, 15 Aug 2024 14:03:30 +0900
Subject: [PATCH 2/4] - Address reviewer comments
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 9ee65e390317a8..3a82e13efa0ded 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5776,7 +5776,7 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
return false;
}
}
- } else if (isVOP3(MI) && ST.hasNoF16PseudoScalarTransInlineConstants() &&
+ } else if (ST.hasNoF16PseudoScalarTransInlineConstants() && isVOP3(MI) &&
!MO->isReg() && isInlineConstant(*MO, OpInfo)) {
switch (MI.getOpcode()) {
case AMDGPU::V_S_EXP_F16_e64:
>From 7d00498487dbbf7c0de65229a5aa25a94b939a53 Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson at amd.com>
Date: Fri, 16 Aug 2024 11:41:24 +0900
Subject: [PATCH 3/4] - Address reviewer comments
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 16 ++++------------
llvm/lib/Target/AMDGPU/SIInstrInfo.h | 8 ++++++++
2 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 3a82e13efa0ded..0f7cfb34969202 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5776,18 +5776,10 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
return false;
}
}
- } else if (ST.hasNoF16PseudoScalarTransInlineConstants() && isVOP3(MI) &&
- !MO->isReg() && isInlineConstant(*MO, OpInfo)) {
- switch (MI.getOpcode()) {
- case AMDGPU::V_S_EXP_F16_e64:
- case AMDGPU::V_S_LOG_F16_e64:
- case AMDGPU::V_S_RCP_F16_e64:
- case AMDGPU::V_S_RSQ_F16_e64:
- case AMDGPU::V_S_SQRT_F16_e64:
- return false;
- default:
- break;
- }
+ } else if (ST.hasNoF16PseudoScalarTransInlineConstants() &&
+ isF16PseudoScalarTrans(MI.getOpcode()) && !MO->isReg() &&
+ isInlineConstant(*MO, OpInfo)) {
+ return false;
}
if (MO->isReg()) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 1712dfe8d406cc..c2bc4798658606 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -946,6 +946,14 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
Opcode == AMDGPU::DS_GWS_BARRIER;
}
+ bool isF16PseudoScalarTrans(unsigned Opcode) const {
+ return Opcode == AMDGPU::V_S_EXP_F16_e64 ||
+ Opcode == AMDGPU::V_S_LOG_F16_e64 ||
+ Opcode == AMDGPU::V_S_RCP_F16_e64 ||
+ Opcode == AMDGPU::V_S_RSQ_F16_e64 ||
+ Opcode == AMDGPU::V_S_SQRT_F16_e64;
+ }
+
static bool doesNotReadTiedSource(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
}
>From ead00b82a1ae946be677ed966e147e333e63a2bc Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson at amd.com>
Date: Fri, 16 Aug 2024 19:00:09 +0900
Subject: [PATCH 4/4] - Address reviewer comments
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 4 ++--
llvm/lib/Target/AMDGPU/SIInstrInfo.h | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 0f7cfb34969202..6dce41d1605fa4 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5776,8 +5776,8 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
return false;
}
}
- } else if (ST.hasNoF16PseudoScalarTransInlineConstants() &&
- isF16PseudoScalarTrans(MI.getOpcode()) && !MO->isReg() &&
+ } else if (ST.hasNoF16PseudoScalarTransInlineConstants() && !MO->isReg() &&
+ isF16PseudoScalarTrans(MI.getOpcode()) &&
isInlineConstant(*MO, OpInfo)) {
return false;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index c2bc4798658606..91855fb14f6f37 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -946,7 +946,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
Opcode == AMDGPU::DS_GWS_BARRIER;
}
- bool isF16PseudoScalarTrans(unsigned Opcode) const {
+ static bool isF16PseudoScalarTrans(unsigned Opcode) {
return Opcode == AMDGPU::V_S_EXP_F16_e64 ||
Opcode == AMDGPU::V_S_LOG_F16_e64 ||
Opcode == AMDGPU::V_S_RCP_F16_e64 ||
More information about the llvm-commits
mailing list