[PATCH] D66666: [AMDGPU] Remove unnecessary movs for v_fmac operands
Ryan Taylor via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 23 13:31:18 PDT 2019
rtaylor updated this revision to Diff 216938.
rtaylor added a comment.
Forgot to capitalize var name.
Repository:
rL LLVM
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D66666/new/
https://reviews.llvm.org/D66666
Files:
lib/Target/AMDGPU/SIInstrInfo.cpp
test/CodeGen/AMDGPU/fmac-fma-sgpr-copy.ll
Index: test/CodeGen/AMDGPU/fmac-fma-sgpr-copy.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/fmac-fma-sgpr-copy.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s
+
+; CHECK: v_fma_f32 v0, v1, v0, s0
+define amdgpu_cs float @test1(<4 x i32> inreg %a, float %b, float %y) {
+entry:
+ %buf.load = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %a, i32 0, i32 0)
+ %vec1 = bitcast <4 x i32> %buf.load to <4 x float>
+ %.i095 = extractelement <4 x float> %vec1, i32 0
+ %.i098 = fsub nnan arcp float %b, %.i095
+ %fma1 = call float @llvm.fma.f32(float %y, float %.i098, float %.i095) #3
+ ret float %fma1
+}
+
+declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32 immarg) #2
+declare float @llvm.fma.f32(float, float, float) #1
+
+attributes #1 = { nounwind readnone speculatable willreturn }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2539,6 +2539,7 @@
LiveVariables *LV) const {
unsigned Opc = MI.getOpcode();
bool IsF16 = false;
+ bool ConstantBus = false;
bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64;
@@ -2582,11 +2583,11 @@
const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
- if (!Src0Mods && !Src1Mods && !Clamp && !Omod &&
+ if ((ConstantBus = !Src0Mods && !Src1Mods && !Clamp && !Omod &&
// If we have an SGPR input, we will violate the constant bus restriction.
(ST.getConstantBusLimit(Opc) > 1 ||
!Src0->isReg() ||
- !RI.isSGPRReg(MBB->getParent()->getRegInfo(), Src0->getReg()))) {
+ !RI.isSGPRReg(MBB->getParent()->getRegInfo(), Src0->getReg())))) {
if (auto Imm = getFoldableImm(Src2)) {
unsigned NewOpc =
IsFMA ? (IsF16 ? AMDGPU::V_FMAAK_F16 : AMDGPU::V_FMAAK_F32)
@@ -2623,6 +2624,19 @@
unsigned NewOpc = IsFMA ? (IsF16 ? AMDGPU::V_FMA_F16 : AMDGPU::V_FMA_F32)
: (IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32);
+
+ // If the source of the tied reg is an sgpr->vgpr copy, then replace
+ // it with the sgpr to potentially kill the copy, provided that
+ // this doesn't violate the constant bus restriction.
+ if (ConstantBus) {
+ const MachineFunction *MF = Src2->getParent()->getMF();
+ const MachineRegisterInfo *MRI = &MF->getRegInfo();
+ if (auto *Def = MRI->getUniqueVRegDef(Src2->getReg())) {
+ if (Def->getOpcode() == AMDGPU::COPY)
+ Src2 = &Def->getOperand(1);
+ }
+ }
+
if (pseudoToMCOpcode(NewOpc) == -1)
return nullptr;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D66666.216938.patch
Type: text/x-patch
Size: 3006 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190823/a69f5a3d/attachment-0001.bin>
More information about the llvm-commits
mailing list