[llvm] r225410 - R600/SI: Commute instructions to enable more folding opportunities
Tom Stellard
thomas.stellard at amd.com
Wed Jan 7 14:44:19 PST 2015
Author: tstellar
Date: Wed Jan 7 16:44:19 2015
New Revision: 225410
URL: http://llvm.org/viewvc/llvm-project?rev=225410&view=rev
Log:
R600/SI: Commute instructions to enable more folding opportunities
Modified:
llvm/trunk/lib/Target/R600/SIFoldOperands.cpp
llvm/trunk/lib/Target/R600/SIInstrInfo.cpp
llvm/trunk/test/CodeGen/R600/mulhu.ll
llvm/trunk/test/CodeGen/R600/sdiv.ll
llvm/trunk/test/CodeGen/R600/use-sgpr-multiple-times.ll
Modified: llvm/trunk/lib/Target/R600/SIFoldOperands.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIFoldOperands.cpp?rev=225410&r1=225409&r2=225410&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIFoldOperands.cpp (original)
+++ llvm/trunk/lib/Target/R600/SIFoldOperands.cpp Wed Jan 7 16:44:19 2015
@@ -56,10 +56,16 @@ struct FoldCandidate {
uint64_t ImmToFold;
FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
- UseMI(MI), UseOpNo(OpNo), OpToFold(FoldOp), ImmToFold(0) { }
+ UseMI(MI), UseOpNo(OpNo) {
- FoldCandidate(MachineInstr *MI, unsigned OpNo, uint64_t Imm) :
- UseMI(MI), UseOpNo(OpNo), OpToFold(nullptr), ImmToFold(Imm) { }
+ if (FoldOp->isImm()) {
+ OpToFold = nullptr;
+ ImmToFold = FoldOp->getImm();
+ } else {
+ assert(FoldOp->isReg());
+ OpToFold = FoldOp;
+ }
+ }
bool isImm() const {
return !OpToFold;
@@ -119,6 +125,35 @@ static bool updateOperand(FoldCandidate
return false;
}
+static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
+ MachineInstr *MI, unsigned OpNo,
+ MachineOperand *OpToFold,
+ const SIInstrInfo *TII) {
+ if (!TII->isOperandLegal(MI, OpNo, OpToFold)) {
+ // Operand is not legal, so try to commute the instruction to
+ // see if this makes it possible to fold.
+ unsigned CommuteIdx0;
+ unsigned CommuteIdx1;
+ bool CanCommute = TII->findCommutedOpIndices(MI, CommuteIdx0, CommuteIdx1);
+
+ if (CanCommute) {
+ if (CommuteIdx0 == OpNo)
+ OpNo = CommuteIdx1;
+ else if (CommuteIdx1 == OpNo)
+ OpNo = CommuteIdx0;
+ }
+
+ if (!CanCommute || !TII->commuteInstruction(MI))
+ return false;
+
+ if (!TII->isOperandLegal(MI, OpNo, OpToFold))
+ return false;
+ }
+
+ FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
+ return true;
+}
+
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
const SIInstrInfo *TII =
@@ -140,6 +175,11 @@ bool SIFoldOperands::runOnMachineFunctio
MachineOperand &OpToFold = MI.getOperand(1);
bool FoldingImm = OpToFold.isImm() || OpToFold.isFPImm();
+ // FIXME: We could also be folding things like FrameIndexes and
+ // TargetIndexes.
+ if (!FoldingImm && !OpToFold.isReg())
+ continue;
+
// Folding immediates with more than one use will increase program side.
// FIXME: This will also reduce register usage, which may be better
// in some cases. A better heuristic is needed.
@@ -210,24 +250,13 @@ bool SIFoldOperands::runOnMachineFunctio
UseDesc.OpInfo[Use.getOperandNo()].RegClass == -1)
continue;
-
if (FoldingImm) {
- const MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
- if (TII->isOperandLegal(UseMI, Use.getOperandNo(), &ImmOp)) {
- FoldList.push_back(FoldCandidate(UseMI, Use.getOperandNo(),
- Imm.getSExtValue()));
- }
- continue;
- }
-
- // Normal substitution with registers
- if (TII->isOperandLegal(UseMI, Use.getOperandNo(), &OpToFold)) {
- FoldList.push_back(FoldCandidate(UseMI, Use.getOperandNo(), &OpToFold));
+ MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
+ tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &ImmOp, TII);
continue;
}
- // FIXME: We could commute the instruction to create more opportunites
- // for folding. This will only be useful if we have 32-bit instructions.
+ tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &OpToFold, TII);
// FIXME: We could try to change the instruction from 64-bit to 32-bit
// to enable more folding opportunites. The shrink operands pass
Modified: llvm/trunk/lib/Target/R600/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIInstrInfo.cpp?rev=225410&r1=225409&r2=225410&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/R600/SIInstrInfo.cpp Wed Jan 7 16:44:19 2015
@@ -709,6 +709,7 @@ bool SIInstrInfo::expandPostRAPseudo(Mac
MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
bool NewMI) const {
+
if (MI->getNumOperands() < 3)
return nullptr;
@@ -730,8 +731,9 @@ MachineInstr *SIInstrInfo::commuteInstru
// Make sure it's legal to commute operands for VOP2.
if (isVOP2(MI->getOpcode()) &&
(!isOperandLegal(MI, Src0Idx, &Src1) ||
- !isOperandLegal(MI, Src1Idx, &Src0)))
+ !isOperandLegal(MI, Src1Idx, &Src0))) {
return nullptr;
+ }
if (!Src1.isReg()) {
// Allow commuting instructions with Imm or FPImm operands.
@@ -1471,6 +1473,7 @@ bool SIInstrInfo::isOperandLegal(const M
//
// s_sendmsg 0, s0 ; Operand defined as m0reg
// ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
+
return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
}
Modified: llvm/trunk/test/CodeGen/R600/mulhu.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/mulhu.ll?rev=225410&r1=225409&r2=225410&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/R600/mulhu.ll (original)
+++ llvm/trunk/test/CodeGen/R600/mulhu.ll Wed Jan 7 16:44:19 2015
@@ -1,7 +1,7 @@
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
;CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0xaaaaaaab
-;CHECK: v_mul_hi_u32 v0, {{[sv][0-9]+}}, {{v[0-9]+}}
+;CHECK: v_mul_hi_u32 v0, {{v[0-9]+}}, {{s[0-9]+}}
;CHECK-NEXT: v_lshrrev_b32_e32 v0, 1, v0
define void @test(i32 %p) {
Modified: llvm/trunk/test/CodeGen/R600/sdiv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/sdiv.ll?rev=225410&r1=225409&r2=225410&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/R600/sdiv.ll (original)
+++ llvm/trunk/test/CodeGen/R600/sdiv.ll Wed Jan 7 16:44:19 2015
@@ -35,7 +35,7 @@ define void @sdiv_i32_4(i32 addrspace(1)
; FUNC-LABEL: {{^}}slow_sdiv_i32_3435:
; SI: buffer_load_dword [[VAL:v[0-9]+]],
; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x98a1930b
-; SI: v_mul_hi_i32 [[TMP:v[0-9]+]], [[VAL]], [[MAGIC]]
+; SI: v_mul_hi_i32 [[TMP:v[0-9]+]], [[MAGIC]], [[VAL]]
; SI: v_add_i32
; SI: v_lshrrev_b32
; SI: v_ashrrev_i32
Modified: llvm/trunk/test/CodeGen/R600/use-sgpr-multiple-times.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/use-sgpr-multiple-times.ll?rev=225410&r1=225409&r2=225410&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/R600/use-sgpr-multiple-times.ll (original)
+++ llvm/trunk/test/CodeGen/R600/use-sgpr-multiple-times.ll Wed Jan 7 16:44:19 2015
@@ -41,7 +41,7 @@ define void @test_sgpr_use_twice_ternary
; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
-; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]]
+; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]]
; SI: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_a_b_a(float addrspace(1)* %out, float %a, float %b) #0 {
%fma = call float @llvm.fma.f32(float %a, float %b, float %a) #1
@@ -53,7 +53,7 @@ define void @test_sgpr_use_twice_ternary
; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
-; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]]
+; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]]
; SI: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_b_a_a(float addrspace(1)* %out, float %a, float %b) #0 {
%fma = call float @llvm.fma.f32(float %b, float %a, float %a) #1
More information about the llvm-commits mailing list