[llvm] r183278 - R600: Const/Neg/Abs can be folded to dot4
Vincent Lejeune
vljn at ovi.com
Tue Jun 4 16:17:15 PDT 2013
Author: vljn
Date: Tue Jun 4 18:17:15 2013
New Revision: 183278
URL: http://llvm.org/viewvc/llvm-project?rev=183278&view=rev
Log:
R600: Const/Neg/Abs can be folded to dot4
Added:
llvm/trunk/test/CodeGen/R600/dot4-folding.ll
Modified:
llvm/trunk/lib/Target/R600/AMDILISelDAGToDAG.cpp
llvm/trunk/lib/Target/R600/R600EmitClauseMarkers.cpp
llvm/trunk/lib/Target/R600/R600ExpandSpecialInstrs.cpp
llvm/trunk/lib/Target/R600/R600InstrInfo.cpp
llvm/trunk/lib/Target/R600/R600InstrInfo.h
Modified: llvm/trunk/lib/Target/R600/AMDILISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDILISelDAGToDAG.cpp?rev=183278&r1=183277&r2=183278&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDILISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/R600/AMDILISelDAGToDAG.cpp Tue Jun 4 18:17:15 2013
@@ -49,7 +49,10 @@ public:
private:
inline SDValue getSmallIPtrImm(unsigned Imm);
+ bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
+ const R600InstrInfo *TII, std::vector<unsigned> Cst);
bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
+ bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
// Complex pattern selectors
bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
@@ -318,6 +321,20 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNod
if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
const R600InstrInfo *TII =
static_cast<const R600InstrInfo*>(TM.getInstrInfo());
+ if (Result && Result->isMachineOpcode() && Result->getMachineOpcode() == AMDGPU::DOT_4) {
+ bool IsModified = false;
+ do {
+ std::vector<SDValue> Ops;
+ for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
+ I != E; ++I)
+ Ops.push_back(*I);
+ IsModified = FoldDotOperands(Result->getMachineOpcode(), TII, Ops);
+ if (IsModified) {
+ Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
+ }
+ } while (IsModified);
+
+ }
if (Result && Result->isMachineOpcode() &&
!(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
&& TII->isALUInstr(Result->getMachineOpcode())) {
@@ -360,6 +377,43 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNod
return Result;
}
+bool AMDGPUDAGToDAGISel::FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg,
+ SDValue &Abs, const R600InstrInfo *TII,
+ std::vector<unsigned> Consts) {
+ switch (Src.getOpcode()) {
+ case AMDGPUISD::CONST_ADDRESS: {
+ SDValue CstOffset;
+ if (Src.getValueType().isVector() ||
+ !SelectGlobalValueConstantOffset(Src.getOperand(0), CstOffset))
+ return false;
+
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
+ Consts.push_back(Cst->getZExtValue());
+ if (!TII->fitsConstReadLimitations(Consts))
+ return false;
+
+ Src = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
+ Sel = CstOffset;
+ return true;
+ }
+ case ISD::FNEG:
+ Src = Src.getOperand(0);
+ Neg = CurDAG->getTargetConstant(1, MVT::i32);
+ return true;
+ case ISD::FABS:
+ if (!Abs.getNode())
+ return false;
+ Src = Src.getOperand(0);
+ Abs = CurDAG->getTargetConstant(1, MVT::i32);
+ return true;
+ case ISD::BITCAST:
+ Src = Src.getOperand(0);
+ return true;
+ default:
+ return false;
+ }
+}
+
bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
int OperandIdx[] = {
@@ -383,59 +437,101 @@ bool AMDGPUDAGToDAGISel::FoldOperands(un
-1
};
+ // Gather constants values
+ std::vector<unsigned> Consts;
+ for (unsigned j = 0; j < 3; j++) {
+ int SrcIdx = OperandIdx[j];
+ if (SrcIdx < 0)
+ break;
+ if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
+ if (Reg->getReg() == AMDGPU::ALU_CONST) {
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
+ Consts.push_back(Cst->getZExtValue());
+ }
+ }
+ }
+
for (unsigned i = 0; i < 3; i++) {
if (OperandIdx[i] < 0)
return false;
- SDValue Operand = Ops[OperandIdx[i] - 1];
- switch (Operand.getOpcode()) {
- case AMDGPUISD::CONST_ADDRESS: {
- SDValue CstOffset;
- if (Operand.getValueType().isVector() ||
- !SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset))
- break;
-
- // Gather others constants values
- std::vector<unsigned> Consts;
- for (unsigned j = 0; j < 3; j++) {
- int SrcIdx = OperandIdx[j];
- if (SrcIdx < 0)
- break;
- if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
- if (Reg->getReg() == AMDGPU::ALU_CONST) {
- ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
- Consts.push_back(Cst->getZExtValue());
- }
- }
- }
+ SDValue &Src = Ops[OperandIdx[i] - 1];
+ SDValue &Sel = Ops[SelIdx[i] - 1];
+ SDValue &Neg = Ops[NegIdx[i] - 1];
+ SDValue FakeAbs;
+ SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
+ if (FoldOperand(Src, Sel, Neg, Abs, TII, Consts))
+ return true;
+ }
+ return false;
+}
- ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
- Consts.push_back(Cst->getZExtValue());
- if (!TII->fitsConstReadLimitations(Consts))
- break;
+bool AMDGPUDAGToDAGISel::FoldDotOperands(unsigned Opcode,
+ const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
+ int OperandIdx[] = {
+ TII->getOperandIdx(Opcode, R600Operands::SRC0_X),
+ TII->getOperandIdx(Opcode, R600Operands::SRC0_Y),
+ TII->getOperandIdx(Opcode, R600Operands::SRC0_Z),
+ TII->getOperandIdx(Opcode, R600Operands::SRC0_W),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1_X),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1_Y),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1_Z),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1_W)
+ };
+ int SelIdx[] = {
+ TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL_X),
+ TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL_Y),
+ TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL_Z),
+ TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL_W),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL_X),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL_Y),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL_Z),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL_W)
+ };
+ int NegIdx[] = {
+ TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG_X),
+ TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG_Y),
+ TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG_Z),
+ TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG_W),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG_X),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG_Y),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG_Z),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG_W)
+ };
+ int AbsIdx[] = {
+ TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS_X),
+ TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS_Y),
+ TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS_Z),
+ TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS_W),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS_X),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS_Y),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS_Z),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS_W)
+ };
- Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
- Ops[SelIdx[i] - 1] = CstOffset;
- return true;
- }
- case ISD::FNEG:
- if (NegIdx[i] < 0)
- break;
- Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
- Ops[NegIdx[i] - 1] = CurDAG->getTargetConstant(1, MVT::i32);
- return true;
- case ISD::FABS:
- if (AbsIdx[i] < 0)
- break;
- Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
- Ops[AbsIdx[i] - 1] = CurDAG->getTargetConstant(1, MVT::i32);
- return true;
- case ISD::BITCAST:
- Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
- return true;
- default:
+ // Gather constants values
+ std::vector<unsigned> Consts;
+ for (unsigned j = 0; j < 8; j++) {
+ int SrcIdx = OperandIdx[j];
+ if (SrcIdx < 0)
break;
+ if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
+ if (Reg->getReg() == AMDGPU::ALU_CONST) {
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
+ Consts.push_back(Cst->getZExtValue());
+ }
}
}
+
+ for (unsigned i = 0; i < 8; i++) {
+ if (OperandIdx[i] < 0)
+ return false;
+ SDValue &Src = Ops[OperandIdx[i] - 1];
+ SDValue &Sel = Ops[SelIdx[i] - 1];
+ SDValue &Neg = Ops[NegIdx[i] - 1];
+ SDValue &Abs = Ops[AbsIdx[i] - 1];
+ if (FoldOperand(Src, Sel, Neg, Abs, TII, Consts))
+ return true;
+ }
return false;
}
Modified: llvm/trunk/lib/Target/R600/R600EmitClauseMarkers.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600EmitClauseMarkers.cpp?rev=183278&r1=183277&r2=183278&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/R600EmitClauseMarkers.cpp (original)
+++ llvm/trunk/lib/Target/R600/R600EmitClauseMarkers.cpp Tue Jun 4 18:17:15 2013
@@ -108,7 +108,8 @@ private:
std::vector<std::pair<unsigned, unsigned> > UsedKCache;
const SmallVector<std::pair<MachineOperand *, int64_t>, 3> &Consts =
TII->getSrcs(MI);
- assert(TII->isALUInstr(MI->getOpcode()) && "Can't assign Const");
+ assert((TII->isALUInstr(MI->getOpcode()) ||
+ MI->getOpcode() == AMDGPU::DOT_4) && "Can't assign Const");
for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
if (Consts[i].first->getReg() != AMDGPU::ALU_CONST)
continue;
@@ -183,6 +184,9 @@ private:
if (TII->isALUInstr(I->getOpcode()) &&
!SubstituteKCacheBank(I, KCacheBanks))
break;
+ if (I->getOpcode() == AMDGPU::DOT_4 &&
+ !SubstituteKCacheBank(I, KCacheBanks))
+ break;
AluInstCount += OccupiedDwords(I);
}
unsigned Opcode = PushBeforeModifier ?
Modified: llvm/trunk/lib/Target/R600/R600ExpandSpecialInstrs.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600ExpandSpecialInstrs.cpp?rev=183278&r1=183277&r2=183278&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/R600ExpandSpecialInstrs.cpp (original)
+++ llvm/trunk/lib/Target/R600/R600ExpandSpecialInstrs.cpp Tue Jun 4 18:17:15 2013
@@ -214,7 +214,9 @@ bool R600ExpandSpecialInstrsPass::runOnM
.getReg();
(void) Src0;
(void) Src1;
- assert(TRI.getHWRegChan(Src0) == TRI.getHWRegChan(Src1));
+ if ((TRI.getEncodingValue(Src0) & 0xff) < 127 &&
+ (TRI.getEncodingValue(Src1) & 0xff) < 127)
+ assert(TRI.getHWRegChan(Src0) == TRI.getHWRegChan(Src1));
}
MI.eraseFromParent();
continue;
Modified: llvm/trunk/lib/Target/R600/R600InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600InstrInfo.cpp?rev=183278&r1=183277&r2=183278&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/R600InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/R600/R600InstrInfo.cpp Tue Jun 4 18:17:15 2013
@@ -169,6 +169,31 @@ SmallVector<std::pair<MachineOperand *,
R600InstrInfo::getSrcs(MachineInstr *MI) const {
SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;
+ if (MI->getOpcode() == AMDGPU::DOT_4) {
+ static const R600Operands::VecOps OpTable[8][2] = {
+ {R600Operands::SRC0_X, R600Operands::SRC0_SEL_X},
+ {R600Operands::SRC0_Y, R600Operands::SRC0_SEL_Y},
+ {R600Operands::SRC0_Z, R600Operands::SRC0_SEL_Z},
+ {R600Operands::SRC0_W, R600Operands::SRC0_SEL_W},
+ {R600Operands::SRC1_X, R600Operands::SRC1_SEL_X},
+ {R600Operands::SRC1_Y, R600Operands::SRC1_SEL_Y},
+ {R600Operands::SRC1_Z, R600Operands::SRC1_SEL_Z},
+ {R600Operands::SRC1_W, R600Operands::SRC1_SEL_W},
+ };
+
+ for (unsigned j = 0; j < 8; j++) {
+ MachineOperand &MO = MI->getOperand(OpTable[j][0] + 1);
+ unsigned Reg = MO.getReg();
+ if (Reg == AMDGPU::ALU_CONST) {
+ unsigned Sel = MI->getOperand(OpTable[j][1] + 1).getImm();
+ Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
+ continue;
+ }
+
+ }
+ return Result;
+ }
+
static const R600Operands::Ops OpTable[3][2] = {
{R600Operands::SRC0, R600Operands::SRC0_SEL},
{R600Operands::SRC1, R600Operands::SRC1_SEL},
@@ -967,6 +992,11 @@ int R600InstrInfo::getOperandIdx(const M
return getOperandIdx(MI.getOpcode(), Op);
}
+int R600InstrInfo::getOperandIdx(const MachineInstr &MI,
+ R600Operands::VecOps Op) const {
+ return getOperandIdx(MI.getOpcode(), Op);
+}
+
int R600InstrInfo::getOperandIdx(unsigned Opcode,
R600Operands::Ops Op) const {
unsigned TargetFlags = get(Opcode).TSFlags;
@@ -997,6 +1027,11 @@ int R600InstrInfo::getOperandIdx(unsigne
return R600Operands::ALUOpTable[OpTableIdx][Op];
}
+int R600InstrInfo::getOperandIdx(unsigned Opcode,
+ R600Operands::VecOps Op) const {
+ return Op + 1;
+}
+
void R600InstrInfo::setImmOperand(MachineInstr *MI, R600Operands::Ops Op,
int64_t Imm) const {
int Idx = getOperandIdx(*MI, Op);
Modified: llvm/trunk/lib/Target/R600/R600InstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600InstrInfo.h?rev=183278&r1=183277&r2=183278&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/R600InstrInfo.h (original)
+++ llvm/trunk/lib/Target/R600/R600InstrInfo.h Tue Jun 4 18:17:15 2013
@@ -212,11 +212,13 @@ namespace llvm {
///
/// \returns -1 if the Instruction does not contain the specified \p Op.
int getOperandIdx(const MachineInstr &MI, R600Operands::Ops Op) const;
+ int getOperandIdx(const MachineInstr &MI, R600Operands::VecOps Op) const;
/// \brief Get the index of \p Op for the given Opcode.
///
/// \returns -1 if the Instruction does not contain the specified \p Op.
int getOperandIdx(unsigned Opcode, R600Operands::Ops Op) const;
+ int getOperandIdx(unsigned Opcode, R600Operands::VecOps Op) const;
/// \brief Helper function for setting instruction flag values.
void setImmOperand(MachineInstr *MI, R600Operands::Ops Op, int64_t Imm) const;
Added: llvm/trunk/test/CodeGen/R600/dot4-folding.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/dot4-folding.ll?rev=183278&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/dot4-folding.ll (added)
+++ llvm/trunk/test/CodeGen/R600/dot4-folding.ll Tue Jun 4 18:17:15 2013
@@ -0,0 +1,27 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; Exactly one constant vector can be folded into dot4, which means exactly
+; 4 MOV instructions
+; CHECK: @main
+; CHECK: MOV
+; CHECK: MOV
+; CHECK: MOV
+; CHECK: MOV
+; CHECK-NOT: MOV
+; CHECK-NOT: MOV
+; CHECK-NOT: MOV
+; CHECK-NOT: MOV
+
+define void @main(float addrspace(1)* %out) {
+main_body:
+ %0 = load <4 x float> addrspace(8)* null
+ %1 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %2 = call float @llvm.AMDGPU.dp4(<4 x float> %0,<4 x float> %1)
+ %3 = insertelement <4 x float> undef, float %2, i32 0
+ call void @llvm.R600.store.swizzle(<4 x float> %3, i32 0, i32 0)
+ ret void
+}
+
+declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+attributes #1 = { readnone }
More information about the llvm-commits mailing list