[llvm] r347993 - [AMDGPU] Combine DPP mov with use instructions (VOP1/2/3)
Valery Pykhtin via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 30 06:21:56 PST 2018
Author: vpykhtin
Date: Fri Nov 30 06:21:56 2018
New Revision: 347993
URL: http://llvm.org/viewvc/llvm-project?rev=347993&view=rev
Log:
[AMDGPU] Combine DPP mov with use instructions (VOP1/2/3)
Introduces DPP pseudo instructions and a pass that combines a DPP mov with its subsequent use instructions.
Differential revision: https://reviews.llvm.org/D53762
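As a rough before/after sketch (MIR drawn from the new tests below; the
virtual register numbers are illustrative, not part of the change itself):

  %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 1, 1, 1, implicit $exec
  %4:vgpr_32 = V_ADD_U32_e32 %3, %0, implicit $exec

becomes a single DPP-encoded use:

  %4:vgpr_32 = V_ADD_U32_dpp undef %2, %1, %0, 1, 1, 1, 1, implicit $exec

The pass is off by default; it is gated by the new -amdgpu-dpp-combine option.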
Added:
llvm/trunk/lib/Target/AMDGPU/GCNDPPCombine.cpp
llvm/trunk/test/CodeGen/AMDGPU/dpp_combine.ll
llvm/trunk/test/CodeGen/AMDGPU/dpp_combine_subregs.mir
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPU.h
llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td
llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td
llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td
llvm/trunk/test/MC/AMDGPU/vop_dpp.s
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPU.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPU.h?rev=347993&r1=347992&r2=347993&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPU.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.h Fri Nov 30 06:21:56 2018
@@ -37,6 +37,7 @@ FunctionPass *createAMDGPUCFGStructurize
FunctionPass *createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel);
// SI Passes
+FunctionPass *createGCNDPPCombinePass();
FunctionPass *createSIAnnotateControlFlowPass();
FunctionPass *createSIFoldOperandsPass();
FunctionPass *createSIPeepholeSDWAPass();
@@ -93,6 +94,9 @@ extern char &AMDGPULowerKernelAttributes
void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
extern char &AMDGPURewriteOutArgumentsID;
+void initializeGCNDPPCombinePass(PassRegistry &);
+extern char &GCNDPPCombineID;
+
void initializeR600ClauseMergePassPass(PassRegistry &);
extern char &R600ClauseMergePassID;
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPU.td?rev=347993&r1=347992&r2=347993&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPU.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.td Fri Nov 30 06:21:56 2018
@@ -11,6 +11,10 @@ include "llvm/TableGen/SearchableTable.t
include "llvm/Target/Target.td"
include "AMDGPUFeatures.td"
+class BoolToList<bit Value> {
+ list<int> ret = !if(Value, [1]<int>, []<int>);
+}
+
//===------------------------------------------------------------===//
// Subtarget Features (device properties)
//===------------------------------------------------------------===//
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp?rev=347993&r1=347992&r2=347993&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp Fri Nov 30 06:21:56 2018
@@ -106,6 +106,11 @@ static cl::opt<bool> EnableSDWAPeephole(
cl::desc("Enable SDWA peepholer"),
cl::init(true));
+static cl::opt<bool> EnableDPPCombine(
+ "amdgpu-dpp-combine",
+ cl::desc("Enable DPP combiner"),
+ cl::init(false));
+
// Enable address space based alias analysis
static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
cl::desc("Enable AMDGPU Alias Analysis"),
@@ -158,6 +163,7 @@ extern "C" void LLVMInitializeAMDGPUTarg
initializeR600VectorRegMergerPass(*PR);
initializeGlobalISel(*PR);
initializeAMDGPUDAGToDAGISelPass(*PR);
+ initializeGCNDPPCombinePass(*PR);
initializeSILowerI1CopiesPass(*PR);
initializeSIFixSGPRCopiesPass(*PR);
initializeSIFixVGPRCopiesPass(*PR);
@@ -790,6 +796,8 @@ void GCNPassConfig::addMachineSSAOptimiz
//
// XXX - Can we get away without running DeadMachineInstructionElim again?
addPass(&SIFoldOperandsID);
+ if (EnableDPPCombine)
+ addPass(&GCNDPPCombineID);
addPass(&DeadMachineInstructionElimID);
addPass(&SILoadStoreOptimizerID);
if (EnableSDWAPeephole) {
Modified: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp?rev=347993&r1=347992&r2=347993&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp Fri Nov 30 06:21:56 2018
@@ -5275,12 +5275,14 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Ins
((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
}
- // All DPP instructions with at least one source operand have a fake "old"
- // source at the beginning that's tied to the dst operand. Handle it here.
- if (Desc.getNumOperands() >= 2)
- Inst.addOperand(Inst.getOperand(0));
-
for (unsigned E = Operands.size(); I != E; ++I) {
+ auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
+ MCOI::TIED_TO);
+ if (TiedTo != -1) {
+ assert((unsigned)TiedTo < Inst.getNumOperands());
+ // handle tied old or src2 for MAC instructions
+ Inst.addOperand(Inst.getOperand(TiedTo));
+ }
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
// Add the register arguments
if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
Modified: llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt?rev=347993&r1=347992&r2=347993&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt (original)
+++ llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt Fri Nov 30 06:21:56 2018
@@ -119,6 +119,7 @@ add_llvm_target(AMDGPUCodeGen
SIShrinkInstructions.cpp
SIWholeQuadMode.cpp
GCNILPSched.cpp
+ GCNDPPCombine.cpp
)
add_subdirectory(AsmParser)
Added: llvm/trunk/lib/Target/AMDGPU/GCNDPPCombine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/GCNDPPCombine.cpp?rev=347993&view=auto
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNDPPCombine.cpp (added)
+++ llvm/trunk/lib/Target/AMDGPU/GCNDPPCombine.cpp Fri Nov 30 06:21:56 2018
@@ -0,0 +1,446 @@
+//=======- GCNDPPCombine.cpp - optimization for DPP instructions ---==========//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass combines a V_MOV_B32_dpp instruction with its VALU uses as a DPP
+// src0 operand. If any of the use instructions cannot be combined with the
+// mov, the whole sequence is reverted.
+//
+// $old = ...
+// $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,
+// dpp_controls..., $bound_ctrl
+// $res = VALU $dpp_value, ...
+//
+// to
+//
+// $res = VALU_DPP $folded_old, $vgpr_to_be_read_from_other_lane, ...,
+// dpp_controls..., $folded_bound_ctrl
+//
+// Combining rules:
+//
+// $bound_ctrl is DPP_BOUND_ZERO, $old is any
+// $bound_ctrl is DPP_BOUND_OFF, $old is 0
+// -> $folded_old = undef, $folded_bound_ctrl = DPP_BOUND_ZERO
+//
+// $bound_ctrl is DPP_BOUND_OFF, $old is undef
+// -> $folded_old = undef, $folded_bound_ctrl = DPP_BOUND_OFF
+//
+// $bound_ctrl is DPP_BOUND_OFF, $old is foldable
+// -> $folded_old = folded value, $folded_bound_ctrl = DPP_BOUND_OFF
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Pass.h"
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "gcn-dpp-combine"
+
+STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined.");
+
+namespace {
+
+class GCNDPPCombine : public MachineFunctionPass {
+ MachineRegisterInfo *MRI;
+ const SIInstrInfo *TII;
+
+ using RegSubRegPair = TargetInstrInfo::RegSubRegPair;
+
+ MachineOperand *getOldOpndValue(MachineOperand &OldOpnd) const;
+
+ RegSubRegPair foldOldOpnd(MachineInstr &OrigMI,
+ RegSubRegPair OldOpndVGPR,
+ MachineOperand &OldOpndValue) const;
+
+ MachineInstr *createDPPInst(MachineInstr &OrigMI,
+ MachineInstr &MovMI,
+ RegSubRegPair OldOpndVGPR,
+ MachineOperand *OldOpnd,
+ bool BoundCtrlZero) const;
+
+ MachineInstr *createDPPInst(MachineInstr &OrigMI,
+ MachineInstr &MovMI,
+ RegSubRegPair OldOpndVGPR,
+ bool BoundCtrlZero) const;
+
+ bool hasNoImmOrEqual(MachineInstr &MI,
+ unsigned OpndName,
+ int64_t Value,
+ int64_t Mask = -1) const;
+
+ bool combineDPPMov(MachineInstr &MI) const;
+
+public:
+ static char ID;
+
+ GCNDPPCombine() : MachineFunctionPass(ID) {
+ initializeGCNDPPCombinePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override { return "GCN DPP Combine"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(GCNDPPCombine, DEBUG_TYPE, "GCN DPP Combine", false, false)
+
+char GCNDPPCombine::ID = 0;
+
+char &llvm::GCNDPPCombineID = GCNDPPCombine::ID;
+
+FunctionPass *llvm::createGCNDPPCombinePass() {
+ return new GCNDPPCombine();
+}
+
+static int getDPPOp(unsigned Op) {
+ auto DPP32 = AMDGPU::getDPPOp32(Op);
+ if (DPP32 != -1)
+ return DPP32;
+
+ auto E32 = AMDGPU::getVOPe32(Op);
+ return E32 != -1 ? AMDGPU::getDPPOp32(E32) : -1;
+}
+
+// tracks the register operand definition and returns:
+// 1. immediate operand used to initialize the register if found
+// 2. nullptr if the register operand is undef
+// 3. the operand itself otherwise
+MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const {
+ auto *Def = getVRegSubRegDef(getRegSubRegPair(OldOpnd), *MRI);
+ if (!Def)
+ return nullptr;
+
+ switch(Def->getOpcode()) {
+ default: break;
+ case AMDGPU::IMPLICIT_DEF:
+ return nullptr;
+ case AMDGPU::COPY:
+ case AMDGPU::V_MOV_B32_e32: {
+ auto &Op1 = Def->getOperand(1);
+ if (Op1.isImm())
+ return &Op1;
+ break;
+ }
+ }
+ return &OldOpnd;
+}
+
+MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
+ MachineInstr &MovMI,
+ RegSubRegPair OldOpndVGPR,
+ bool BoundCtrlZero) const {
+ assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
+ assert(TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst)->getReg() ==
+ TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)->getReg());
+
+ auto OrigOp = OrigMI.getOpcode();
+ auto DPPOp = getDPPOp(OrigOp);
+ if (DPPOp == -1) {
+ LLVM_DEBUG(dbgs() << " failed: no DPP opcode\n");
+ return nullptr;
+ }
+
+ auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI,
+ OrigMI.getDebugLoc(), TII->get(DPPOp));
+ bool Fail = false;
+ do {
+ auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst);
+ assert(Dst);
+ DPPInst.add(*Dst);
+ int NumOperands = 1;
+
+ const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
+ if (OldIdx != -1) {
+ assert(OldIdx == NumOperands);
+ assert(isOfRegClass(OldOpndVGPR, AMDGPU::VGPR_32RegClass, *MRI));
+ DPPInst.addReg(OldOpndVGPR.Reg, 0, OldOpndVGPR.SubReg);
+ ++NumOperands;
+ }
+
+ if (auto *Mod0 = TII->getNamedOperand(OrigMI,
+ AMDGPU::OpName::src0_modifiers)) {
+ assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
+ AMDGPU::OpName::src0_modifiers));
+ assert(0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
+ DPPInst.addImm(Mod0->getImm());
+ ++NumOperands;
+ }
+ auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
+ assert(Src0);
+ if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
+ LLVM_DEBUG(dbgs() << " failed: src0 is illegal\n");
+ Fail = true;
+ break;
+ }
+ DPPInst.add(*Src0);
+ ++NumOperands;
+
+ if (auto *Mod1 = TII->getNamedOperand(OrigMI,
+ AMDGPU::OpName::src1_modifiers)) {
+ assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
+ AMDGPU::OpName::src1_modifiers));
+ assert(0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
+ DPPInst.addImm(Mod1->getImm());
+ ++NumOperands;
+ }
+ if (auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
+ if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) {
+ LLVM_DEBUG(dbgs() << " failed: src1 is illegal\n");
+ Fail = true;
+ break;
+ }
+ DPPInst.add(*Src1);
+ ++NumOperands;
+ }
+
+ if (auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2)) {
+ if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
+ LLVM_DEBUG(dbgs() << " failed: src2 is illegal\n");
+ Fail = true;
+ break;
+ }
+ DPPInst.add(*Src2);
+ }
+
+ DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl));
+ DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
+ DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
+ DPPInst.addImm(BoundCtrlZero ? 1 : 0);
+ } while (false);
+
+ if (Fail) {
+ DPPInst.getInstr()->eraseFromParent();
+ return nullptr;
+ }
+ LLVM_DEBUG(dbgs() << " combined: " << *DPPInst.getInstr());
+ return DPPInst.getInstr();
+}
+
+GCNDPPCombine::RegSubRegPair
+GCNDPPCombine::foldOldOpnd(MachineInstr &OrigMI,
+ RegSubRegPair OldOpndVGPR,
+ MachineOperand &OldOpndValue) const {
+ assert(OldOpndValue.isImm());
+ switch (OrigMI.getOpcode()) {
+ default: break;
+ case AMDGPU::V_MAX_U32_e32:
+ if (OldOpndValue.getImm() == std::numeric_limits<uint32_t>::max())
+ return OldOpndVGPR;
+ break;
+ case AMDGPU::V_MAX_I32_e32:
+ if (OldOpndValue.getImm() == std::numeric_limits<int32_t>::max())
+ return OldOpndVGPR;
+ break;
+ case AMDGPU::V_MIN_I32_e32:
+ if (OldOpndValue.getImm() == std::numeric_limits<int32_t>::min())
+ return OldOpndVGPR;
+ break;
+
+ case AMDGPU::V_MUL_I32_I24_e32:
+ case AMDGPU::V_MUL_U32_U24_e32:
+ if (OldOpndValue.getImm() == 1) {
+ auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
+ assert(Src1 && Src1->isReg());
+ return getRegSubRegPair(*Src1);
+ }
+ break;
+ }
+ return RegSubRegPair();
+}
+
+// Cases to combine:
+// $bound_ctrl is DPP_BOUND_ZERO, $old is any
+// $bound_ctrl is DPP_BOUND_OFF, $old is 0
+// -> $old = undef, $bound_ctrl = DPP_BOUND_ZERO
+
+// $bound_ctrl is DPP_BOUND_OFF, $old is undef
+// -> $old = undef, $bound_ctrl = DPP_BOUND_OFF
+
+// $bound_ctrl is DPP_BOUND_OFF, $old is foldable
+// -> $old = folded value, $bound_ctrl = DPP_BOUND_OFF
+
+MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
+ MachineInstr &MovMI,
+ RegSubRegPair OldOpndVGPR,
+ MachineOperand *OldOpndValue,
+ bool BoundCtrlZero) const {
+ assert(OldOpndVGPR.Reg);
+ if (!BoundCtrlZero && OldOpndValue) {
+ assert(OldOpndValue->isImm());
+ OldOpndVGPR = foldOldOpnd(OrigMI, OldOpndVGPR, *OldOpndValue);
+ if (!OldOpndVGPR.Reg) {
+ LLVM_DEBUG(dbgs() << " failed: old immediate cannot be folded\n");
+ return nullptr;
+ }
+ }
+ return createDPPInst(OrigMI, MovMI, OldOpndVGPR, BoundCtrlZero);
+}
+
+// Returns true if MI doesn't have an OpndName immediate operand, or if that
+// operand, masked with Mask, equals Value.
+bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName,
+ int64_t Value, int64_t Mask) const {
+ auto *Imm = TII->getNamedOperand(MI, OpndName);
+ if (!Imm)
+ return true;
+
+ assert(Imm->isImm());
+ return (Imm->getImm() & Mask) == Value;
+}
+
+bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
+ assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
+ auto *BCZOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bound_ctrl);
+ assert(BCZOpnd && BCZOpnd->isImm());
+ bool BoundCtrlZero = 0 != BCZOpnd->getImm();
+
+ LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);
+
+ auto *OldOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::old);
+ assert(OldOpnd && OldOpnd->isReg());
+ auto OldOpndVGPR = getRegSubRegPair(*OldOpnd);
+ auto *OldOpndValue = getOldOpndValue(*OldOpnd);
+ assert(!OldOpndValue || OldOpndValue->isImm() || OldOpndValue == OldOpnd);
+ if (OldOpndValue) {
+ if (BoundCtrlZero) {
+ OldOpndVGPR.Reg = AMDGPU::NoRegister; // should be undef, ignore old opnd
+ OldOpndValue = nullptr;
+ } else {
+ if (!OldOpndValue->isImm()) {
+ LLVM_DEBUG(dbgs() << " failed: old operand isn't an imm or undef\n");
+ return false;
+ }
+ if (OldOpndValue->getImm() == 0) {
+ OldOpndVGPR.Reg = AMDGPU::NoRegister; // should be undef
+ OldOpndValue = nullptr;
+ BoundCtrlZero = true;
+ }
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << " old=";
+ if (!OldOpndValue)
+ dbgs() << "undef";
+ else
+ dbgs() << OldOpndValue->getImm();
+ dbgs() << ", bound_ctrl=" << BoundCtrlZero << '\n');
+
+ std::vector<MachineInstr*> OrigMIs, DPPMIs;
+ if (!OldOpndVGPR.Reg) { // OldOpndVGPR = undef
+ OldOpndVGPR = RegSubRegPair(
+ MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass));
+ auto UndefInst = BuildMI(*MovMI.getParent(), MovMI, MovMI.getDebugLoc(),
+ TII->get(AMDGPU::IMPLICIT_DEF), OldOpndVGPR.Reg);
+ DPPMIs.push_back(UndefInst.getInstr());
+ }
+
+ OrigMIs.push_back(&MovMI);
+ bool Rollback = true;
+ for (auto &Use : MRI->use_nodbg_operands(
+ TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst)->getReg())) {
+ Rollback = true;
+
+ auto &OrigMI = *Use.getParent();
+ auto OrigOp = OrigMI.getOpcode();
+ if (TII->isVOP3(OrigOp)) {
+ if (!TII->hasVALU32BitEncoding(OrigOp)) {
+ LLVM_DEBUG(dbgs() << " failed: VOP3 has no e32 equivalent\n");
+ break;
+ }
+ // Check whether any modifiers other than abs|neg are set (opsel, for example)
+ const int64_t Mask = ~(SISrcMods::ABS | SISrcMods::NEG);
+ if (!hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src0_modifiers, 0, Mask) ||
+ !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src1_modifiers, 0, Mask) ||
+ !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::clamp, 0) ||
+ !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::omod, 0)) {
+ LLVM_DEBUG(dbgs() << " failed: VOP3 has non-default modifiers\n");
+ break;
+ }
+ } else if (!TII->isVOP1(OrigOp) && !TII->isVOP2(OrigOp)) {
+ LLVM_DEBUG(dbgs() << " failed: not VOP1/2/3\n");
+ break;
+ }
+
+ LLVM_DEBUG(dbgs() << " combining: " << OrigMI);
+ if (&Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)) {
+ if (auto *DPPInst = createDPPInst(OrigMI, MovMI, OldOpndVGPR,
+ OldOpndValue, BoundCtrlZero)) {
+ DPPMIs.push_back(DPPInst);
+ Rollback = false;
+ }
+ } else if (OrigMI.isCommutable() &&
+ &Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
+ auto *BB = OrigMI.getParent();
+ auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI);
+ BB->insert(OrigMI, NewMI);
+ if (TII->commuteInstruction(*NewMI)) {
+ LLVM_DEBUG(dbgs() << " commuted: " << *NewMI);
+ if (auto *DPPInst = createDPPInst(*NewMI, MovMI, OldOpndVGPR,
+ OldOpndValue, BoundCtrlZero)) {
+ DPPMIs.push_back(DPPInst);
+ Rollback = false;
+ }
+ } else
+ LLVM_DEBUG(dbgs() << " failed: cannot be commuted\n");
+ NewMI->eraseFromParent();
+ } else
+ LLVM_DEBUG(dbgs() << " failed: no suitable operands\n");
+ if (Rollback)
+ break;
+ OrigMIs.push_back(&OrigMI);
+ }
+
+ for (auto *MI : *(Rollback? &DPPMIs : &OrigMIs))
+ MI->eraseFromParent();
+
+ return !Rollback;
+}
+
+bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
+ auto &ST = MF.getSubtarget<GCNSubtarget>();
+ if (!ST.hasDPP() || skipFunction(MF.getFunction()))
+ return false;
+
+ MRI = &MF.getRegInfo();
+ TII = ST.getInstrInfo();
+
+ assert(MRI->isSSA() && "Must be run on SSA");
+
+ bool Changed = false;
+ for (auto &MBB : MF) {
+ for (auto I = MBB.rbegin(), E = MBB.rend(); I != E;) {
+ auto &MI = *I++;
+ if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
+ Changed = true;
+ ++NumDPPMovsCombined;
+ }
+ }
+ }
+ return Changed;
+}
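To illustrate the "old is foldable" rule, a minimal MIR sketch in the spirit
of the new dpp_combine.ll tests (register numbers and the exact pseudo
spelling are assumptions for illustration): with bound_ctrl off, an $old that
is the identity of the use instruction is harmless for inactive lanes, so the
mov still folds:

  %2:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
  %3:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 1, 1, 0, implicit $exec
  %4:vgpr_32 = V_MAX_I32_e32 %3, %0, implicit $exec

  ; INT32_MAX is the identity of signed max, so the combine keeps %2 as $old:
  %4:vgpr_32 = V_MAX_I32_dpp %2, %1, %0, 1, 1, 1, 0, implicit $exec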
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=347993&r1=347992&r2=347993&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Fri Nov 30 06:21:56 2018
@@ -5632,3 +5632,84 @@ int SIInstrInfo::pseudoToMCOpcode(int Op
return MCOp;
}
+
+static
+TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd) {
+ assert(RegOpnd.isReg());
+ return RegOpnd.isUndef() ? TargetInstrInfo::RegSubRegPair() :
+ getRegSubRegPair(RegOpnd);
+}
+
+TargetInstrInfo::RegSubRegPair
+llvm::getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg) {
+ assert(MI.isRegSequence());
+ for (unsigned I = 0, E = (MI.getNumOperands() - 1)/ 2; I < E; ++I)
+ if (MI.getOperand(1 + 2 * I + 1).getImm() == SubReg) {
+ auto &RegOp = MI.getOperand(1 + 2 * I);
+ return getRegOrUndef(RegOp);
+ }
+ return TargetInstrInfo::RegSubRegPair();
+}
+
+// Try to find the definition of reg:subreg in subreg-manipulation pseudos
+// Following a subreg of reg:subreg isn't supported
+static bool followSubRegDef(MachineInstr &MI,
+ TargetInstrInfo::RegSubRegPair &RSR) {
+ if (!RSR.SubReg)
+ return false;
+ switch (MI.getOpcode()) {
+ default: break;
+ case AMDGPU::REG_SEQUENCE:
+ RSR = getRegSequenceSubReg(MI, RSR.SubReg);
+ return true;
+ // EXTRACT_SUBREG isn't supported as this would follow a subreg of subreg
+ case AMDGPU::INSERT_SUBREG:
+ if (RSR.SubReg == (unsigned)MI.getOperand(3).getImm())
+ // inserted the subreg we're looking for
+ RSR = getRegOrUndef(MI.getOperand(2));
+ else { // the subreg in the rest of the reg
+ auto R1 = getRegOrUndef(MI.getOperand(1));
+ if (R1.SubReg) // subreg of subreg isn't supported
+ return false;
+ RSR.Reg = R1.Reg;
+ }
+ return true;
+ }
+ return false;
+}
+
+MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
+ MachineRegisterInfo &MRI) {
+ assert(MRI.isSSA());
+ if (!TargetRegisterInfo::isVirtualRegister(P.Reg))
+ return nullptr;
+
+ auto RSR = P;
+ auto *DefInst = MRI.getVRegDef(RSR.Reg);
+ while (auto *MI = DefInst) {
+ DefInst = nullptr;
+ switch (MI->getOpcode()) {
+ case AMDGPU::COPY:
+ case AMDGPU::V_MOV_B32_e32: {
+ auto &Op1 = MI->getOperand(1);
+ if (Op1.isReg() &&
+ TargetRegisterInfo::isVirtualRegister(Op1.getReg())) {
+ if (Op1.isUndef())
+ return nullptr;
+ RSR = getRegSubRegPair(Op1);
+ DefInst = MRI.getVRegDef(RSR.Reg);
+ }
+ break;
+ }
+ default:
+ if (followSubRegDef(*MI, RSR)) {
+ if (!RSR.Reg)
+ return nullptr;
+ DefInst = MRI.getVRegDef(RSR.Reg);
+ }
+ }
+ if (!DefInst)
+ return MI;
+ }
+ return nullptr;
+}
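A short MIR sketch of what getVRegSubRegDef resolves, taken from the
mul_old_subreg test added below (the comment paraphrases the helper's
behavior):

  %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
  %3:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
  %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
  ; getVRegSubRegDef(%4:sub1) follows the REG_SEQUENCE operand for sub1
  ; and returns the 'V_MOV_B32_e32 42' instruction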
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h?rev=347993&r1=347992&r2=347993&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h Fri Nov 30 06:21:56 2018
@@ -917,9 +917,36 @@ public:
/// Return -1 if the target-specific opcode for the pseudo instruction does
/// not exist. If Opcode is not a pseudo instruction, this is identity.
int pseudoToMCOpcode(int Opcode) const;
-
};
+/// \brief Returns true if the reg:subreg pair P is of register class TRC
+inline bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P,
+ const TargetRegisterClass &TRC,
+ MachineRegisterInfo &MRI) {
+ auto *RC = MRI.getRegClass(P.Reg);
+ if (!P.SubReg)
+ return RC == &TRC;
+ auto *TRI = MRI.getTargetRegisterInfo();
+ return RC == TRI->getMatchingSuperRegClass(RC, &TRC, P.SubReg);
+}
+
+/// \brief Create RegSubRegPair from a register MachineOperand
+inline
+TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O) {
+ assert(O.isReg());
+ return TargetInstrInfo::RegSubRegPair(O.getReg(), O.getSubReg());
+}
+
+/// \brief Return the reg:subreg pair that a REG_SEQUENCE defines for
+/// subregister index SubReg
+TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
+ unsigned SubReg);
+
+/// \brief Return the defining instruction for a given reg:subreg pair
+/// skipping copy like instructions and subreg-manipulation pseudos.
+/// Following another subreg of a reg:subreg isn't supported.
+MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
+ MachineRegisterInfo &MRI);
+
namespace AMDGPU {
LLVM_READONLY
@@ -932,6 +959,9 @@ namespace AMDGPU {
int getSDWAOp(uint16_t Opcode);
LLVM_READONLY
+ int getDPPOp32(uint16_t Opcode);
+
+ LLVM_READONLY
int getBasicFromSDWAOp(uint16_t Opcode);
LLVM_READONLY
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=347993&r1=347992&r2=347993&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Fri Nov 30 06:21:56 2018
@@ -1622,7 +1622,7 @@ class getHasExt <int NumSrcArgs, ValueTy
0, // 64-bit dst - No DPP or SDWA for 64-bit operands
!if(!eq(Src0VT.Size, 64),
0, // 64-bit src0
- !if(!eq(Src0VT.Size, 64),
+ !if(!eq(Src1VT.Size, 64),
0, // 64-bit src1
1
)
@@ -1631,6 +1631,12 @@ class getHasExt <int NumSrcArgs, ValueTy
);
}
+class getHasDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
+ ValueType Src1VT = i32> {
+ bit ret = !if(!eq(NumSrcArgs, 0), 0,
+ getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret);
+}
+
class BitOr<bit a, bit b> {
bit ret = !if(a, 1, !if(b, 1, 0));
}
@@ -1710,7 +1716,7 @@ class VOPProfile <list<ValueType> _ArgVT
field bit HasSDWAOMod = isFloatType<DstVT>.ret;
field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
- field bit HasExtDPP = HasExt;
+ field bit HasExtDPP = getHasDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
field bit HasExtSDWA = HasExt;
field bit HasExtSDWA9 = HasExt;
field int NeedPatGen = PatGenMode.NoPattern;
@@ -1741,8 +1747,10 @@ class VOPProfile <list<ValueType> _ArgVT
getOpSelMod<Src0VT>.ret,
getOpSelMod<Src1VT>.ret,
getOpSelMod<Src2VT>.ret>.ret;
- field dag InsDPP = getInsDPP<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
- HasModifiers, Src0ModDPP, Src1ModDPP>.ret;
+ field dag InsDPP = !if(HasExtDPP,
+ getInsDPP<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
+ HasModifiers, Src0ModDPP, Src1ModDPP>.ret,
+ (ins));
field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
HasSDWAOMod, Src0ModSDWA, Src1ModSDWA,
DstVT>.ret;
@@ -1756,7 +1764,8 @@ class VOPProfile <list<ValueType> _ArgVT
HasSrc0FloatMods,
HasSrc1FloatMods,
HasSrc2FloatMods>.ret;
- field string AsmDPP = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
+ field string AsmDPP = !if(HasExtDPP,
+ getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret, "");
field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret;
field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret;
}
@@ -1931,6 +1940,15 @@ def getBasicFromSDWAOp : InstrMapping {
let ValueCols = [["Default"]];
}
+// Maps ordinary instructions to their DPP counterparts
+def getDPPOp32 : InstrMapping {
+ let FilterClass = "VOP";
+ let RowFields = ["OpName"];
+ let ColFields = ["AsmVariantName"];
+ let KeyCol = ["Default"];
+ let ValueCols = [["DPP"]];
+}
+
// Maps a commuted opcode to its original version
def getCommuteOrig : InstrMapping {
let FilterClass = "Commutable_REV";
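As an illustration of how the combiner consumes this table (opcode names as
they appear in the new tests; the two-step VOP3 path is the getDPPOp helper
in GCNDPPCombine.cpp):

  ; getDPPOp32(V_ADD_U32_e32) -> V_ADD_U32_dpp       (direct row hit)
  ; V_ADD_F32_e64 has no row of its own: getDPPOp first shrinks it to
  ; V_ADD_F32_e32 via getVOPe32 and then maps that to V_ADD_F32_dpp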
Modified: llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td?rev=347993&r1=347992&r2=347993&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td Fri Nov 30 06:21:56 2018
@@ -84,6 +84,10 @@ class VOP1_SDWA_Pseudo <string OpName, V
let AsmMatchConverter = "cvtSdwaVOP1";
}
+class VOP1_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
+ VOP_DPP_Pseudo <OpName, P, pattern> {
+}
+
class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
list<dag> ret =
!if(P.HasModifiers,
@@ -103,6 +107,8 @@ multiclass VOP1Inst <string opName, VOPP
def _e32 : VOP1_Pseudo <opName, P>;
def _e64 : VOP3_Pseudo <opName, P, getVOP1Pat64<node, P>.ret>;
def _sdwa : VOP1_SDWA_Pseudo <opName, P>;
+ foreach _ = BoolToList<P.HasExtDPP>.ret in
+ def _dpp : VOP1_DPP_Pseudo <opName, P>;
}
// Special profile for instructions which have clamp
@@ -500,13 +506,8 @@ defm V_EXP_LEGACY_F32 : VOP1_Real_ci
// VI
//===----------------------------------------------------------------------===//
-class VOP1_DPP <bits<8> op, VOP1_Pseudo ps, VOPProfile P = ps.Pfl> :
- VOP_DPP <ps.OpName, P> {
- let Defs = ps.Defs;
- let Uses = ps.Uses;
- let SchedRW = ps.SchedRW;
- let hasSideEffects = ps.hasSideEffects;
-
+class VOP1_DPPe <bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
+ VOP_DPPe <P> {
bits<8> vdst;
let Inst{8-0} = 0xfa; // dpp
let Inst{16-9} = op;
@@ -544,9 +545,10 @@ multiclass VOP1_Real_vi <bits<10> op> {
VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
- // For now left dpp only for asm/dasm
- // TODO: add corresponding pseudo
- def _dpp : VOP1_DPP<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")>;
+ foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_vi :
+ VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>,
+ VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
}
defm V_NOP : VOP1_Real_vi <0x0>;
@@ -717,9 +719,11 @@ multiclass VOP1_Real_gfx9 <bits<10> op>
VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
- // For now left dpp only for asm/dasm
- // TODO: add corresponding pseudo
- def _dpp : VOP1_DPP<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")>;
+ foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_gfx9 :
+ VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
+ VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
+
}
defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;
Modified: llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td?rev=347993&r1=347992&r2=347993&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td Fri Nov 30 06:21:56 2018
@@ -105,6 +105,11 @@ class VOP2_SDWA_Pseudo <string OpName, V
let AsmMatchConverter = "cvtSdwaVOP2";
}
+class VOP2_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
+ VOP_DPP_Pseudo <OpName, P, pattern> {
+}
+
class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
list<dag> ret = !if(P.HasModifiers,
[(set P.DstVT:$vdst,
@@ -155,7 +160,12 @@ multiclass VOP2Inst<string opName,
bit GFX9Renamed = 0> :
VOP2Inst_e32<opName, P, node, revOp, GFX9Renamed>,
VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>,
- VOP2Inst_sdwa<opName, P, node, revOp, GFX9Renamed>;
+ VOP2Inst_sdwa<opName, P, node, revOp, GFX9Renamed> {
+ let renamedInGFX9 = GFX9Renamed in {
+ foreach _ = BoolToList<P.HasExtDPP>.ret in
+ def _dpp : VOP2_DPP_Pseudo <opName, P>;
+ }
+}
multiclass VOP2bInst <string opName,
VOPProfile P,
@@ -172,6 +182,8 @@ multiclass VOP2bInst <string opName,
def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
let AsmMatchConverter = "cvtSdwaVOP2b";
}
+ foreach _ = BoolToList<P.HasExtDPP>.ret in
+ def _dpp : VOP2_DPP_Pseudo <opName, P>;
}
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
@@ -194,6 +206,9 @@ multiclass VOP2eInst <string opName,
def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
let AsmMatchConverter = "cvtSdwaVOP2b";
}
+
+ foreach _ = BoolToList<P.HasExtDPP>.ret in
+ def _dpp : VOP2_DPP_Pseudo <opName, P>;
}
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
@@ -233,9 +248,9 @@ class VOP_MAC <ValueType vt> : VOPProfil
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
0, HasModifiers, HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
- let InsDPP = (ins DstRCDPP:$old,
- Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
+ let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
+ VGPR_32:$src2, // stub argument
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
@@ -778,13 +793,8 @@ defm V_CVT_PK_I16_I32 : VOP2_Real_e3
// VI
//===----------------------------------------------------------------------===//
-class VOP2_DPP <bits<6> op, VOP2_Pseudo ps, string OpName = ps.OpName, VOPProfile P = ps.Pfl> :
- VOP_DPP <OpName, P> {
- let Defs = ps.Defs;
- let Uses = ps.Uses;
- let SchedRW = ps.SchedRW;
- let hasSideEffects = ps.hasSideEffects;
-
+class VOP2_DPPe <bits<6> op, VOP2_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
+ VOP_DPPe <P> {
bits<8> vdst;
bits<8> src1;
let Inst{8-0} = 0xfa; //dpp
@@ -865,8 +875,13 @@ multiclass VOP2be_Real_e32e64_vi_only <b
VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
let AsmString = AsmName # ps.AsmOperands;
}
- def _dpp :
- VOP2_DPP<op, !cast<VOP2_Pseudo>(OpName#"_e32"), AsmName>;
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_vi :
+ VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.VI>,
+ VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> {
+ VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp");
+ let AsmString = AsmName # ps.AsmOperands;
+ }
}
}
@@ -893,10 +908,14 @@ multiclass VOP2be_Real_e32e64_gfx9 <bits
VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
let AsmString = AsmName # ps.AsmOperands;
}
- def _dpp_gfx9 :
- VOP2_DPP<op, !cast<VOP2_Pseudo>(OpName#"_e32"), AsmName> {
- let DecoderNamespace = "SDWA9";
- }
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_gfx9 :
+ VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.GFX9>,
+ VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> {
+ VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp");
+ let AsmString = AsmName # ps.AsmOperands;
+ let DecoderNamespace = "SDWA9";
+ }
}
multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> {
@@ -914,19 +933,23 @@ multiclass VOP2_Real_e32e64_gfx9 <bits<6
VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
}
- def _dpp_gfx9 :
- VOP2_DPP<op, !cast<VOP2_Pseudo>(NAME#"_e32")> {
- let DecoderNamespace = "SDWA9";
- }
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_gfx9 :
+ VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
+ VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> {
+ let DecoderNamespace = "SDWA9";
+ }
}
} // AssemblerPredicates = [isGFX9]
multiclass VOP2_Real_e32e64_vi <bits<6> op> :
Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA_Real<op>, VOP2_SDWA9_Real<op> {
- // For now left dpp only for asm/dasm
- // TODO: add corresponding pseudo
- def _dpp : VOP2_DPP<op, !cast<VOP2_Pseudo>(NAME#"_e32")>;
+
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_vi :
+ VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>,
+ VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>;
}
defm V_CNDMASK_B32 : VOP2_Real_e32e64_vi <0x0>;
Modified: llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td?rev=347993&r1=347992&r2=347993&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td Fri Nov 30 06:21:56 2018
@@ -505,9 +505,14 @@ class VOP_DPPe<VOPProfile P> : Enc64 {
let Inst{63-60} = row_mask;
}
-class VOP_DPP <string OpName, VOPProfile P> :
- InstSI <P.OutsDPP, P.InsDPP, OpName#P.AsmDPP, []>,
- VOP_DPPe<P> {
+class VOP_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
+ InstSI <P.OutsDPP, P.InsDPP, OpName#P.AsmDPP, pattern>,
+ VOP <OpName>,
+ SIMCInstr <OpName#"_dpp", SIEncodingFamily.NONE>,
+ MnemonicAlias <OpName#"_dpp", OpName> {
+
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
let mayLoad = 0;
let mayStore = 0;
@@ -517,6 +522,11 @@ class VOP_DPP <string OpName, VOPProfile
let VALU = 1;
let DPP = 1;
let Size = 8;
+ let Uses = [EXEC];
+ let isConvergent = 1;
+
+ string Mnemonic = OpName;
+ string AsmOperands = P.AsmDPP;
let AsmMatchConverter = !if(!eq(P.HasModifiers,1), "cvtDPP", "");
let SubtargetPredicate = HasDPP;
@@ -526,6 +536,36 @@ class VOP_DPP <string OpName, VOPProfile
let Constraints = !if(P.NumSrcArgs, "$old = $vdst", "");
let DisableEncoding = !if(P.NumSrcArgs, "$old", "");
let DecoderNamespace = "DPP";
+
+ VOPProfile Pfl = P;
+}
+
+class VOP_DPP_Real <VOP_DPP_Pseudo ps, int EncodingFamily> :
+ InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
+ SIMCInstr <ps.PseudoInstr, EncodingFamily> {
+
+ let isPseudo = 0;
+ let isCodeGenOnly = 0;
+
+ let Defs = ps.Defs;
+ let Uses = ps.Uses;
+ let SchedRW = ps.SchedRW;
+ let hasSideEffects = ps.hasSideEffects;
+
+ let Constraints = ps.Constraints;
+ let DisableEncoding = ps.DisableEncoding;
+
+ // Copy relevant pseudo op flags
+ let isConvergent = ps.isConvergent;
+ let SubtargetPredicate = ps.SubtargetPredicate;
+ let AssemblerPredicate = ps.AssemblerPredicate;
+ let AsmMatchConverter = ps.AsmMatchConverter;
+ let AsmVariantName = ps.AsmVariantName;
+ let UseNamedOperandTable = ps.UseNamedOperandTable;
+ let DecoderNamespace = ps.DecoderNamespace;
+ let TSFlags = ps.TSFlags;
}
class getNumNodeArgs<SDPatternOperator Op> {
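In short, codegen now operates on an encoding-agnostic DPP pseudo that
SIMCInstr maps to the per-family real definition; a hedged sketch of the
flow (the MIR line itself is illustrative):

  ; selected/combined as the pseudo (SIEncodingFamily.NONE):
  %4:vgpr_32 = V_ADD_U32_dpp %2, %1, %0, 1, 1, 1, 0, implicit $exec
  ; at MC lowering, SIInstrInfo::pseudoToMCOpcode picks the matching
  ; _dpp_vi or _dpp_gfx9 real encoding for the current subtarget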
Added: llvm/trunk/test/CodeGen/AMDGPU/dpp_combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/dpp_combine.ll?rev=347993&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/dpp_combine.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/dpp_combine.ll Fri Nov 30 06:21:56 2018
@@ -0,0 +1,185 @@
+; RUN: llc -march=amdgcn -mcpu=tonga -amdgpu-dpp-combine -verify-machineinstrs < %s | FileCheck %s
+
+; VOP2 with literal cannot be combined
+; CHECK-LABEL: {{^}}dpp_combine_i32_literal:
+; CHECK: v_mov_b32_dpp [[OLD:v[0-9]+]], {{v[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x2 bank_mask:0x1 bound_ctrl:0
+; CHECK: v_add_u32_e32 {{v[0-9]+}}, vcc, 42, [[OLD]]
+define amdgpu_kernel void @dpp_combine_i32_literal(i32 addrspace(1)* %out, i32 %in) {
+ %dpp = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 %in, i32 1, i32 2, i32 1, i1 1) #0
+ %res = add nsw i32 %dpp, 42
+ store i32 %res, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}dpp_combine_i32_bz:
+; CHECK: v_add_u32_dpp {{v[0-9]+}}, vcc, {{v[0-9]+}}, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
+define amdgpu_kernel void @dpp_combine_i32_bz(i32 addrspace(1)* %out, i32 %in) {
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %dpp = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 %in, i32 1, i32 1, i32 1, i1 1) #0
+ %res = add nsw i32 %dpp, %x
+ store i32 %res, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}dpp_combine_i32_boff_undef:
+; CHECK: v_add_u32_dpp {{v[0-9]+}}, vcc, {{v[0-9]+}}, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
+define amdgpu_kernel void @dpp_combine_i32_boff_undef(i32 addrspace(1)* %out, i32 %in) {
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %dpp = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 %in, i32 1, i32 1, i32 1, i1 0) #0
+ %res = add nsw i32 %dpp, %x
+ store i32 %res, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}dpp_combine_i32_boff_0:
+; CHECK: v_add_u32_dpp {{v[0-9]+}}, vcc, {{v[0-9]+}}, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
+define amdgpu_kernel void @dpp_combine_i32_boff_0(i32 addrspace(1)* %out, i32 %in) {
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %dpp = call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %in, i32 1, i32 1, i32 1, i1 0) #0
+ %res = add nsw i32 %dpp, %x
+ store i32 %res, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}dpp_combine_i32_boff_max:
+; CHECK: v_bfrev_b32_e32 [[OLD:v[0-9]+]], -2
+; CHECK: v_max_i32_dpp [[OLD]], {{v[0-9]+}}, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
+define amdgpu_kernel void @dpp_combine_i32_boff_max(i32 addrspace(1)* %out, i32 %in) {
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %dpp = call i32 @llvm.amdgcn.update.dpp.i32(i32 2147483647, i32 %in, i32 1, i32 1, i32 1, i1 0) #0
+ %cmp = icmp sge i32 %dpp, %x
+ %res = select i1 %cmp, i32 %dpp, i32 %x
+ store i32 %res, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}dpp_combine_i32_boff_min:
+; CHECK: v_bfrev_b32_e32 [[OLD:v[0-9]+]], 1
+; CHECK: v_min_i32_dpp [[OLD]], {{v[0-9]+}}, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
+define amdgpu_kernel void @dpp_combine_i32_boff_min(i32 addrspace(1)* %out, i32 %in) {
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %dpp = call i32 @llvm.amdgcn.update.dpp.i32(i32 -2147483648, i32 %in, i32 1, i32 1, i32 1, i1 0) #0
+ %cmp = icmp sle i32 %dpp, %x
+ %res = select i1 %cmp, i32 %dpp, i32 %x
+ store i32 %res, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}dpp_combine_i32_boff_mul:
+; CHECK: v_mul_i32_i24_dpp v0, v3, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
+define amdgpu_kernel void @dpp_combine_i32_boff_mul(i32 addrspace(1)* %out, i32 %in) {
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %dpp = call i32 @llvm.amdgcn.update.dpp.i32(i32 1, i32 %in, i32 1, i32 1, i32 1, i1 0) #0
+
+ %dpp.shl = shl i32 %dpp, 8
+ %dpp.24 = ashr i32 %dpp.shl, 8
+ %x.shl = shl i32 %x, 8
+ %x.24 = ashr i32 %x.shl, 8
+ %res = mul i32 %dpp.24, %x.24
+ store i32 %res, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}dpp_combine_i32_commute:
+; CHECK: v_subrev_u32_dpp {{v[0-9]+}}, vcc, {{v[0-9]+}}, v0 quad_perm:[2,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
+define amdgpu_kernel void @dpp_combine_i32_commute(i32 addrspace(1)* %out, i32 %in) {
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %dpp = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 %in, i32 2, i32 1, i32 1, i1 1) #0
+ %res = sub nsw i32 %x, %dpp
+ store i32 %res, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}dpp_combine_f32:
+; CHECK: v_add_f32_dpp {{v[0-9]+}}, {{v[0-9]+}}, v0 quad_perm:[3,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
+define amdgpu_kernel void @dpp_combine_f32(i32 addrspace(1)* %out, i32 %in) {
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+
+ %dpp = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 %in, i32 3, i32 1, i32 1, i1 1) #0
+ %dpp.f32 = bitcast i32 %dpp to float
+ %x.f32 = bitcast i32 %x to float
+ %res.f32 = fadd float %x.f32, %dpp.f32
+ %res = bitcast float %res.f32 to i32
+ store i32 %res, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}dpp_combine_test_f32_mods:
+; CHECK: v_mul_f32_dpp {{v[0-9]+}}, |{{v[0-9]+}}|, -v0 quad_perm:[0,1,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
+define amdgpu_kernel void @dpp_combine_test_f32_mods(i32 addrspace(1)* %out, i32 %in) {
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+
+ %dpp = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 %in, i32 4, i32 1, i32 1, i1 1) #0
+
+ %x.f32 = bitcast i32 %x to float
+ %x.f32.neg = fsub float -0.000000e+00, %x.f32
+
+ %dpp.f32 = bitcast i32 %dpp to float
+ %dpp.f32.cmp = fcmp fast olt float %dpp.f32, 0.000000e+00
+ %dpp.f32.sign = select i1 %dpp.f32.cmp, float -1.000000e+00, float 1.000000e+00
+ %dpp.f32.abs = fmul fast float %dpp.f32, %dpp.f32.sign
+
+ %res.f32 = fmul float %x.f32.neg, %dpp.f32.abs
+ %res = bitcast float %res.f32 to i32
+ store i32 %res, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}dpp_combine_mac:
+; CHECK: v_mac_f32_dpp v0, {{v[0-9]+}}, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
+define amdgpu_kernel void @dpp_combine_mac(float addrspace(1)* %out, i32 %in) {
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+ %dpp = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 %in, i32 1, i32 1, i32 1, i1 1) #0
+ %dpp.f32 = bitcast i32 %dpp to float
+ %x.f32 = bitcast i32 %x to float
+ %y.f32 = bitcast i32 %y to float
+
+ %mult = fmul float %dpp.f32, %y.f32
+ %res = fadd float %mult, %x.f32
+ store float %res, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}dpp_combine_sequence:
+define amdgpu_kernel void @dpp_combine_sequence(i32 addrspace(1)* %out, i32 %in, i1 %cmp) {
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %dpp = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 %in, i32 1, i32 1, i32 1, i1 1) #0
+ br i1 %cmp, label %bb1, label %bb2
+bb1:
+; CHECK: v_add_u32_dpp {{v[0-9]+}}, vcc, {{v[0-9]+}}, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
+ %resadd = add nsw i32 %dpp, %x
+ br label %bb3
+bb2:
+; CHECK: v_subrev_u32_dpp {{v[0-9]+}}, vcc, {{v[0-9]+}}, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
+ %ressub = sub nsw i32 %x, %dpp
+ br label %bb3
+bb3:
+ %res = phi i32 [%resadd, %bb1], [%ressub, %bb2]
+ store i32 %res, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}dpp_combine_sequence_negative:
+; CHECK: v_mov_b32_dpp v1, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
+define amdgpu_kernel void @dpp_combine_sequence_negative(i32 addrspace(1)* %out, i32 %in, i1 %cmp) {
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %dpp = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 %in, i32 1, i32 1, i32 1, i1 1) #0
+ br i1 %cmp, label %bb1, label %bb2
+bb1:
+ %resadd = add nsw i32 %dpp, %x
+ br label %bb3
+bb2:
+ %ressub = sub nsw i32 2, %dpp ; break seq
+ br label %bb3
+bb3:
+ %res = phi i32 [%resadd, %bb1], [%ressub, %bb2]
+ store i32 %res, i32 addrspace(1)* %out
+ ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x()
+declare i32 @llvm.amdgcn.workitem.id.y()
+declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1) #0
+
+attributes #0 = { nounwind readnone convergent }
Added: llvm/trunk/test/CodeGen/AMDGPU/dpp_combine_subregs.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/dpp_combine_subregs.mir?rev=347993&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/dpp_combine_subregs.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/dpp_combine_subregs.mir Fri Nov 30 06:21:56 2018
@@ -0,0 +1,143 @@
+# RUN: llc -march=amdgcn -mcpu=tonga -run-pass=gcn-dpp-combine -o - %s | FileCheck %s
+
+# Test that the $old definition is correctly tracked through subreg-manipulation pseudos
+
+---
+# CHECK-LABEL: name: mul_old_subreg
+# CHECK: %7:vgpr_32 = V_MUL_I32_I24_dpp %0.sub1, %1, %0.sub1, 1, 1, 1, 0, implicit $exec
+
+name: mul_old_subreg
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vreg_64 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+ - { id: 3, class: vgpr_32 }
+ - { id: 4, class: vreg_64 }
+ - { id: 5, class: vreg_64 }
+ - { id: 6, class: vgpr_32 }
+ - { id: 7, class: vgpr_32 }
+
+liveins:
+ - { reg: '$vgpr0', virtual-reg: '%0' }
+ - { reg: '$vgpr1', virtual-reg: '%1' }
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ %0:vreg_64 = COPY $vgpr0
+ %1:vgpr_32 = COPY $vgpr1
+ %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
+ %4 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
+ %5 = INSERT_SUBREG %4, %1, %subreg.sub1 ; %5.sub0 is taken from %4
+ %6:vgpr_32 = V_MOV_B32_dpp %5.sub0, %1, 1, 1, 1, 0, implicit $exec
+ %7:vgpr_32 = V_MUL_I32_I24_e32 %6, %0.sub1, implicit $exec
+...
+
+# CHECK-LABEL: name: add_old_subreg
+# CHECK: [[OLD:\%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+# CHECK: %5:vgpr_32 = V_ADD_U32_dpp [[OLD]], %1, %0.sub1, 1, 1, 1, 1, implicit $exec
+
+name: add_old_subreg
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vreg_64 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+ - { id: 3, class: vreg_64 }
+ - { id: 4, class: vgpr_32 }
+ - { id: 5, class: vgpr_32 }
+
+liveins:
+ - { reg: '$vgpr0', virtual-reg: '%0' }
+ - { reg: '$vgpr1', virtual-reg: '%1' }
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ %0:vreg_64 = COPY $vgpr0
+ %1:vgpr_32 = COPY $vgpr1
+ %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %3:vreg_64 = INSERT_SUBREG %0, %2, %subreg.sub1 ; %3.sub1 is inserted
+ %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 1, 1, 0, implicit $exec
+ %5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
+...
+
+# CHECK-LABEL: name: add_old_subreg_undef
+# CHECK: %5:vgpr_32 = V_ADD_U32_dpp %3.sub1, %1, %0.sub1, 1, 1, 1, 0, implicit $exec
+
+name: add_old_subreg_undef
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vreg_64 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+ - { id: 3, class: vreg_64 }
+ - { id: 4, class: vgpr_32 }
+ - { id: 5, class: vgpr_32 }
+
+liveins:
+ - { reg: '$vgpr0', virtual-reg: '%0' }
+ - { reg: '$vgpr1', virtual-reg: '%1' }
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ %0:vreg_64 = COPY $vgpr0
+ %1:vgpr_32 = COPY $vgpr1
+ %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %3:vreg_64 = REG_SEQUENCE %2, %subreg.sub0 ; %3.sub1 is undef
+ %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 1, 1, 0, implicit $exec
+ %5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
+...
+
+# CHECK-LABEL: name: add_f32_e64
+# CHECK: %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 1, 1, 1, implicit $exec
+# CHECK: %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $exec
+# CHECK: %6:vgpr_32 = V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 1, 1, 1, implicit $exec
+# CHECK: %7:vgpr_32 = V_ADD_F32_dpp %2, 1, %1, 2, %0, 1, 1, 1, 1, implicit $exec
+# CHECK: %9:vgpr_32 = V_ADD_F32_e64 4, %8, 8, %0, 0, 0, implicit $exec
+
+name: add_f32_e64
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vgpr_32 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+ - { id: 3, class: vgpr_32 }
+ - { id: 4, class: vgpr_32 }
+ - { id: 5, class: vgpr_32 }
+ - { id: 6, class: vgpr_32 }
+ - { id: 7, class: vgpr_32 }
+ - { id: 8, class: vgpr_32 }
+ - { id: 9, class: vgpr_32 }
+
+liveins:
+ - { reg: '$vgpr0', virtual-reg: '%0' }
+ - { reg: '$vgpr1', virtual-reg: '%1' }
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = COPY $vgpr1
+ %2:vgpr_32 = IMPLICIT_DEF
+ %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 1, 1, 1, implicit $exec
+
+ ; this shouldn't be combined as omod is set
+ %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $exec
+
+ %5:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 1, 1, 1, implicit $exec
+
+ ; this should be combined as all modifiers are default
+ %6:vgpr_32 = V_ADD_F32_e64 0, %5, 0, %0, 0, 0, implicit $exec
+
+ ; this should be combined as modifiers other than abs|neg are default
+ %7:vgpr_32 = V_ADD_F32_e64 1, %5, 2, %0, 0, 0, implicit $exec
+
+ %8:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 1, 1, 1, implicit $exec
+
+ ; this shouldn't be combined as modifiers aren't abs|neg
+ %9:vgpr_32 = V_ADD_F32_e64 4, %8, 8, %0, 0, 0, implicit $exec
+...
Modified: llvm/trunk/test/MC/AMDGPU/vop_dpp.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/vop_dpp.s?rev=347993&r1=347992&r2=347993&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/vop_dpp.s (original)
+++ llvm/trunk/test/MC/AMDGPU/vop_dpp.s Fri Nov 30 06:21:56 2018
@@ -116,7 +116,6 @@ v_add_f32 v0, |v0|, -v0 row_shl:1 row_ma
//===----------------------------------------------------------------------===//
// NOSICI: error:
-// VI9: v_nop row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x7e,0x00,0x01,0x09,0xa1]
v_nop row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
// NOSICI: error: