PATCH: R600/SI: Do abs/neg folding with ComplexPatterns
Matt Arsenault
Matthew.Arsenault at amd.com
Mon Jun 30 15:10:51 PDT 2014
On 06/30/2014 02:21 PM, Tom Stellard wrote:
> Hi,
>
> The attached patches move the abs/neg folding for SI into
> ISel using complex patterns. The first patch adds a pass called
> SIShrinkInstructions, which converts instructions from 64-bit to 32-bit
> encodings. The next 4 patches do some tablegen refactoring, then patches
> 6 and 7 actually move the abs/neg folding.
>
> With these patches we now select to the 64-bit encoding for most instructions,
> which is why the SIShrinkInstructions pass was added. Also, some of the
> integer VOP3 instructions no longer have the modifiers operands.
>
> -Tom
>
> 0001-R600-SI-Add-instruction-shrinking-pass.patch
>
>
> From 9242923ed24890df6079dd9a95f3b6ca8520b54f Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Thu, 26 Jun 2014 21:53:56 -0400
> Subject: [PATCH 1/7] R600/SI: Add instruction shrinking pass.
>
> This pass converts 64-bit wide instructions to 32-bit when possible.
> ---
> lib/Target/R600/AMDGPU.h | 1 +
> lib/Target/R600/AMDGPUTargetMachine.cpp | 2 +
> lib/Target/R600/CMakeLists.txt | 1 +
> lib/Target/R600/SIInstrFormats.td | 1 +
> lib/Target/R600/SIInstrInfo.cpp | 9 ++
> lib/Target/R600/SIInstrInfo.h | 6 +
> lib/Target/R600/SIInstrInfo.td | 9 ++
> lib/Target/R600/SIShrinkInstructions.cpp | 188 +++++++++++++++++++++++++++++++
> test/CodeGen/R600/bfi_int.ll | 2 +-
> test/CodeGen/R600/ctpop.ll | 2 +-
> test/CodeGen/R600/fcmp64.ll | 2 +-
> test/CodeGen/R600/seto.ll | 2 +-
> test/CodeGen/R600/setuo.ll | 2 +-
> 13 files changed, 222 insertions(+), 5 deletions(-)
> create mode 100644 lib/Target/R600/SIShrinkInstructions.cpp
>
> diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
> index f92bde8..ea98c62 100644
> --- a/lib/Target/R600/AMDGPU.h
> +++ b/lib/Target/R600/AMDGPU.h
> @@ -39,6 +39,7 @@ FunctionPass *createAMDGPUCFGStructurizerPass();
> FunctionPass *createSITypeRewriter();
> FunctionPass *createSIAnnotateControlFlowPass();
> FunctionPass *createSILowerI1CopiesPass();
> +FunctionPass *createSIShrinkInstructionsPass();
> FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
> FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm);
> FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
> diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
> index be1ecea..3506f2c 100644
> --- a/lib/Target/R600/AMDGPUTargetMachine.cpp
> +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
> @@ -174,6 +174,7 @@ bool AMDGPUPassConfig::addPreRegAlloc() {
> // SIFixSGPRCopies can generate a lot of duplicate instructions,
> // so we need to run MachineCSE afterwards.
> addPass(&MachineCSEID);
> + addPass(createSIShrinkInstructionsPass());
> }
> return false;
> }
> @@ -181,6 +182,7 @@ bool AMDGPUPassConfig::addPreRegAlloc() {
> bool AMDGPUPassConfig::addPostRegAlloc() {
> const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
>
> + addPass(createSIShrinkInstructionsPass());
> if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
> addPass(createSIInsertWaits(*TM));
> }
> diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt
> index dc1fa9f..dee8302 100644
> --- a/lib/Target/R600/CMakeLists.txt
> +++ b/lib/Target/R600/CMakeLists.txt
> @@ -47,6 +47,7 @@ add_llvm_target(R600CodeGen
> SILowerI1Copies.cpp
> SIMachineFunctionInfo.cpp
> SIRegisterInfo.cpp
> + SIShrinkInstructions.cpp
> SITypeRewriter.cpp
> )
>
> diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
> index 7cae9fc..6c97275 100644
> --- a/lib/Target/R600/SIInstrFormats.td
> +++ b/lib/Target/R600/SIInstrFormats.td
> @@ -288,6 +288,7 @@ class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
> let mayLoad = 0;
> let mayStore = 0;
> let hasSideEffects = 0;
> + let UseNamedOperandTable = 1;
> let VOPC = 1;
> }
>
> diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
> index ea649f7..dc1667a 100644
> --- a/lib/Target/R600/SIInstrInfo.cpp
> +++ b/lib/Target/R600/SIInstrInfo.cpp
> @@ -1593,3 +1593,12 @@ void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved,
> for (int Index = std::max(0, Begin - 15); Index <= End; ++Index)
> Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index));
> }
> +
> +const MachineOperand *SIInstrInfo::getNamedOperand(const MachineInstr& MI,
> + unsigned OperandName) const {
> + int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
> + if (Idx == -1)
> + return nullptr;
> +
> + return &MI.getOperand(Idx);
> +}
> diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
> index 4c204d8..7107d87 100644
> --- a/lib/Target/R600/SIInstrInfo.h
> +++ b/lib/Target/R600/SIInstrInfo.h
> @@ -175,11 +175,17 @@ public:
> unsigned SavReg, unsigned IndexReg) const;
>
> void insertNOPs(MachineBasicBlock::iterator MI, int Count) const;
> +
> + /// \brief Returns the operand named \p Op. If \p MI does not have an
> + /// operand named \c Op, this function returns nullptr.
> + const MachineOperand *getNamedOperand(const MachineInstr& MI,
> + unsigned OperandName) const;
> };
>
> namespace AMDGPU {
>
> int getVOPe64(uint16_t Opcode);
> + int getVOPe32(uint16_t Opcode);
> int getCommuteRev(uint16_t Opcode);
> int getCommuteOrig(uint16_t Opcode);
> int getMCOpcode(uint16_t Opcode, unsigned Gen);
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index 09ba419..5432713 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -795,6 +795,15 @@ def getVOPe64 : InstrMapping {
> let ValueCols = [["8"]];
> }
>
> +// Maps an opcode in e64 form to its e32 equivalent
> +def getVOPe32 : InstrMapping {
> + let FilterClass = "VOP";
> + let RowFields = ["OpName"];
> + let ColFields = ["Size"];
> + let KeyCol = ["8"];
> + let ValueCols = [["4"]];
> +}
> +
> // Maps an original opcode to its commuted version
> def getCommuteRev : InstrMapping {
> let FilterClass = "VOP2_REV";
> diff --git a/lib/Target/R600/SIShrinkInstructions.cpp b/lib/Target/R600/SIShrinkInstructions.cpp
> new file mode 100644
> index 0000000..9f425f7
> --- /dev/null
> +++ b/lib/Target/R600/SIShrinkInstructions.cpp
> @@ -0,0 +1,188 @@
> +//===-- SIShrinkInstructions.cpp - Shrink Instructions --------------------===//
> +//
> +// The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +/// The pass tries to use the 32-bit encoding for instructions when possible.
> +//===----------------------------------------------------------------------===//
> +//
> +
> +#define DEBUG_TYPE "si-shrink-instructions"
I think DEBUG_TYPE is supposed to go below the includes these days.
> +#include "AMDGPU.h"
> +#include "SIInstrInfo.h"
> +#include "llvm/ADT/Statistic.h"
> +#include "llvm/CodeGen/MachineFunctionPass.h"
> +#include "llvm/CodeGen/MachineInstrBuilder.h"
> +#include "llvm/CodeGen/MachineRegisterInfo.h"
> +#include "llvm/IR/LLVMContext.h"
> +#include "llvm/IR/Function.h"
> +#include "llvm/Support/Debug.h"
> +#include "llvm/Target/TargetMachine.h"
> +
> +STATISTIC(NumInstructionsShrunk,
> + "Number of 64-bit instruction reduced to 32-bit.");
> +
> +namespace llvm {
> + void initializeSIShrinkInstructionsPass(PassRegistry&);
> +}
> +
> +using namespace llvm;
> +
> +namespace {
> +
> +class SIShrinkInstructions : public MachineFunctionPass {
> +public:
> + static char ID;
> +
> +public:
> + SIShrinkInstructions() : MachineFunctionPass(ID) {
> + }
> +
> + virtual bool runOnMachineFunction(MachineFunction &MF) override;
> +
> + virtual const char *getPassName() const override {
> + return "SI Shrink Instructions";
> + }
> +
> + virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
> + AU.setPreservesCFG();
> + MachineFunctionPass::getAnalysisUsage(AU);
> + }
> +};
> +
> +} // End anonymous namespace.
> +
> +INITIALIZE_PASS_BEGIN(SIShrinkInstructions, DEBUG_TYPE,
> + "SI Lower il Copies", false, false)
> +INITIALIZE_PASS_END(SIShrinkInstructions, DEBUG_TYPE,
> + "SI Lower il Copies", false, false)
> +
The pass description string ("SI Lower il Copies") looks like a copy-paste error from SILowerI1Copies; it should describe this pass (e.g. "SI Shrink Instructions").
> +char SIShrinkInstructions::ID = 0;
> +
> +FunctionPass *llvm::createSIShrinkInstructionsPass() {
> + return new SIShrinkInstructions();
> +}
> +
> +static bool isVGPR(const MachineOperand *MO, const SIRegisterInfo &TRI,
> + const MachineRegisterInfo &MRI) {
> + if (!MO->isReg())
> + return false;
> +
> + if (TargetRegisterInfo::isVirtualRegister(MO->getReg()))
> + return TRI.hasVGPRs(MRI.getRegClass(MO->getReg()));
> +
> + return TRI.hasVGPRs(TRI.getPhysRegClass(MO->getReg()));
> +}
> +
> +static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
> + const SIRegisterInfo &TRI,
> + const MachineRegisterInfo &MRI) {
> + const MachineOperand *Src0Mod, *Src1, *Src1Mod, *Src2, *Omod, *Clamp;
Can you declare these variables at the point where they are first assigned, rather than all up front?
> +
> + Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
> +
> + // Can't shrink instruction with three operands.
> + if (Src2)
> + return false;
> +
> + Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
> + Src1Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
> +
> + if (Src1 && (!isVGPR(Src1, TRI, MRI) || Src1Mod->getImm() != 0))
> + return false;
> +
> + // We don't need to check src0, all input types are legal, so just make
> + // sure src0 isn't using any modifiers.
> + Src0Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
> + if (Src0Mod && Src0Mod->getImm() != 0)
> + return false;
> +
> + // Check output modifiers
> + Omod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
> + if (Omod && Omod->getImm() != 0)
> + return false;
> +
> + Clamp = TII->getNamedOperand(MI, AMDGPU::OpName::clamp);
> + return !Clamp || Clamp->getImm() == 0;
> +}
> +
> +bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
> + MachineRegisterInfo &MRI = MF.getRegInfo();
> + const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
> + MF.getTarget().getInstrInfo());
> + const SIRegisterInfo &TRI = TII->getRegisterInfo();
> + std::vector<unsigned> I1Defs;
> +
> + for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
> + BI != BE; ++BI) {
> +
> + MachineBasicBlock &MBB = *BI;
> + MachineBasicBlock::iterator I, Next;
> + for (I = MBB.begin(); I != MBB.end(); I = Next) {
> + Next = std::next(I);
> + MachineInstr &MI = *I;
> +
> + int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
> +
> + if (Op32 == -1)
> + continue;
> +
> + if (!canShrink(MI, TII, TRI, MRI)) {
> + // Try commtuing the instruction and see if that enables us to shrink
> + // it.
Typo: commtuing
> + if (!MI.isCommutable() || !TII->commuteInstruction(&MI) ||
> + !canShrink(MI, TII, TRI, MRI))
> + continue;
> + }
> +
> + if (TII->isVOPC(Op32)) {
> + unsigned DstReg = MI.getOperand(0).getReg();
> + if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
> + // VOPC instructions can only write to the VCC register. We can't
> + // force them to use VCC here, because the register allocator
> + // has trouble with sequences like this, which cause the allocator
> + // to run out of registes if vreg0 and vreg1 belong to the VCCReg
Typo: registes
> + // register class:
> + // vreg0 = VOPC;
> + // vreg1 = VOPC;
> + // S_AND_B64 vreg0, vreg1
> + //
> + // So, instead of forcing the instruction to write to VCC, we provide a
> + // hint to the register allocator to use VCC and then we
> + // we will run this pass again after RA and shrink it if it outpus to
Typo: outpus
> + // VCC.
> + MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, AMDGPU::VCC);
> + continue;
> + }
> + if (DstReg != AMDGPU::VCC)
> + continue;
> + }
> +
> + // We can shrink this instruction
> + DEBUG(dbgs() << "Shrinking "; MI.dump(); dbgs() << "\n";);
You can use just `dbgs() << MI << '\n'` here (MI is a reference, so no dereference is needed).
> +
> + MachineInstrBuilder MIB =
> + BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32));
> +
> + // dst
> + MIB.addOperand(MI.getOperand(0));
> +
> + MIB.addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
> +
> + const MachineOperand *Src1 =
> + TII->getNamedOperand(MI, AMDGPU::OpName::src1);
> + if (Src1)
> + MIB.addOperand(*Src1);
> +
> + for (const MachineOperand &MO : MI.implicit_operands())
> + MIB.addOperand(MO);
> +
> + DEBUG(dbgs() << "e32 MI = "; MI.dump(); dbgs() << "\n";);
Ditto
> + NumInstructionsShrunk++;
Should use preincrement; the statistic isn't a primitive type.
> + MI.eraseFromParent();
> + }
> + }
> + return false;
> +}
> diff --git a/test/CodeGen/R600/bfi_int.ll b/test/CodeGen/R600/bfi_int.ll
> index bbfe856..d18702a 100644
> --- a/test/CodeGen/R600/bfi_int.ll
> +++ b/test/CodeGen/R600/bfi_int.ll
> @@ -38,7 +38,7 @@ entry:
> ; R600-CHECK: @bfi_sha256_ma
> ; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], KC0[2].Z, KC0[2].W
> ; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, KC0[3].X, KC0[2].W
> -; SI-CHECK: V_XOR_B32_e64 [[DST:v[0-9]+]], {{[sv][0-9]+, v[0-9]+}}
> +; SI-CHECK: V_XOR_B32_e32 [[DST:v[0-9]+]], {{[sv][0-9]+, v[0-9]+}}
> ; SI-CHECK: V_BFI_B32 {{v[0-9]+}}, [[DST]], {{[sv][0-9]+, [sv][0-9]+}}
>
> define void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
> diff --git a/test/CodeGen/R600/ctpop.ll b/test/CodeGen/R600/ctpop.ll
> index 15be8e1..22a3022 100644
> --- a/test/CodeGen/R600/ctpop.ll
> +++ b/test/CodeGen/R600/ctpop.ll
> @@ -43,7 +43,7 @@ define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noali
> ; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0
> ; SI: V_BCNT_U32_B32_e32 [[MIDRESULT:v[0-9]+]], [[VAL1]], [[VZERO]]
> ; SI-NOT: ADD
> -; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
> +; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
> ; SI: BUFFER_STORE_DWORD [[RESULT]],
> ; SI: S_ENDPGM
>
> diff --git a/test/CodeGen/R600/fcmp64.ll b/test/CodeGen/R600/fcmp64.ll
> index bcc7a8c..8cbe9f6 100644
> --- a/test/CodeGen/R600/fcmp64.ll
> +++ b/test/CodeGen/R600/fcmp64.ll
> @@ -53,7 +53,7 @@ define void @fge_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
> }
>
> ; CHECK: @fne_f64
> -; CHECK: V_CMP_NEQ_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
> +; CHECK: V_CMP_NEQ_F64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
>
> define void @fne_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
> double addrspace(1)* %in2) {
> diff --git a/test/CodeGen/R600/seto.ll b/test/CodeGen/R600/seto.ll
> index e90e788..cc942c1 100644
> --- a/test/CodeGen/R600/seto.ll
> +++ b/test/CodeGen/R600/seto.ll
> @@ -1,7 +1,7 @@
> ;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
>
> ;CHECK-LABEL: @main
> -;CHECK: V_CMP_O_F32_e64 s[0:1], {{[sv][0-9]+, [sv][0-9]+}}, 0, 0
> +;CHECK: V_CMP_O_F32_e32 vcc, {{[sv][0-9]+, v[0-9]+}}
>
> define void @main(float %p) {
> main_body:
> diff --git a/test/CodeGen/R600/setuo.ll b/test/CodeGen/R600/setuo.ll
> index 3b1db8b..33007fc 100644
> --- a/test/CodeGen/R600/setuo.ll
> +++ b/test/CodeGen/R600/setuo.ll
> @@ -1,7 +1,7 @@
> ;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
>
> ;CHECK-LABEL: @main
> -;CHECK: V_CMP_U_F32_e64 s[0:1], {{[sv][0-9]+, [sv][0-9]+}}, 0, 0
> +;CHECK: V_CMP_U_F32_e32 vcc, {{[sv][0-9]+, v[0-9]+}}
>
> define void @main(float %p) {
> main_body:
> -- 1.8.1.5
>
> 0002-R600-SI-Initialize-unused-VOP3-sources-to-0-instead-.patch
>
>
> From 92f9941af76aaf795f15cd8e99e40c5c312e8af2 Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Fri, 13 Jun 2014 14:54:30 -0700
> Subject: [PATCH 2/7] R600/SI: Initialize unused VOP3 sources to 0 instead of
> SIOperand.ZERO
>
> ---
> lib/Target/R600/SIInstrInfo.td | 12 ++++++------
> 1 file changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index 5432713..211b6f9 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -300,8 +300,8 @@ multiclass VOP1_Helper <bits<8> op, RegisterClass drc, RegisterClass src,
> (ins InputMods:$src0_modifiers, src:$src0, i32imm:$clamp, i32imm:$omod),
> opName#"_e64 $dst, $src0_modifiers, $clamp, $omod", []
> >, VOP <opName> {
> - let src1 = SIOperand.ZERO;
> - let src2 = SIOperand.ZERO;
> + let src1 = 0;
> + let src2 = 0;
> }
> }
>
> @@ -332,7 +332,7 @@ multiclass VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
> i32imm:$clamp, i32imm:$omod),
> opName#"_e64 $dst, $src0_modifiers, $src1_modifiers, $clamp, $omod", []
> >, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
> - let src2 = SIOperand.ZERO;
> + let src2 = 0;
> }
> }
>
> @@ -360,7 +360,7 @@ multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern,
> i32imm:$clamp, i32imm:$omod),
> opName#"_e64 $dst, $src0_modifiers, $src1_modifiers, $clamp, $omod", []
> >, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
> - let src2 = SIOperand.ZERO;
> + let src2 = 0;
> /* the VOP2 variant puts the carry out into VCC, the VOP3 variant
> can write it into any SGPR. We currently don't use the carry out,
> so for now hardcode it to VCC as well */
> @@ -389,7 +389,7 @@ multiclass VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
> )
> >, VOP <opName> {
> let Defs = !if(defExec, [EXEC], []);
> - let src2 = SIOperand.ZERO;
> + let src2 = 0;
> let src2_modifiers = 0;
> }
> }
> @@ -424,7 +424,7 @@ class VOP3_64_32 <bits <9> op, string opName, list<dag> pattern> : VOP3 <
> opName#" $dst, $src0, $src1", pattern
> >, VOP <opName> {
>
> - let src2 = SIOperand.ZERO;
> + let src2 = 0;
> let src0_modifiers = 0;
> let clamp = 0;
> let omod = 0;
> -- 1.8.1.5
>
> 0003-R600-SI-Initailize-encoding-fields-of-unused-VOP3-mo.patch
>
>
> From c67c6eba163c36a992d54f8cd3af21223997a3d0 Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Fri, 13 Jun 2014 15:01:11 -0700
> Subject: [PATCH 3/7] R600/SI: Initailize encoding fields of unused VOP3
> modifiers to 0
>
> ---
> lib/Target/R600/SIInstrInfo.td | 5 +++++
> 1 file changed, 5 insertions(+)
>
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index 211b6f9..e9eff77 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -301,7 +301,9 @@ multiclass VOP1_Helper <bits<8> op, RegisterClass drc, RegisterClass src,
> opName#"_e64 $dst, $src0_modifiers, $clamp, $omod", []
> >, VOP <opName> {
> let src1 = 0;
> + let src1_modifiers = 0;
> let src2 = 0;
> + let src2_modifiers = 0;
> }
> }
>
> @@ -333,6 +335,7 @@ multiclass VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
> opName#"_e64 $dst, $src0_modifiers, $src1_modifiers, $clamp, $omod", []
> >, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
> let src2 = 0;
> + let src2_modifiers = 0;
> }
> }
>
> @@ -361,6 +364,7 @@ multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern,
> opName#"_e64 $dst, $src0_modifiers, $src1_modifiers, $clamp, $omod", []
> >, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
> let src2 = 0;
> + let src2_modifiers = 0;
> /* the VOP2 variant puts the carry out into VCC, the VOP3 variant
> can write it into any SGPR. We currently don't use the carry out,
> so for now hardcode it to VCC as well */
> @@ -425,6 +429,7 @@ class VOP3_64_32 <bits <9> op, string opName, list<dag> pattern> : VOP3 <
> >, VOP <opName> {
>
> let src2 = 0;
> + let src2_modifiers = 0;
> let src0_modifiers = 0;
> let clamp = 0;
> let omod = 0;
> -- 1.8.1.5
>
> 0004-R600-SI-Separate-encoding-and-operand-definitions-in.patch
>
>
> From e8177962f5ae70505a891e9d97ddab4bf02f2665 Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Fri, 27 Jun 2014 16:53:34 -0400
> Subject: [PATCH 4/7] R600/SI: Separate encoding and operand definitions into
> their own classes
>
> ---
> lib/Target/R600/SIInstrFormats.td | 312 +++++++++++++++++++++-----------------
> 1 file changed, 177 insertions(+), 135 deletions(-)
>
> diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
> index 6c97275..ca98075 100644
> --- a/lib/Target/R600/SIInstrFormats.td
> +++ b/lib/Target/R600/SIInstrFormats.td
> @@ -37,22 +37,20 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
> let TSFlags{9} = SALU;
> }
>
> -class Enc32 <dag outs, dag ins, string asm, list<dag> pattern> :
> - InstSI <outs, ins, asm, pattern> {
> +class Enc32 {
>
> field bits<32> Inst;
> - let Size = 4;
> + int Size = 4;
> }
>
> -class Enc64 <dag outs, dag ins, string asm, list<dag> pattern> :
> - InstSI <outs, ins, asm, pattern> {
> +class Enc64 {
>
> field bits<64> Inst;
> - let Size = 8;
> + int Size = 8;
> }
>
> class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> :
> - Enc64 <outs, ins, asm, pattern> {
> + InstSI <outs, ins, asm, pattern> {
>
> let mayLoad = 0;
> let mayStore = 0;
> @@ -65,8 +63,7 @@ class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> :
> // Scalar operations
> //===----------------------------------------------------------------------===//
>
> -class SOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
> - Enc32<outs, ins, asm, pattern> {
> +class SOP1e <bits<8> op> : Enc32 {
>
> bits<7> SDST;
> bits<8> SSRC0;
> @@ -75,16 +72,10 @@ class SOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
> let Inst{15-8} = op;
> let Inst{22-16} = SDST;
> let Inst{31-23} = 0x17d; //encoding;
> -
> - let mayLoad = 0;
> - let mayStore = 0;
> - let hasSideEffects = 0;
> - let SALU = 1;
> }
>
> -class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
> - Enc32 <outs, ins, asm, pattern> {
> -
> +class SOP2e <bits<7> op> : Enc32 {
> +
> bits<7> SDST;
> bits<8> SSRC0;
> bits<8> SSRC1;
> @@ -94,15 +85,9 @@ class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
> let Inst{22-16} = SDST;
> let Inst{29-23} = op;
> let Inst{31-30} = 0x2; // encoding
> -
> - let mayLoad = 0;
> - let mayStore = 0;
> - let hasSideEffects = 0;
> - let SALU = 1;
> }
>
> -class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
> - Enc32<outs, ins, asm, pattern> {
> +class SOPCe <bits<7> op> : Enc32 {
>
> bits<8> SSRC0;
> bits<8> SSRC1;
> @@ -111,62 +96,90 @@ class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
> let Inst{15-8} = SSRC1;
> let Inst{22-16} = op;
> let Inst{31-23} = 0x17e;
> -
> - let DisableEncoding = "$dst";
> - let mayLoad = 0;
> - let mayStore = 0;
> - let hasSideEffects = 0;
> - let SALU = 1;
> }
>
> -class SOPK <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
> - Enc32 <outs, ins , asm, pattern> {
> +class SOPKe <bits<5> op> : Enc32 {
>
> bits <7> SDST;
> bits <16> SIMM16;
> -
> +
> let Inst{15-0} = SIMM16;
> let Inst{22-16} = SDST;
> let Inst{27-23} = op;
> let Inst{31-28} = 0xb; //encoding
> -
> - let mayLoad = 0;
> - let mayStore = 0;
> - let hasSideEffects = 0;
> - let SALU = 1;
> }
>
> -class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> : Enc32 <
> - (outs),
> - ins,
> - asm,
> - pattern > {
> +class SOPPe <bits<7> op> : Enc32 {
>
> - bits <16> SIMM16;
> + bits <16> SIMM16;
Random indentation
>
> let Inst{15-0} = SIMM16;
> let Inst{22-16} = op;
> let Inst{31-23} = 0x17f; // encoding
> -
> - let mayLoad = 0;
> - let mayStore = 0;
> - let hasSideEffects = 0;
> - let SALU = 1;
> }
>
> -class SMRD <bits<5> op, bits<1> imm, dag outs, dag ins, string asm,
> - list<dag> pattern> : Enc32<outs, ins, asm, pattern> {
> +class SMRDe <bits<5> op, bits<1> imm> : Enc32 {
>
> bits<7> SDST;
> bits<7> SBASE;
> bits<8> OFFSET;
> -
> +
> let Inst{7-0} = OFFSET;
> let Inst{8} = imm;
> let Inst{14-9} = SBASE{6-1};
> let Inst{21-15} = SDST;
> let Inst{26-22} = op;
> let Inst{31-27} = 0x18; //encoding
> +}
> +
> +class SOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
> + InstSI<outs, ins, asm, pattern>, SOP1e <op> {
> +
> + let mayLoad = 0;
> + let mayStore = 0;
> + let hasSideEffects = 0;
> + let SALU = 1;
> +}
> +
> +class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
> + InstSI <outs, ins, asm, pattern>, SOP2e<op> {
> +
> + let mayLoad = 0;
> + let mayStore = 0;
> + let hasSideEffects = 0;
> + let SALU = 1;
> +}
> +
> +class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
> + InstSI<outs, ins, asm, pattern>, SOPCe <op> {
> +
> + let DisableEncoding = "$dst";
> + let mayLoad = 0;
> + let mayStore = 0;
> + let hasSideEffects = 0;
> + let SALU = 1;
> +}
> +
> +class SOPK <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
> + InstSI <outs, ins , asm, pattern>, SOPKe<op> {
> +
> + let mayLoad = 0;
> + let mayStore = 0;
> + let hasSideEffects = 0;
> + let SALU = 1;
> +}
> +
> +class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> :
> + InstSI <(outs), ins, asm, pattern >, SOPPe <op> {
> +
> + let mayLoad = 0;
> + let mayStore = 0;
> + let hasSideEffects = 0;
> + let SALU = 1;
> +}
> +
> +class SMRD <bits<5> op, bits<1> imm, dag outs, dag ins, string asm,
> + list<dag> pattern> : InstSI<outs, ins, asm, pattern>, SMRDe<op, imm> {
>
> let LGKM_CNT = 1;
> let SMRD = 1;
> @@ -175,51 +188,34 @@ class SMRD <bits<5> op, bits<1> imm, dag outs, dag ins, string asm,
> //===----------------------------------------------------------------------===//
> // Vector ALU operations
> //===----------------------------------------------------------------------===//
> -
> -let Uses = [EXEC] in {
>
> -class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
> - Enc32 <outs, ins, asm, pattern> {
> +class VOP1e <bits<8> op> : Enc32 {
>
> bits<8> VDST;
> bits<9> SRC0;
> -
> +
> let Inst{8-0} = SRC0;
> let Inst{16-9} = op;
> let Inst{24-17} = VDST;
> let Inst{31-25} = 0x3f; //encoding
> -
> - let mayLoad = 0;
> - let mayStore = 0;
> - let hasSideEffects = 0;
> - let UseNamedOperandTable = 1;
> - let VOP1 = 1;
> }
>
> -class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
> - Enc32 <outs, ins, asm, pattern> {
> +class VOP2e <bits<6> op> : Enc32 {
>
> bits<8> VDST;
> bits<9> SRC0;
> bits<8> VSRC1;
> -
> +
> let Inst{8-0} = SRC0;
> let Inst{16-9} = VSRC1;
> let Inst{24-17} = VDST;
> let Inst{30-25} = op;
> let Inst{31} = 0x0; //encoding
> -
> - let mayLoad = 0;
> - let mayStore = 0;
> - let hasSideEffects = 0;
> - let UseNamedOperandTable = 1;
> - let VOP2 = 1;
> }
>
> -class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
> - VOP3Common <outs, ins, asm, pattern> {
> +class VOP3e <bits<9> op> : Enc64 {
>
> - bits<8> dst;
> + bits<8> dst;
Random indentation
> bits<2> src0_modifiers;
> bits<9> src0;
> bits<2> src1_modifiers;
> @@ -243,13 +239,11 @@ class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
> let Inst{61} = src0_modifiers{0};
> let Inst{62} = src1_modifiers{0};
> let Inst{63} = src2_modifiers{0};
> -
> }
>
> -class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
> - VOP3Common <outs, ins, asm, pattern> {
> +class VOP3be <bits<9> op> : Enc64 {
>
> - bits<8> dst;
> + bits<8> dst;
Random indentation
> bits<2> src0_modifiers;
> bits<9> src0;
> bits<2> src1_modifiers;
> @@ -270,11 +264,9 @@ class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
> let Inst{61} = src0_modifiers{0};
> let Inst{62} = src1_modifiers{0};
> let Inst{63} = src2_modifiers{0};
> -
> }
>
> -class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
> - Enc32 <(outs VCCReg:$dst), ins, asm, pattern> {
> +class VOPCe <bits<8> op> : Enc32 {
>
> bits<9> SRC0;
> bits<8> VSRC1;
> @@ -283,17 +275,9 @@ class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
> let Inst{16-9} = VSRC1;
> let Inst{24-17} = op;
> let Inst{31-25} = 0x3e;
> -
> - let DisableEncoding = "$dst";
> - let mayLoad = 0;
> - let mayStore = 0;
> - let hasSideEffects = 0;
> - let UseNamedOperandTable = 1;
> - let VOPC = 1;
> }
>
> -class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> :
> - Enc32 <outs, ins, asm, pattern> {
> +class VINTRPe <bits<2> op> : Enc32 {
>
> bits<8> VDST;
> bits<8> VSRC;
> @@ -306,22 +290,9 @@ class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> :
> let Inst{17-16} = op;
> let Inst{25-18} = VDST;
> let Inst{31-26} = 0x32; // encoding
> -
> - let neverHasSideEffects = 1;
> - let mayLoad = 1;
> - let mayStore = 0;
> }
neverHasSideEffects = 1 seems to have gotten lost here. I think it should still be the default for all instructions, since it's true for the vast majority of them.
>
> -} // End Uses = [EXEC]
> -
> -//===----------------------------------------------------------------------===//
> -// Vector I/O operations
> -//===----------------------------------------------------------------------===//
> -
> -let Uses = [EXEC] in {
> -
> -class DS <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
> - Enc64 <outs, ins, asm, pattern> {
> +class DSe <bits<8> op> : Enc64 {
>
> bits<8> vdst;
> bits<1> gds;
> @@ -340,12 +311,9 @@ class DS <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
> let Inst{47-40} = data0;
> let Inst{55-48} = data1;
> let Inst{63-56} = vdst;
> -
> - let LGKM_CNT = 1;
> }
>
> -class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
> - Enc64<outs, ins, asm, pattern> {
> +class MUBUFe <bits<7> op> : Enc64 {
>
> bits<12> offset;
> bits<1> offen;
> @@ -374,16 +342,9 @@ class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
> let Inst{54} = slc;
> let Inst{55} = tfe;
> let Inst{63-56} = soffset;
> -
> - let VM_CNT = 1;
> - let EXP_CNT = 1;
> -
> - let neverHasSideEffects = 1;
> - let UseNamedOperandTable = 1;
> }
>
> -class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> :
> - Enc64<outs, ins, asm, pattern> {
> +class MTBUFe <bits<3> op> : Enc64 {
>
> bits<8> VDATA;
> bits<12> OFFSET;
> @@ -414,15 +375,9 @@ class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> :
> let Inst{54} = SLC;
> let Inst{55} = TFE;
> let Inst{63-56} = SOFFSET;
> -
> - let VM_CNT = 1;
> - let EXP_CNT = 1;
> -
> - let neverHasSideEffects = 1;
> }
>
> -class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
> - Enc64 <outs, ins, asm, pattern> {
> +class MIMGe <bits<7> op> : Enc64 {
>
> bits<8> VDATA;
> bits<4> DMASK;
> @@ -435,7 +390,7 @@ class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
> bits<1> SLC;
> bits<8> VADDR;
> bits<7> SRSRC;
> - bits<7> SSAMP;
> + bits<7> SSAMP;
>
> let Inst{11-8} = DMASK;
> let Inst{12} = UNORM;
> @@ -451,18 +406,9 @@ class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
> let Inst{47-40} = VDATA;
> let Inst{52-48} = SRSRC{6-2};
> let Inst{57-53} = SSAMP{6-2};
> -
> - let VM_CNT = 1;
> - let EXP_CNT = 1;
> - let MIMG = 1;
> }
>
> -def EXP : Enc64<
> - (outs),
> - (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm,
> - VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
> - "EXP $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3",
> - [] > {
> +class EXPe : Enc64 {
>
> bits<4> EN;
> bits<6> TGT;
> @@ -484,6 +430,102 @@ def EXP : Enc64<
> let Inst{47-40} = VSRC1;
> let Inst{55-48} = VSRC2;
> let Inst{63-56} = VSRC3;
> +}
> +
> +let Uses = [EXEC] in {
> +
> +class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
> + InstSI <outs, ins, asm, pattern>, VOP1e<op> {
> +
> + let mayLoad = 0;
> + let mayStore = 0;
> + let hasSideEffects = 0;
> + let UseNamedOperandTable = 1;
> + let VOP1 = 1;
> +}
> +
> +class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
> + InstSI <outs, ins, asm, pattern>, VOP2e<op> {
> +
> + let mayLoad = 0;
> + let mayStore = 0;
> + let hasSideEffects = 0;
> + let UseNamedOperandTable = 1;
> + let VOP2 = 1;
> +}
> +
> +class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
> + VOP3Common <outs, ins, asm, pattern>, VOP3e<op>;
> +
> +class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
> + VOP3Common <outs, ins, asm, pattern>, VOP3be<op>;
> +
> +class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
> + InstSI <(outs VCCReg:$dst), ins, asm, pattern>, VOPCe <op> {
> +
> + let DisableEncoding = "$dst";
> + let mayLoad = 0;
> + let mayStore = 0;
> + let hasSideEffects = 0;
> + let UseNamedOperandTable = 1;
> + let VOPC = 1;
> +}
> +
> +class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> :
> + InstSI <outs, ins, asm, pattern>, VINTRPe<op> {
> +
> + let neverHasSideEffects = 1;
> + let mayLoad = 1;
> + let mayStore = 0;
> +}
> +
> +} // End Uses = [EXEC]
> +
> +//===----------------------------------------------------------------------===//
> +// Vector I/O operations
> +//===----------------------------------------------------------------------===//
> +
> +let Uses = [EXEC] in {
> +
> +class DS <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
> + InstSI <outs, ins, asm, pattern> , DSe<op> {
> +
> + let LGKM_CNT = 1;
> +}
> +
> +class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
> + InstSI<outs, ins, asm, pattern>, MUBUFe <op> {
> +
> + let VM_CNT = 1;
> + let EXP_CNT = 1;
> +
> + let neverHasSideEffects = 1;
> + let UseNamedOperandTable = 1;
> +}
> +
> +class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> :
> + InstSI<outs, ins, asm, pattern>, MTBUFe <op> {
> +
> + let VM_CNT = 1;
> + let EXP_CNT = 1;
> +
> + let neverHasSideEffects = 1;
> +}
> +
> +class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
> + InstSI <outs, ins, asm, pattern>, MIMGe <op> {
> +
> + let VM_CNT = 1;
> + let EXP_CNT = 1;
> + let MIMG = 1;
> +}
> +
> +def EXP : InstSI<
> + (outs),
> + (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm,
> + VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
> + "EXP $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3",
> + [] >, EXPe {
>
> let EXP_CNT = 1;
> }
> -- 1.8.1.5
>
> 0005-R600-SI-Refactor-VOP3-instruction-definitions.patch
>
>
> From c058f7e0c619941954848287f9f31560410f8327 Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Fri, 13 Jun 2014 17:08:09 -0700
> Subject: [PATCH 5/7] R600/SI: Refactor VOP3 instruction definitions
>
> ---
> lib/Target/R600/SIInstrFormats.td | 2 ++
> lib/Target/R600/SIInstrInfo.td | 71 +++++++++++++++++++++++++++------------
> 2 files changed, 52 insertions(+), 21 deletions(-)
>
> diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
> index ca98075..2552001 100644
> --- a/lib/Target/R600/SIInstrFormats.td
> +++ b/lib/Target/R600/SIInstrFormats.td
> @@ -57,6 +57,8 @@ class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> :
> let hasSideEffects = 0;
> let UseNamedOperandTable = 1;
> let VOP3 = 1;
> +
> + int Size = 8;
> }
>
> //===----------------------------------------------------------------------===//
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index e9eff77..0c92038 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -269,16 +269,54 @@ class SIMCInstr <string pseudo, int subtarget> {
> int Subtarget = subtarget;
> }
>
> +class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
> + VOP3Common <outs, ins, "", pattern>,
> + VOP <opName>,
> + SIMCInstr<opName, SISubtarget.NONE> {
> + let isPseudo = 1;
> +}
> +
> +class VOP3_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> :
> + VOP3 <op, outs, ins, asm, []>,
> + SIMCInstr<opName, SISubtarget.SI>;
> +
> multiclass VOP3_m <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern,
> string opName> {
>
> - def "" : VOP3Common <outs, ins, "", pattern>, VOP <opName>,
> - SIMCInstr<OpName, SISubtarget.NONE> {
> - let isPseudo = 1;
> - }
> + def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
> +
> + def _si : VOP3_Real_si <op, outs, ins, asm, opName>;
> +
> +}
> +
> +multiclass VOP3_1_m <bits<8> op, dag outs, dag ins, string asm,
> + list<dag> pattern, string opName> {
> +
> + def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
> +
> + let src1 = 0, src1_modifiers = 0, src2 = 0, src2_modifiers = 0 in {
> +
> + def _si : VOP3_Real_si <
> + {1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
> + outs, ins, asm, opName
> + >;
> +
> + } // src1 = 0, src1_modifiers = 0, src2 = 0, src2_modifiers = 0
> +}
> +
> +multiclass VOP3_2_m <bits<6> op, dag outs, dag ins, string asm,
> + list<dag> pattern, string opName, string revOp> {
>
> - def _si : VOP3 <op, outs, ins, asm, []>, SIMCInstr<opName, SISubtarget.SI>;
> + def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
>
> + let src2 = 0, src2_modifiers = 0 in {
> +
> + def _si : VOP3_Real_si <
> + {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
> + outs, ins, asm, opName>,
> + VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
> +
> + } // src2 = 0, src2_modifiers = 0
> }
>
> // This must always be right before the operand being input modified.
> @@ -294,17 +332,11 @@ multiclass VOP1_Helper <bits<8> op, RegisterClass drc, RegisterClass src,
> opName#"_e32 $dst, $src0", pattern
> >, VOP <opName>;
>
> - def _e64 : VOP3 <
> - {1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
> + defm _e64 : VOP3_1_m <
> + op,
> (outs drc:$dst),
> (ins InputMods:$src0_modifiers, src:$src0, i32imm:$clamp, i32imm:$omod),
> - opName#"_e64 $dst, $src0_modifiers, $clamp, $omod", []
> - >, VOP <opName> {
> - let src1 = 0;
> - let src1_modifiers = 0;
> - let src2 = 0;
> - let src2_modifiers = 0;
> - }
> + opName#"_e64 $dst, $src0_modifiers, $clamp, $omod", [], opName>;
> }
>
> multiclass VOP1_32 <bits<8> op, string opName, list<dag> pattern>
> @@ -326,17 +358,14 @@ multiclass VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
> opName#"_e32 $dst, $src0, $src1", pattern
> >, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
>
> - def _e64 : VOP3 <
> - {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
> + defm _e64 : VOP3_2_m <
> + op,
> (outs vrc:$dst),
> (ins InputMods:$src0_modifiers, arc:$src0,
> InputMods:$src1_modifiers, arc:$src1,
> i32imm:$clamp, i32imm:$omod),
> - opName#"_e64 $dst, $src0_modifiers, $src1_modifiers, $clamp, $omod", []
> - >, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
> - let src2 = 0;
> - let src2_modifiers = 0;
> - }
> + opName#"_e64 $dst, $src0_modifiers, $src1_modifiers, $clamp, $omod", [],
> + opName, revOp>;
> }
>
> multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern,
> -- 1.8.1.5
>
> 0006-TableGen-Allow-AddedComplexity-values-to-be-negative.patch
>
>
> From 1e2781731af1ffee20c6b00712c7192a64bfc616 Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Mon, 30 Jun 2014 13:03:17 -0400
> Subject: [PATCH 6/7] TableGen: Allow AddedComplexity values to be negative
>
> This is useful for cases when stand-alone patterns are preferred to the
> patterns included in the instruction definitions. Instead of requiring
> that stand-alone patterns set a larger AddedComplexity value, which
> can be confusing to new developers, this allows us to reduce the
> complexity of the included patterns to achieve the same result.
> ---
> test/TableGen/NegativeAddedComplexity.ll | 41 ++++++++++++++++++++++++++++++++
> utils/TableGen/CodeGenDAGPatterns.cpp | 2 +-
> utils/TableGen/CodeGenDAGPatterns.h | 8 +++----
> utils/TableGen/DAGISelEmitter.cpp | 4 ++--
> 4 files changed, 48 insertions(+), 7 deletions(-)
> create mode 100644 test/TableGen/NegativeAddedComplexity.ll
>
> diff --git a/test/TableGen/NegativeAddedComplexity.ll b/test/TableGen/NegativeAddedComplexity.ll
> new file mode 100644
> index 0000000..54c52ab
> --- /dev/null
> +++ b/test/TableGen/NegativeAddedComplexity.ll
> @@ -0,0 +1,41 @@
> +// RUN: llvm-tblgen -I../../include -gen-dag-isel %s | FileCheck %s
> +// XFAIL: vg_leak
> +
> +include "llvm/Target/Target.td"
> +
> +// Make sure the higher complexity pattern comes first
> +// CHECK: TARGET_VAL(::ADD0)
> +// CHECK: Complexity = {{[^-]}}
> +// Make sure the ADD1 pattern has a negative complexity
> +// CHECK: TARGET_VAL(::ADD1)
> +// CHECK: Complexity = -{{[0-9]+}}
> +
> +def TestRC : RegisterClass<"TEST", [i32], 32, (add)>;
> +
> +def TestInstrInfo : InstrInfo;
> +
> +def Test : Target {
> + let InstructionSet = TestInstrInfo;
> +}
> +
> +def ADD0 : Instruction {
> + let OutOperandList = (outs TestRC:$dst);
> + let InOperandList = (ins TestRC:$src0, TestRC:$src1);
> +}
> +
> +def ADD1 : Instruction {
> + let OutOperandList = (outs TestRC:$dst);
> + let InOperandList = (ins TestRC:$src0, TestRC:$src1);
> +}
> +
> +def : Pat <
> + (add i32:$src0, i32:$src1),
> + (ADD1 $src0, $src1)
> +> {
> + let AddedComplexity = -1000;
> +}
> +
> +def : Pat <
> + (add i32:$src0, i32:$src1),
> + (ADD0 $src0, $src1)
> +>;
> diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp
> index 00bc9a5..fa94fd3 100644
> --- a/utils/TableGen/CodeGenDAGPatterns.cpp
> +++ b/utils/TableGen/CodeGenDAGPatterns.cpp
> @@ -771,7 +771,7 @@ static unsigned getPatternSize(const TreePatternNode *P,
>
> /// Compute the complexity metric for the input pattern. This roughly
> /// corresponds to the number of nodes that are covered.
> -unsigned PatternToMatch::
> +int PatternToMatch::
> getPatternComplexity(const CodeGenDAGPatterns &CGP) const {
> return getPatternSize(getSrcPattern(), CGP) + getAddedComplexity();
> }
> diff --git a/utils/TableGen/CodeGenDAGPatterns.h b/utils/TableGen/CodeGenDAGPatterns.h
> index fb30cdd..ef6c787 100644
> --- a/utils/TableGen/CodeGenDAGPatterns.h
> +++ b/utils/TableGen/CodeGenDAGPatterns.h
> @@ -667,7 +667,7 @@ public:
> PatternToMatch(Record *srcrecord, ListInit *preds,
> TreePatternNode *src, TreePatternNode *dst,
> const std::vector<Record*> &dstregs,
> - unsigned complexity, unsigned uid)
> + int complexity, unsigned uid)
> : SrcRecord(srcrecord), Predicates(preds), SrcPattern(src), DstPattern(dst),
> Dstregs(dstregs), AddedComplexity(complexity), ID(uid) {}
>
> @@ -676,7 +676,7 @@ public:
> TreePatternNode *SrcPattern; // Source pattern to match.
> TreePatternNode *DstPattern; // Resulting pattern.
> std::vector<Record*> Dstregs; // Physical register defs being matched.
> - unsigned AddedComplexity; // Add to matching pattern complexity.
> + int AddedComplexity; // Add to matching pattern complexity.
> unsigned ID; // Unique ID for the record.
>
> Record *getSrcRecord() const { return SrcRecord; }
> @@ -684,13 +684,13 @@ public:
> TreePatternNode *getSrcPattern() const { return SrcPattern; }
> TreePatternNode *getDstPattern() const { return DstPattern; }
> const std::vector<Record*> &getDstRegs() const { return Dstregs; }
> - unsigned getAddedComplexity() const { return AddedComplexity; }
> + int getAddedComplexity() const { return AddedComplexity; }
>
> std::string getPredicateCheck() const;
>
> /// Compute the complexity metric for the input pattern. This roughly
> /// corresponds to the number of nodes that are covered.
> - unsigned getPatternComplexity(const CodeGenDAGPatterns &CGP) const;
> + int getPatternComplexity(const CodeGenDAGPatterns &CGP) const;
> };
>
> class CodeGenDAGPatterns {
> diff --git a/utils/TableGen/DAGISelEmitter.cpp b/utils/TableGen/DAGISelEmitter.cpp
> index 82682cd..e2e6ab1 100644
> --- a/utils/TableGen/DAGISelEmitter.cpp
> +++ b/utils/TableGen/DAGISelEmitter.cpp
> @@ -94,8 +94,8 @@ struct PatternSortingPredicate {
> // Otherwise, if the patterns might both match, sort based on complexity,
> // which means that we prefer to match patterns that cover more nodes in the
> // input over nodes that cover fewer.
> - unsigned LHSSize = LHS->getPatternComplexity(CGP);
> - unsigned RHSSize = RHS->getPatternComplexity(CGP);
> + int LHSSize = LHS->getPatternComplexity(CGP);
> + int RHSSize = RHS->getPatternComplexity(CGP);
> if (LHSSize > RHSSize) return true; // LHS -> bigger -> less cost
> if (LHSSize < RHSSize) return false;
>
> -- 1.8.1.5
>
> 0007-R600-SI-Do-abs-neg-folding-with-ComplexPatterns.patch
>
>
> From 24d9dc94366ba85f9ccfd25e34d880930588cbea Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Mon, 30 Jun 2014 13:02:59 -0400
> Subject: [PATCH 7/7] R600/SI: Do abs/neg folding with ComplexPatterns
>
> Abs/neg folding has moved out of foldOperands and into the instruction
> selection phase using complex patterns. As a consequence of this
> change, we now prefer to select the 64-bit encoding for most
> instructions and the modifier operands have been dropped from some
> integer VOP3 instructions.
> ---
> lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 35 ++++
> lib/Target/R600/AMDGPUInstructions.td | 8 +
> lib/Target/R600/SIISelLowering.cpp | 20 ---
> lib/Target/R600/SIInstrFormats.td | 5 +
> lib/Target/R600/SIInstrInfo.cpp | 32 ++--
> lib/Target/R600/SIInstrInfo.h | 7 +
> lib/Target/R600/SIInstrInfo.td | 122 +++++++++++---
> lib/Target/R600/SIInstructions.td | 290 ++++++++++++++++-----------------
> test/CodeGen/R600/fabs.ll | 3 +-
> test/CodeGen/R600/fneg.ll | 3 +-
> test/CodeGen/R600/mul_uint24.ll | 4 +-
> 11 files changed, 314 insertions(+), 215 deletions(-)
>
> diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> index b4d79e5..b86d7ce 100644
> --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> @@ -86,6 +86,9 @@ private:
> bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
> bool SelectMUBUFAddr64(SDValue Addr, SDValue &Ptr, SDValue &Offset,
> SDValue &ImmOffset) const;
> + bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
> + bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
> + SDValue &Clamp, SDValue &Omod) const;
>
> SDNode *SelectADD_SUB_I64(SDNode *N);
> SDNode *SelectDIV_SCALE(SDNode *N);
> @@ -776,6 +779,38 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &Ptr,
> return true;
> }
>
> +bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
> + SDValue &SrcMods) const {
> +
> + unsigned Mods = 0;
> +
> + Src = In;
> +
> + if (Src.getOpcode() == ISD::FNEG) {
> + Mods |= SISrcMods::NEG;
> + Src = Src.getOperand(0);
> + }
> +
> + if (Src.getOpcode() == ISD::FABS) {
> + Mods |= SISrcMods::ABS;
> + Src = Src.getOperand(0);
> + }
> +
> + SrcMods = CurDAG->getTargetConstant(Mods, MVT::i32);
> +
> + return true;
> +}
> +
> +bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
> + SDValue &SrcMods, SDValue &Clamp,
> + SDValue &Omod) const {
> + // FIXME: Handle Clamp and Omod
> + Clamp = CurDAG->getTargetConstant(0, MVT::i32);
> + Omod = CurDAG->getTargetConstant(0, MVT::i32);
> +
> + return SelectVOP3Mods(In, Src, SrcMods);
> +}
> +
> void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
> const AMDGPUTargetLowering& Lowering =
> *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
> diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
> index b86b781..6e58ecd 100644
> --- a/lib/Target/R600/AMDGPUInstructions.td
> +++ b/lib/Target/R600/AMDGPUInstructions.td
> @@ -293,6 +293,14 @@ def atomic_cmp_swap_64_local :
> AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
> }]>;
>
> +//===----------------------------------------------------------------------===//
> +// Misc Pattern Fragments
> +//===----------------------------------------------------------------------===//
> +
> +def fmad : PatFrag <
> + (ops node:$src0, node:$src1, node:$src2),
> + (fadd (fmul node:$src0, node:$src1), node:$src2)
> +>;
>
> class Constants {
> int TWO_PI = 0x40c90fdb;
> diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
> index 29e4b98..b1ce725 100644
> --- a/lib/Target/R600/SIISelLowering.cpp
> +++ b/lib/Target/R600/SIISelLowering.cpp
> @@ -1471,26 +1471,6 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
> continue;
> if (!Operand.isMachineOpcode())
> continue;
> - if (Operand.getMachineOpcode() == AMDGPU::FNEG_SI) {
> - Ops.pop_back();
> - Ops.push_back(Operand.getOperand(0));
> - InputModifiers[i] = 1;
> - Promote2e64 = true;
> - if (!DescE64)
> - continue;
> - Desc = DescE64;
> - DescE64 = nullptr;
> - }
> - else if (Operand.getMachineOpcode() == AMDGPU::FABS_SI) {
> - Ops.pop_back();
> - Ops.push_back(Operand.getOperand(0));
> - InputModifiers[i] = 2;
> - Promote2e64 = true;
> - if (!DescE64)
> - continue;
> - Desc = DescE64;
> - DescE64 = nullptr;
> - }
> }
>
> if (Promote2e64) {
> diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
> index 2552001..fd669a0 100644
> --- a/lib/Target/R600/SIInstrFormats.td
> +++ b/lib/Target/R600/SIInstrFormats.td
> @@ -56,6 +56,11 @@ class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> :
> let mayStore = 0;
> let hasSideEffects = 0;
> let UseNamedOperandTable = 1;
> + // Using complex patterns gives VOP3 patterns a very high complexity rating,
> + // but standalone patterns are almost always preferred, so we need to adjust the
> + // priority lower. The goal is to use a high number to reduce complexity to
> + // zero (or less than zero).
> + let AddedComplexity = -1000;
> let VOP3 = 1;
>
> int Size = 8;
> diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
> index dc1667a..9d2286b 100644
> --- a/lib/Target/R600/SIInstrInfo.cpp
> +++ b/lib/Target/R600/SIInstrInfo.cpp
> @@ -384,12 +384,17 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
> return nullptr;
> }
>
> - // XXX: Commute VOP3 instructions with abs and neg set.
> - if (isVOP3(MI->getOpcode()) &&
> - (MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
> - AMDGPU::OpName::abs)).getImm() ||
> - MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
> - AMDGPU::OpName::neg)).getImm()))
> + // XXX: Commute VOP3 instructions with abs and neg set .
> + const MachineOperand *Abs, *Neg, *Src0Mods, *Src1Mods, *Src2Mods;
> + Abs = getNamedOperand(*MI, AMDGPU::OpName::abs);
> + Neg = getNamedOperand(*MI, AMDGPU::OpName::neg);
> + Src0Mods = getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers);
> + Src1Mods = getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers);
> + Src2Mods = getNamedOperand(*MI, AMDGPU::OpName::src2_modifiers);
> +
> + if ((Abs && Abs->getImm()) || (Neg && Neg->getImm()) ||
> + (Src0Mods && Src0Mods->getImm()) || (Src1Mods && Src1Mods->getImm()) ||
> + (Src2Mods && Src2Mods->getImm()))
> return nullptr;
>
> unsigned Reg = MI->getOperand(1).getReg();
> @@ -1261,17 +1266,9 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
> // We are converting these to a BFE, so we need to add the missing
> // operands for the size and offset.
> unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
> - Inst->addOperand(Inst->getOperand(1));
> - Inst->getOperand(1).ChangeToImmediate(0);
> - Inst->addOperand(MachineOperand::CreateImm(0));
> - Inst->addOperand(MachineOperand::CreateImm(0));
> Inst->addOperand(MachineOperand::CreateImm(0));
> Inst->addOperand(MachineOperand::CreateImm(Size));
>
> - // XXX - Other pointless operands. There are 4, but it seems you only need
> - // 3 to not hit an assertion later in MCInstLower.
> - Inst->addOperand(MachineOperand::CreateImm(0));
> - Inst->addOperand(MachineOperand::CreateImm(0));
> } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
> // The VALU version adds the second operand to the result, so insert an
> // extra 0 operand.
> @@ -1290,16 +1287,9 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
>
> uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
> uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
> -
> Inst->RemoveOperand(2); // Remove old immediate.
> - Inst->addOperand(Inst->getOperand(1));
> - Inst->getOperand(1).ChangeToImmediate(0);
> - Inst->addOperand(MachineOperand::CreateImm(0));
> Inst->addOperand(MachineOperand::CreateImm(Offset));
> - Inst->addOperand(MachineOperand::CreateImm(0));
> Inst->addOperand(MachineOperand::CreateImm(BitWidth));
> - Inst->addOperand(MachineOperand::CreateImm(0));
> - Inst->addOperand(MachineOperand::CreateImm(0));
> }
>
> // Update the destination register class.
> diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
> index 7107d87..171630f 100644
> --- a/lib/Target/R600/SIInstrInfo.h
> +++ b/lib/Target/R600/SIInstrInfo.h
> @@ -206,4 +206,11 @@ namespace SIInstrFlags {
> };
> }
>
> +namespace SISrcMods {
> + enum {
> + NEG = 1 << 0,
> + ABS = 1 << 1
> + };
> +}
> +
> #endif //SIINSTRINFO_H
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index 0c92038..411c2f8 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -146,12 +146,17 @@ def FRAMEri32 : Operand<iPTR> {
> let MIOperandInfo = (ops i32:$ptr, i32imm:$index);
> }
>
> +include "SIInstrFormats.td"
> +
> //===----------------------------------------------------------------------===//
> // Complex patterns
> //===----------------------------------------------------------------------===//
>
> def MUBUFAddr64 : ComplexPattern<i64, 3, "SelectMUBUFAddr64">;
>
> +def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
> +def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
> +
> //===----------------------------------------------------------------------===//
> // SI assembler operands
> //===----------------------------------------------------------------------===//
> @@ -161,8 +166,6 @@ def SIOperand {
> int VCC = 0x6A;
> }
>
> -include "SIInstrFormats.td"
> -
> //===----------------------------------------------------------------------===//
> //
> // SI Instruction multiclass helpers.
> @@ -289,6 +292,20 @@ multiclass VOP3_m <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern,
>
> }
>
> +multiclass VOP3_NoMods_m <bits<9> op, dag outs, dag ins, string asm,
> + list<dag> pattern, string opName> {
> +
> + def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
> +
> + let src0_modifiers = 0, src1_modifiers = 0, src2_modifiers = 0,
> + clamp = 0, omod = 0 in {
> +
> + def _si : VOP3_Real_si <op, outs, ins, asm, opName>;
> +
> + } // src0_modifiers = 0, src1_modifiers = 0, src2_modifiers = 0,
> + // clamp = 0, omod = 0
> +}
> +
> multiclass VOP3_1_m <bits<8> op, dag outs, dag ins, string asm,
> list<dag> pattern, string opName> {
>
> @@ -323,58 +340,92 @@ multiclass VOP3_2_m <bits<6> op, dag outs, dag ins, string asm,
> def InputMods : OperandWithDefaultOps <i32, (ops (i32 0))> {
> let PrintMethod = "printOperandAndMods";
> }
> +def InputModsNoDefault : Operand <i32> {
> + let PrintMethod = "printOperandAndMods";
> +}
>
> multiclass VOP1_Helper <bits<8> op, RegisterClass drc, RegisterClass src,
> - string opName, list<dag> pattern> {
> + string opName, list<dag> pattern64,
> + list<dag> pattern32 = []> {
>
> def _e32 : VOP1 <
> op, (outs drc:$dst), (ins src:$src0),
> - opName#"_e32 $dst, $src0", pattern
> + opName#"_e32 $dst, $src0", pattern32
> >, VOP <opName>;
>
> defm _e64 : VOP3_1_m <
> op,
> (outs drc:$dst),
> - (ins InputMods:$src0_modifiers, src:$src0, i32imm:$clamp, i32imm:$omod),
> - opName#"_e64 $dst, $src0_modifiers, $clamp, $omod", [], opName>;
> + (ins InputModsNoDefault:$src0_modifiers, src:$src0, i32imm:$clamp, i32imm:$omod),
> + opName#"_e64 $dst, $src0_modifiers, $clamp, $omod", pattern64, opName>;
> }
>
> +
> +multiclass VOP1Inst <bits<8> op, string opName, ValueType dstVT,
> + ValueType srcVT = dstVT,
> + SDPatternOperator node = null_frag> :
> + VOP1_Helper <
> + op,
> + !if(!eq(dstVT.Size, 32), VReg_32, VReg_64),
> + !if(!eq(srcVT.Size, 32), VSrc_32, VSrc_64),
> + opName,
> + [(set dstVT:$dst,
> + (node (srcVT (VOP3Mods0 srcVT:$src0, i32:$src0_modifiers,
> + i32:$clamp, i32:$omod))))]
> +>;
> +
> multiclass VOP1_32 <bits<8> op, string opName, list<dag> pattern>
> - : VOP1_Helper <op, VReg_32, VSrc_32, opName, pattern>;
> + : VOP1_Helper <op, VReg_32, VSrc_32, opName, [], pattern>;
>
> multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern>
> - : VOP1_Helper <op, VReg_64, VSrc_64, opName, pattern>;
> + : VOP1_Helper <op, VReg_64, VSrc_64, opName, [], pattern>;
>
> multiclass VOP1_32_64 <bits<8> op, string opName, list<dag> pattern>
> - : VOP1_Helper <op, VReg_32, VSrc_64, opName, pattern>;
> + : VOP1_Helper <op, VReg_32, VSrc_64, opName, [], pattern>;
>
> multiclass VOP1_64_32 <bits<8> op, string opName, list<dag> pattern>
> - : VOP1_Helper <op, VReg_64, VSrc_32, opName, pattern>;
> + : VOP1_Helper <op, VReg_64, VSrc_32, opName, [], pattern>;
>
> multiclass VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
> - string opName, list<dag> pattern, string revOp> {
> + string opName, list<dag> pattern32,
> + list<dag> pattern64, string revOp> {
> def _e32 : VOP2 <
> op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1),
> - opName#"_e32 $dst, $src0, $src1", pattern
> + opName#"_e32 $dst, $src0, $src1", pattern32
> >, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
>
> defm _e64 : VOP3_2_m <
> op,
> (outs vrc:$dst),
> - (ins InputMods:$src0_modifiers, arc:$src0,
> - InputMods:$src1_modifiers, arc:$src1,
> + (ins InputModsNoDefault:$src0_modifiers, arc:$src0,
> + InputModsNoDefault:$src1_modifiers, arc:$src1,
> i32imm:$clamp, i32imm:$omod),
> - opName#"_e64 $dst, $src0_modifiers, $src1_modifiers, $clamp, $omod", [],
> - opName, revOp>;
> + opName#"_e64 $dst, $src0_modifiers, $src1_modifiers, $clamp, $omod",
> + pattern64, opName, revOp>;
> }
>
> -multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern,
> +multiclass VOP2Inst <bits<6> op, string opName, ValueType dstVT,
> + ValueType srcVT = dstVT,
> + SDPatternOperator node = null_frag> :
> + VOP2_Helper <
> + op,
> + !if(!eq(dstVT.Size, 32), VReg_32, VReg_64),
> + !if(!eq(srcVT.Size, 32), VSrc_32, VSrc_64),
> + opName, [],
> + [(set dstVT:$dst,
> + (node (srcVT (VOP3Mods0 srcVT:$src0, i32:$src0_modifiers,
> + i32:$clamp, i32:$omod)),
> + (srcVT (VOP3Mods srcVT:$src1, i32:$src1_modifiers))))],
> + opName // revOp
> +>;
> +
> +multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern32,
> string revOp = opName>
> - : VOP2_Helper <op, VReg_32, VSrc_32, opName, pattern, revOp>;
> + : VOP2_Helper <op, VReg_32, VSrc_32, opName, pattern32, [], revOp>;
>
> -multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern,
> +multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern32,
> string revOp = opName>
> - : VOP2_Helper <op, VReg_64, VSrc_64, opName, pattern, revOp>;
> + : VOP2_Helper <op, VReg_64, VSrc_64, opName, pattern32, [], revOp>;
>
> multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern,
> RegisterClass src0_rc, string revOp = opName> {
> @@ -390,7 +441,7 @@ multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern,
> (ins InputMods: $src0_modifiers, VSrc_32:$src0,
> InputMods:$src1_modifiers, VSrc_32:$src1,
> i32imm:$clamp, i32imm:$omod),
> - opName#"_e64 $dst, $src0_modifiers, $src1_modifiers, $clamp, $omod", []
> + opName#"_e64 $dst, $src0, $src1_modifiers, $clamp, $omod", []
> >, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
> let src2 = 0;
> let src2_modifiers = 0;
> @@ -445,12 +496,39 @@ multiclass VOPCX_64 <bits<8> op, string opName,
>
> multiclass VOP3_32 <bits<9> op, string opName, list<dag> pattern> : VOP3_m <
> op, (outs VReg_32:$dst),
> - (ins InputMods: $src0_modifiers, VSrc_32:$src0, InputMods:$src1_modifiers,
> + (ins InputMods:$src0_modifiers, VSrc_32:$src0, InputMods:$src1_modifiers,
> VSrc_32:$src1, InputMods:$src2_modifiers, VSrc_32:$src2,
> InstFlag:$clamp, InstFlag:$omod),
> opName#" $dst, $src0_modifiers, $src1, $src2, $clamp, $omod", pattern, opName
> >;
>
> +multiclass VOP3_32NoMods <bits<9> op, string opName, list<dag> pattern> :
> + VOP3_NoMods_m <
> + op, (outs VReg_32:$dst),
> + (ins VSrc_32:$src0, VSrc_32:$src1, VSrc_32:$src2),
> + opName#" $dst, $src0, $src1, $src2", pattern, opName
> +>;
> +
> +multiclass VOP3Inst <bits<9> op, string opName, ValueType dstVT,
> + ValueType srcVT = dstVT,
> + SDPatternOperator node = null_frag,
I'm not sure if I like hiding the pattern in the instruction classes vs.
specifying them on every instruction, particularly if there actually is
a use for the instruction pattern as a list.
> + RegisterClass dstRC =
> + !if(!eq(dstVT.Size, 32), VReg_32, VReg_64),
> + RegisterClass srcRC =
> + !if(!eq(srcVT.Size, 32), VSrc_32, VSrc_64)> : VOP3_m <
> + op, (outs dstRC:$dst),
> + (ins i32imm:$src0_modifiers, srcRC:$src0, i32imm:$src1_modifiers,
> + srcRC:$src1, i32imm:$src2_modifiers, srcRC:$src2,
> + i32imm:$clamp, i32imm:$omod),
> + opName#" $dst, $src0, $src1, $src2, $clamp, $omod",
> + [(set dstVT:$dst,
> + (node (srcVT (VOP3Mods0 srcVT:$src0, i32:$src0_modifiers,
> + i32:$clamp, i32:$omod)),
> + (srcVT (VOP3Mods srcVT:$src1, i32:$src1_modifiers)),
> + (srcVT (VOP3Mods srcVT:$src2, i32:$src2_modifiers))))],
> + opName
> +>;
> +
> class VOP3_64_32 <bits <9> op, string opName, list<dag> pattern> : VOP3 <
> op, (outs VReg_64:$dst),
> (ins VSrc_64:$src0, VSrc_32:$src1),
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 0b12f60..755f8fd 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -1030,7 +1030,7 @@ defm IMAGE_GET_LOD : MIMG_Sampler <0x00000060, "IMAGE_GET_LOD">;
> //def V_NOP : VOP1_ <0x00000000, "V_NOP", []>;
>
> let neverHasSideEffects = 1, isMoveImm = 1 in {
> -defm V_MOV_B32 : VOP1_32 <0x00000001, "V_MOV_B32", []>;
> +defm V_MOV_B32 : VOP1Inst <0x00000001, "V_MOV_B32", i32>;
> } // End neverHasSideEffects = 1, isMoveImm = 1
>
> let Uses = [EXEC] in {
> @@ -1045,109 +1045,109 @@ def V_READFIRSTLANE_B32 : VOP1 <
>
> }
>
> -defm V_CVT_I32_F64 : VOP1_32_64 <0x00000003, "V_CVT_I32_F64",
> - [(set i32:$dst, (fp_to_sint f64:$src0))]
> +defm V_CVT_I32_F64 : VOP1Inst <0x00000003, "V_CVT_I32_F64",
> + i32, f64, fp_to_sint
> >;
> -defm V_CVT_F64_I32 : VOP1_64_32 <0x00000004, "V_CVT_F64_I32",
> - [(set f64:$dst, (sint_to_fp i32:$src0))]
> +defm V_CVT_F64_I32 : VOP1Inst <0x00000004, "V_CVT_F64_I32",
> + f64, i32, sint_to_fp
> >;
> -defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32",
> - [(set f32:$dst, (sint_to_fp i32:$src0))]
> +defm V_CVT_F32_I32 : VOP1Inst <0x00000005, "V_CVT_F32_I32",
> + f32, i32, sint_to_fp
> >;
> -defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32",
> - [(set f32:$dst, (uint_to_fp i32:$src0))]
> +defm V_CVT_F32_U32 : VOP1Inst <0x00000006, "V_CVT_F32_U32",
> + f32, i32, uint_to_fp
> >;
> -defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32",
> - [(set i32:$dst, (fp_to_uint f32:$src0))]
> +defm V_CVT_U32_F32 : VOP1Inst <0x00000007, "V_CVT_U32_F32",
> + i32, f32, fp_to_uint
> >;
> -defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32",
> - [(set i32:$dst, (fp_to_sint f32:$src0))]
> +defm V_CVT_I32_F32 : VOP1Inst <0x00000008, "V_CVT_I32_F32",
> + i32, f32, fp_to_sint
> >;
> defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>;
> ////def V_CVT_F16_F32 : VOP1_F16 <0x0000000a, "V_CVT_F16_F32", []>;
> -//defm V_CVT_F32_F16 : VOP1_32 <0x0000000b, "V_CVT_F32_F16", []>;
> -//defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "V_CVT_RPI_I32_F32", []>;
> -//defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "V_CVT_FLR_I32_F32", []>;
> -//defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "V_CVT_OFF_F32_I4", []>;
> -defm V_CVT_F32_F64 : VOP1_32_64 <0x0000000f, "V_CVT_F32_F64",
> - [(set f32:$dst, (fround f64:$src0))]
> +//defm V_CVT_F32_F16 : VOP1Inst <0x0000000b, "V_CVT_F32_F16", []>;
> +//defm V_CVT_RPI_I32_F32 : VOP1Inst <0x0000000c, "V_CVT_RPI_I32_F32", []>;
> +//defm V_CVT_FLR_I32_F32 : VOP1Inst <0x0000000d, "V_CVT_FLR_I32_F32", []>;
> +//defm V_CVT_OFF_F32_I4 : VOP1Inst <0x0000000e, "V_CVT_OFF_F32_I4", []>;
> +defm V_CVT_F32_F64 : VOP1Inst <0x0000000f, "V_CVT_F32_F64",
> + f32, f64, fround
> >;
> -defm V_CVT_F64_F32 : VOP1_64_32 <0x00000010, "V_CVT_F64_F32",
> - [(set f64:$dst, (fextend f32:$src0))]
> +defm V_CVT_F64_F32 : VOP1Inst <0x00000010, "V_CVT_F64_F32",
> + f64, f32, fextend
> >;
> -defm V_CVT_F32_UBYTE0 : VOP1_32 <0x00000011, "V_CVT_F32_UBYTE0",
> - [(set f32:$dst, (AMDGPUcvt_f32_ubyte0 i32:$src0))]
> +defm V_CVT_F32_UBYTE0 : VOP1Inst <0x00000011, "V_CVT_F32_UBYTE0",
> + f32, i32, AMDGPUcvt_f32_ubyte0
> >;
> -defm V_CVT_F32_UBYTE1 : VOP1_32 <0x00000012, "V_CVT_F32_UBYTE1",
> - [(set f32:$dst, (AMDGPUcvt_f32_ubyte1 i32:$src0))]
> +defm V_CVT_F32_UBYTE1 : VOP1Inst <0x00000012, "V_CVT_F32_UBYTE1",
> + f32, i32, AMDGPUcvt_f32_ubyte1
> >;
> -defm V_CVT_F32_UBYTE2 : VOP1_32 <0x00000013, "V_CVT_F32_UBYTE2",
> - [(set f32:$dst, (AMDGPUcvt_f32_ubyte2 i32:$src0))]
> +defm V_CVT_F32_UBYTE2 : VOP1Inst <0x00000013, "V_CVT_F32_UBYTE2",
> + f32, i32, AMDGPUcvt_f32_ubyte2
> >;
> -defm V_CVT_F32_UBYTE3 : VOP1_32 <0x00000014, "V_CVT_F32_UBYTE3",
> - [(set f32:$dst, (AMDGPUcvt_f32_ubyte3 i32:$src0))]
> +defm V_CVT_F32_UBYTE3 : VOP1Inst <0x00000014, "V_CVT_F32_UBYTE3",
> + f32, i32, AMDGPUcvt_f32_ubyte3
> >;
> -defm V_CVT_U32_F64 : VOP1_32_64 <0x00000015, "V_CVT_U32_F64",
> - [(set i32:$dst, (fp_to_uint f64:$src0))]
> +defm V_CVT_U32_F64 : VOP1Inst <0x00000015, "V_CVT_U32_F64",
> + i32, f64, fp_to_uint
> >;
> -defm V_CVT_F64_U32 : VOP1_64_32 <0x00000016, "V_CVT_F64_U32",
> - [(set f64:$dst, (uint_to_fp i32:$src0))]
> +defm V_CVT_F64_U32 : VOP1Inst <0x00000016, "V_CVT_F64_U32",
> + f64, i32, uint_to_fp
> >;
>
> -defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32",
> - [(set f32:$dst, (AMDGPUfract f32:$src0))]
> +defm V_FRACT_F32 : VOP1Inst <0x00000020, "V_FRACT_F32",
> + f32, f32, AMDGPUfract
> >;
> -defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32",
> - [(set f32:$dst, (ftrunc f32:$src0))]
> +defm V_TRUNC_F32 : VOP1Inst <0x00000021, "V_TRUNC_F32",
> + f32, f32, ftrunc
> >;
> -defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32",
> - [(set f32:$dst, (fceil f32:$src0))]
> +defm V_CEIL_F32 : VOP1Inst <0x00000022, "V_CEIL_F32",
> + f32, f32, fceil
> >;
> -defm V_RNDNE_F32 : VOP1_32 <0x00000023, "V_RNDNE_F32",
> - [(set f32:$dst, (frint f32:$src0))]
> +defm V_RNDNE_F32 : VOP1Inst <0x00000023, "V_RNDNE_F32",
> + f32, f32, frint
> >;
> -defm V_FLOOR_F32 : VOP1_32 <0x00000024, "V_FLOOR_F32",
> - [(set f32:$dst, (ffloor f32:$src0))]
> +defm V_FLOOR_F32 : VOP1Inst <0x00000024, "V_FLOOR_F32",
> + f32, f32, ffloor
> >;
> -defm V_EXP_F32 : VOP1_32 <0x00000025, "V_EXP_F32",
> - [(set f32:$dst, (fexp2 f32:$src0))]
> +defm V_EXP_F32 : VOP1Inst <0x00000025, "V_EXP_F32",
> + f32, f32, fexp2
> >;
> defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>;
> -defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32",
> - [(set f32:$dst, (flog2 f32:$src0))]
> +defm V_LOG_F32 : VOP1Inst <0x00000027, "V_LOG_F32",
> + f32, f32, flog2
> >;
>
> defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>;
> defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>;
> -defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32",
> - [(set f32:$dst, (AMDGPUrcp f32:$src0))]
> +defm V_RCP_F32 : VOP1Inst <0x0000002a, "V_RCP_F32",
> + f32, f32, AMDGPUrcp
> >;
> defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>;
> -defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32",
> - [(set f32:$dst, (AMDGPUrsq_clamped f32:$src0))]
> +defm V_RSQ_CLAMP_F32 : VOP1Inst <0x0000002c, "V_RSQ_CLAMP_F32",
> + f32, f32, AMDGPUrsq_clamped
> >;
> -defm V_RSQ_LEGACY_F32 : VOP1_32 <
> +defm V_RSQ_LEGACY_F32 : VOP1Inst <
> 0x0000002d, "V_RSQ_LEGACY_F32",
> - [(set f32:$dst, (AMDGPUrsq_legacy f32:$src0))]
> + f32, f32, AMDGPUrsq_legacy
> >;
> -defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32",
> - [(set f32:$dst, (AMDGPUrsq f32:$src0))]
> +defm V_RSQ_F32 : VOP1Inst <0x0000002e, "V_RSQ_F32",
> + f32, f32, AMDGPUrsq
> >;
> -defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64",
> - [(set f64:$dst, (AMDGPUrcp f64:$src0))]
> +defm V_RCP_F64 : VOP1Inst <0x0000002f, "V_RCP_F64",
> + f64, f64, AMDGPUrcp
> >;
> -defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>;
> -defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64",
> - [(set f64:$dst, (AMDGPUrsq f64:$src0))]
> +defm V_RCP_CLAMP_F64 : VOP1_32 <0x00000030, "V_RCP_CLAMP_F64", []>;
> +defm V_RSQ_F64 : VOP1Inst <0x00000031, "V_RSQ_F64",
> + f64, f64, AMDGPUrsq
> >;
> -defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64",
> - [(set f64:$dst, (AMDGPUrsq_clamped f64:$src0))]
> +defm V_RSQ_CLAMP_F64 : VOP1Inst <0x00000032, "V_RSQ_CLAMP_F64",
> + f64, f64, AMDGPUrsq_clamped
> >;
> -defm V_SQRT_F32 : VOP1_32 <0x00000033, "V_SQRT_F32",
> - [(set f32:$dst, (fsqrt f32:$src0))]
> +defm V_SQRT_F32 : VOP1Inst <0x00000033, "V_SQRT_F32",
> + f32, f32, fsqrt
> >;
> -defm V_SQRT_F64 : VOP1_64 <0x00000034, "V_SQRT_F64",
> - [(set f64:$dst, (fsqrt f64:$src0))]
> +defm V_SQRT_F64 : VOP1Inst <0x00000034, "V_SQRT_F64",
> + f64, f64, fsqrt
> >;
> defm V_SIN_F32 : VOP1_32 <0x00000035, "V_SIN_F32", []>;
> defm V_COS_F32 : VOP1_32 <0x00000036, "V_COS_F32", []>;
> @@ -1157,8 +1157,8 @@ defm V_FFBH_U32 : VOP1_32 <0x00000039, "V_FFBH_U32", []>;
> defm V_FFBL_B32 : VOP1_32 <0x0000003a, "V_FFBL_B32", []>;
> defm V_FFBH_I32 : VOP1_32 <0x0000003b, "V_FFBH_I32", []>;
> //defm V_FREXP_EXP_I32_F64 : VOP1_32 <0x0000003c, "V_FREXP_EXP_I32_F64", []>;
> -defm V_FREXP_MANT_F64 : VOP1_64 <0x0000003d, "V_FREXP_MANT_F64", []>;
> -defm V_FRACT_F64 : VOP1_64 <0x0000003e, "V_FRACT_F64", []>;
> +defm V_FREXP_MANT_F64 : VOP1_32 <0x0000003d, "V_FREXP_MANT_F64", []>;
> +defm V_FRACT_F64 : VOP1_32 <0x0000003e, "V_FRACT_F64", []>;
> //defm V_FREXP_EXP_I32_F32 : VOP1_32 <0x0000003f, "V_FREXP_EXP_I32_F32", []>;
> defm V_FREXP_MANT_F32 : VOP1_32 <0x00000040, "V_FREXP_MANT_F32", []>;
> //def V_CLREXCP : VOP1_ <0x00000041, "V_CLREXCP", []>;
> @@ -1241,12 +1241,12 @@ def V_WRITELANE_B32 : VOP2 <
> >;
>
> let isCommutable = 1 in {
> -defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32",
> - [(set f32:$dst, (fadd f32:$src0, f32:$src1))]
> +defm V_ADD_F32 : VOP2Inst <0x00000003, "V_ADD_F32",
> + f32, f32, fadd
> >;
>
> -defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32",
> - [(set f32:$dst, (fsub f32:$src0, f32:$src1))]
> +defm V_SUB_F32 : VOP2Inst <0x00000004, "V_SUB_F32",
> + f32, f32, fsub
> >;
> defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", [], "V_SUB_F32">;
> } // End isCommutable = 1
> @@ -1255,78 +1255,78 @@ defm V_MAC_LEGACY_F32 : VOP2_32 <0x00000006, "V_MAC_LEGACY_F32", []>;
>
> let isCommutable = 1 in {
>
> -defm V_MUL_LEGACY_F32 : VOP2_32 <
> +defm V_MUL_LEGACY_F32 : VOP2Inst <
> 0x00000007, "V_MUL_LEGACY_F32",
> - [(set f32:$dst, (int_AMDGPU_mul f32:$src0, f32:$src1))]
> + f32, f32, int_AMDGPU_mul
> >;
>
> -defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32",
> - [(set f32:$dst, (fmul f32:$src0, f32:$src1))]
> +defm V_MUL_F32 : VOP2Inst <0x00000008, "V_MUL_F32",
> + f32, f32, fmul
> >;
>
>
> -defm V_MUL_I32_I24 : VOP2_32 <0x00000009, "V_MUL_I32_I24",
> - [(set i32:$dst, (AMDGPUmul_i24 i32:$src0, i32:$src1))]
> +defm V_MUL_I32_I24 : VOP2Inst <0x00000009, "V_MUL_I32_I24",
> + i32, i32, AMDGPUmul_i24
> >;
> //defm V_MUL_HI_I32_I24 : VOP2_32 <0x0000000a, "V_MUL_HI_I32_I24", []>;
> -defm V_MUL_U32_U24 : VOP2_32 <0x0000000b, "V_MUL_U32_U24",
> - [(set i32:$dst, (AMDGPUmul_u24 i32:$src0, i32:$src1))]
> +defm V_MUL_U32_U24 : VOP2Inst <0x0000000b, "V_MUL_U32_U24",
> + i32, i32, AMDGPUmul_u24
> >;
> //defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "V_MUL_HI_U32_U24", []>;
>
>
> -defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32",
> - [(set f32:$dst, (AMDGPUfmin f32:$src0, f32:$src1))]
> +defm V_MIN_LEGACY_F32 : VOP2Inst <0x0000000d, "V_MIN_LEGACY_F32",
> + f32, f32, AMDGPUfmin
> >;
>
> -defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32",
> - [(set f32:$dst, (AMDGPUfmax f32:$src0, f32:$src1))]
> +defm V_MAX_LEGACY_F32 : VOP2Inst <0x0000000e, "V_MAX_LEGACY_F32",
> + f32, f32, AMDGPUfmax
> >;
>
> defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>;
> defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>;
> -defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32",
> - [(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))]>;
> -defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32",
> - [(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))]>;
> -defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32",
> - [(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))]>;
> -defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32",
> - [(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))]>;
> -
> -defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32",
> - [(set i32:$dst, (srl i32:$src0, i32:$src1))]
> +defm V_MIN_I32 : VOP2Inst <0x00000011, "V_MIN_I32",
> + i32, i32, AMDGPUsmin>;
> +defm V_MAX_I32 : VOP2Inst <0x00000012, "V_MAX_I32",
> + i32, i32, AMDGPUsmax>;
> +defm V_MIN_U32 : VOP2Inst <0x00000013, "V_MIN_U32",
> + i32, i32, AMDGPUumin>;
> +defm V_MAX_U32 : VOP2Inst <0x00000014, "V_MAX_U32",
> + i32, i32, AMDGPUumax>;
> +
> +defm V_LSHR_B32 : VOP2Inst <0x00000015, "V_LSHR_B32",
> + i32, i32, srl
> >;
>
> defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", [], "V_LSHR_B32">;
>
> -defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32",
> - [(set i32:$dst, (sra i32:$src0, i32:$src1))]
> +defm V_ASHR_I32 : VOP2Inst <0x00000017, "V_ASHR_I32",
> + i32, i32, sra
> >;
> defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">;
>
> let hasPostISelHook = 1 in {
>
> -defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32",
> - [(set i32:$dst, (shl i32:$src0, i32:$src1))]
> +defm V_LSHL_B32 : VOP2Inst <0x00000019, "V_LSHL_B32",
> + i32, i32, shl
> >;
>
> }
> defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">;
>
> -defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32",
> - [(set i32:$dst, (and i32:$src0, i32:$src1))]>;
> -defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32",
> - [(set i32:$dst, (or i32:$src0, i32:$src1))]
> +defm V_AND_B32 : VOP2Inst <0x0000001b, "V_AND_B32",
> + i32, i32, and>;
> +defm V_OR_B32 : VOP2Inst <0x0000001c, "V_OR_B32",
> + i32, i32, or
> >;
> -defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32",
> - [(set i32:$dst, (xor i32:$src0, i32:$src1))]
> +defm V_XOR_B32 : VOP2Inst <0x0000001d, "V_XOR_B32",
> + i32, i32, xor
> >;
>
> } // End isCommutable = 1
>
> -defm V_BFM_B32 : VOP2_32 <0x0000001e, "V_BFM_B32",
> - [(set i32:$dst, (AMDGPUbfm i32:$src0, i32:$src1))]>;
> +defm V_BFM_B32 : VOP2Inst <0x0000001e, "V_BFM_B32",
> + i32, i32, AMDGPUbfm>;
> defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>;
> defm V_MADMK_F32 : VOP2_32 <0x00000020, "V_MADMK_F32", []>;
> defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>;
> @@ -1358,8 +1358,8 @@ defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>;
> ////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>;
> ////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>;
> ////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "V_CVT_PKNORM_U16_F32", []>;
> -defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32",
> - [(set i32:$dst, (int_SI_packf16 f32:$src0, f32:$src1))]
> +defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <0x0000002f, "V_CVT_PKRTZ_F16_F32",
> + i32, f32, int_SI_packf16
> >;
> ////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>;
> ////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>;
> @@ -1371,13 +1371,11 @@ defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32",
> let neverHasSideEffects = 1 in {
>
> defm V_MAD_LEGACY_F32 : VOP3_32 <0x00000140, "V_MAD_LEGACY_F32", []>;
> -defm V_MAD_F32 : VOP3_32 <0x00000141, "V_MAD_F32",
> - [(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))]
> ->;
> -defm V_MAD_I32_I24 : VOP3_32 <0x00000142, "V_MAD_I32_I24",
> +defm V_MAD_F32 : VOP3Inst <0x00000141, "V_MAD_F32", f32, f32, fmad>;
> +defm V_MAD_I32_I24 : VOP3_32NoMods <0x00000142, "V_MAD_I32_I24",
> [(set i32:$dst, (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2))]
> >;
> -defm V_MAD_U32_U24 : VOP3_32 <0x00000143, "V_MAD_U32_U24",
> +defm V_MAD_U32_U24 : VOP3_32NoMods <0x00000143, "V_MAD_U32_U24",
> [(set i32:$dst, (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2))]
> >;
>
> @@ -1389,24 +1387,20 @@ defm V_CUBETC_F32 : VOP3_32 <0x00000146, "V_CUBETC_F32", []>;
> defm V_CUBEMA_F32 : VOP3_32 <0x00000147, "V_CUBEMA_F32", []>;
>
> let neverHasSideEffects = 1, mayLoad = 0, mayStore = 0 in {
> -defm V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32",
> +defm V_BFE_U32 : VOP3_32NoMods <0x00000148, "V_BFE_U32",
> [(set i32:$dst, (AMDGPUbfe_u32 i32:$src0, i32:$src1, i32:$src2))]>;
> -defm V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32",
> +defm V_BFE_I32 : VOP3_32NoMods <0x00000149, "V_BFE_I32",
> [(set i32:$dst, (AMDGPUbfe_i32 i32:$src0, i32:$src1, i32:$src2))]>;
> }
>
> -defm V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32",
> +defm V_BFI_B32 : VOP3_32NoMods <0x0000014a, "V_BFI_B32",
> [(set i32:$dst, (AMDGPUbfi i32:$src0, i32:$src1, i32:$src2))]>;
> -defm V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32",
> - [(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))]
> ->;
> -def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64",
> - [(set f64:$dst, (fma f64:$src0, f64:$src1, f64:$src2))]
> ->;
> +defm V_FMA_F32 : VOP3Inst <0x0000014b, "V_FMA_F32", f32, f32, fma>;
> +defm V_FMA_F64 : VOP3Inst <0x0000014c, "V_FMA_F64", f64, f64, fma>;
> //def V_LERP_U8 : VOP3_U8 <0x0000014d, "V_LERP_U8", []>;
> -defm V_ALIGNBIT_B32 : VOP3_32 <0x0000014e, "V_ALIGNBIT_B32", []>;
> +defm V_ALIGNBIT_B32 : VOP3_32NoMods <0x0000014e, "V_ALIGNBIT_B32", []>;
>
> -defm V_ALIGNBYTE_B32 : VOP3_32 <0x0000014f, "V_ALIGNBYTE_B32", []>;
> +defm V_ALIGNBYTE_B32 : VOP3_32NoMods <0x0000014f, "V_ALIGNBYTE_B32", []>;
> defm V_MULLIT_F32 : VOP3_32 <0x00000150, "V_MULLIT_F32", []>;
> ////def V_MIN3_F32 : VOP3_MIN3 <0x00000151, "V_MIN3_F32", []>;
> ////def V_MIN3_I32 : VOP3_MIN3 <0x00000152, "V_MIN3_I32", []>;
> @@ -1420,13 +1414,13 @@ defm V_MULLIT_F32 : VOP3_32 <0x00000150, "V_MULLIT_F32", []>;
> //def V_SAD_U8 : VOP3_U8 <0x0000015a, "V_SAD_U8", []>;
> //def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "V_SAD_HI_U8", []>;
> //def V_SAD_U16 : VOP3_U16 <0x0000015c, "V_SAD_U16", []>;
> -defm V_SAD_U32 : VOP3_32 <0x0000015d, "V_SAD_U32", []>;
> +defm V_SAD_U32 : VOP3_32NoMods <0x0000015d, "V_SAD_U32", []>;
> ////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "V_CVT_PK_U8_F32", []>;
> -defm V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32",
> - [(set f32:$dst, (AMDGPUdiv_fixup f32:$src0, f32:$src1, f32:$src2))]
> +defm V_DIV_FIXUP_F32 : VOP3Inst <
> + 0x0000015f, "V_DIV_FIXUP_F32", f32, f32, AMDGPUdiv_fixup
> >;
> -def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64",
> - [(set f64:$dst, (AMDGPUdiv_fixup f64:$src0, f64:$src1, f64:$src2))]
> +defm V_DIV_FIXUP_F64 : VOP3Inst <
> + 0x00000160, "V_DIV_FIXUP_F64", f64, f64, AMDGPUdiv_fixup
> >;
>
> def V_LSHL_B64 : VOP3_64_32 <0x00000161, "V_LSHL_B64",
> @@ -1452,10 +1446,10 @@ def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>;
>
> let isCommutable = 1 in {
>
> -defm V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>;
> -defm V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>;
> -defm V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>;
> -defm V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
> +defm V_MUL_LO_U32 : VOP3_32NoMods <0x00000169, "V_MUL_LO_U32", []>;
> +defm V_MUL_HI_U32 : VOP3_32NoMods <0x0000016a, "V_MUL_HI_U32", []>;
> +defm V_MUL_LO_I32 : VOP3_32NoMods <0x0000016b, "V_MUL_LO_I32", []>;
> +defm V_MUL_HI_I32 : VOP3_32NoMods <0x0000016c, "V_MUL_HI_I32", []>;
>
> } // isCommutable = 1
>
> @@ -1819,7 +1813,7 @@ def : Pat <
>
> def : Pat <
> (i32 (ctpop i32:$popcnt)),
> - (V_BCNT_U32_B32_e64 $popcnt, 0, 0, 0)
> + (V_BCNT_U32_B32_e64 0, $popcnt, 0, 0, 0, 0)
> >;
>
> def : Pat <
> @@ -1827,7 +1821,7 @@ def : Pat <
> (INSERT_SUBREG
> (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
> (V_BCNT_U32_B32_e32 (EXTRACT_SUBREG $src, sub1),
> - (V_BCNT_U32_B32_e64 (EXTRACT_SUBREG $src, sub0), 0, 0, 0)),
> + (V_BCNT_U32_B32_e64 0, (EXTRACT_SUBREG $src, sub0), 0, 0, 0, 0)),
Why did this gain operands?
> sub0),
> (V_MOV_B32_e32 0), sub1)
> >;
> @@ -2313,7 +2307,7 @@ def : Pat <
> def : Pat <
> (int_SI_tid),
> (V_MBCNT_HI_U32_B32_e32 0xffffffff,
> - (V_MBCNT_LO_U32_B32_e64 0xffffffff, 0, 0, 0))
> + (V_MBCNT_LO_U32_B32_e64 0, 0xffffffff, 0, 0, 0, 0))
> >;
>
> //===----------------------------------------------------------------------===//
> @@ -2598,22 +2592,22 @@ let SubtargetPredicate = isCI in {
>
> // Sea Islands new arithmetic instructions
> let neverHasSideEffects = 1 in {
> -defm V_TRUNC_F64 : VOP1_64 <0x00000017, "V_TRUNC_F64",
> - [(set f64:$dst, (ftrunc f64:$src0))]
> +defm V_TRUNC_F64 : VOP1Inst <0x00000017, "V_TRUNC_F64",
> + f64, f64, ftrunc
> >;
> -defm V_CEIL_F64 : VOP1_64 <0x00000018, "V_CEIL_F64",
> - [(set f64:$dst, (fceil f64:$src0))]
> +defm V_CEIL_F64 : VOP1Inst <0x00000018, "V_CEIL_F64",
> + f64, f64, fceil
> >;
> -defm V_FLOOR_F64 : VOP1_64 <0x0000001A, "V_FLOOR_F64",
> - [(set f64:$dst, (ffloor f64:$src0))]
> +defm V_FLOOR_F64 : VOP1Inst <0x0000001A, "V_FLOOR_F64",
> + f64, f64, ffloor
> >;
> -defm V_RNDNE_F64 : VOP1_64 <0x00000019, "V_RNDNE_F64",
> - [(set f64:$dst, (frint f64:$src0))]
> +defm V_RNDNE_F64 : VOP1Inst <0x00000019, "V_RNDNE_F64",
> + f64, f64, frint
> >;
>
> -defm V_QSAD_PK_U16_U8 : VOP3_32 <0x00000173, "V_QSAD_PK_U16_U8", []>;
> -defm V_MQSAD_U16_U8 : VOP3_32 <0x000000172, "V_MQSAD_U16_U8", []>;
> -defm V_MQSAD_U32_U8 : VOP3_32 <0x00000175, "V_MQSAD_U32_U8", []>;
> +defm V_QSAD_PK_U16_U8 : VOP3_32NoMods <0x00000173, "V_QSAD_PK_U16_U8", []>;
> +defm V_MQSAD_U16_U8 : VOP3_32NoMods <0x000000172, "V_MQSAD_U16_U8", []>;
> +defm V_MQSAD_U32_U8 : VOP3_32NoMods <0x00000175, "V_MQSAD_U32_U8", []>;
> def V_MAD_U64_U32 : VOP3_64 <0x00000176, "V_MAD_U64_U32", []>;
>
> // XXX - Does this set VCC?
> diff --git a/test/CodeGen/R600/fabs.ll b/test/CodeGen/R600/fabs.ll
> index b87ce22..fa1b608 100644
> --- a/test/CodeGen/R600/fabs.ll
> +++ b/test/CodeGen/R600/fabs.ll
> @@ -50,8 +50,9 @@ entry:
> }
>
> ; SI-CHECK-LABEL: @fabs_fold
> +; SI-CHECK: S_LOAD_DWORD [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
> ; SI-CHECK-NOT: V_AND_B32_e32
> -; SI-CHECK: V_MUL_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, |v{{[0-9]+}}|
> +; SI-CHECK: V_MUL_F32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
> define void @fabs_fold(float addrspace(1)* %out, float %in0, float %in1) {
> entry:
> %0 = call float @fabs(float %in0)
> diff --git a/test/CodeGen/R600/fneg.ll b/test/CodeGen/R600/fneg.ll
> index 4cddc73..5b47817 100644
> --- a/test/CodeGen/R600/fneg.ll
> +++ b/test/CodeGen/R600/fneg.ll
> @@ -61,8 +61,9 @@ entry:
> }
>
> ; SI-CHECK-LABEL: @fneg_fold
> +; SI-CHECK: S_LOAD_DWORD [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
> ; SI-CHECK-NOT: V_XOR_B32
> -; SI-CHECK: V_MUL_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
> +; SI-CHECK: V_MUL_F32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], v{{[0-9]+}}
> define void @fneg_fold(float addrspace(1)* %out, float %in) {
> entry:
> %0 = fsub float -0.0, %in
> diff --git a/test/CodeGen/R600/mul_uint24.ll b/test/CodeGen/R600/mul_uint24.ll
> index 419f275..72bbe0f 100644
> --- a/test/CodeGen/R600/mul_uint24.ll
> +++ b/test/CodeGen/R600/mul_uint24.ll
> @@ -23,7 +23,7 @@ entry:
> ; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x
> ; EG: 16
> ; SI: V_MUL_U32_U24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
> -; SI: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 16,
> +; SI: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 16
> define void @i16_mul24(i32 addrspace(1)* %out, i16 %a, i16 %b) {
> entry:
> %0 = mul i16 %a, %b
> @@ -37,7 +37,7 @@ entry:
> ; The result must be sign-extended
> ; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x
> ; SI: V_MUL_U32_U24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
> -; SI: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 8,
> +; SI: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 8
>
> define void @i8_mul24(i32 addrspace(1)* %out, i8 %a, i8 %b) {
> entry:
> -- 1.8.1.5
I think this will fix source modifiers for f64. I set fabs / fneg to
expand as a temporary fix. Can you add tests for those versions?
-Matt
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140630/8f4b5d96/attachment.html>
More information about the llvm-commits
mailing list