R600/SI patches
Tom Stellard
tom at stellard.net
Thu Feb 28 13:06:09 PST 2013
+llvm-commits
For the series:
Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
On Thu, Feb 28, 2013 at 05:14:01PM +0100, Christian König wrote:
> Hi Tom,
>
> attached is the next bunch of patches. Nothing major, only some cleanups
> and preparation for new features.
>
> Please review,
> Christian.
> From cb4f2b0296fa021a2e2fef545100776971dc5c1a Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig at amd.com>
> Date: Wed, 27 Feb 2013 16:45:44 +0100
> Subject: [PATCH 1/5] R600/SI: fix inserting waits for unordered defines
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> lib/Target/R600/SIInsertWaits.cpp | 23 +++++++++++++++++++++--
> 1 file changed, 21 insertions(+), 2 deletions(-)
>
> diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp
> index 24fc929..67fbdf7 100644
> --- a/lib/Target/R600/SIInsertWaits.cpp
> +++ b/lib/Target/R600/SIInsertWaits.cpp
> @@ -88,6 +88,9 @@ private:
> MachineBasicBlock::iterator I,
> const Counters &Counts);
>
> + /// \brief Do we need def2def checks?
> + bool unorderedDefines(MachineInstr &MI);
> +
> /// \brief Resolve all operand dependencies to counter requirements
> Counters handleOperands(MachineInstr &MI);
>
> @@ -125,7 +128,7 @@ Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
>
> // Only consider stores or EXP for EXP_CNT
> Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT &&
> - (MI.getOpcode() == AMDGPU::EXP || !MI.getDesc().mayStore()));
> + (MI.getOpcode() == AMDGPU::EXP || MI.getDesc().mayStore()));
>
> // LGKM may uses larger values
> if (TSFlags & SIInstrFlags::LGKM_CNT) {
> @@ -299,8 +302,21 @@ static void increaseCounters(Counters &Dst, const Counters &Src) {
> Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]);
> }
>
> +bool SIInsertWaits::unorderedDefines(MachineInstr &MI) {
> +
> + uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags;
> + if (TSFlags & SIInstrFlags::LGKM_CNT)
> + return true;
> +
> + if (TSFlags & SIInstrFlags::EXP_CNT)
> + return ExpInstrTypesSeen == 3;
> +
> + return false;
> +}
> +
> Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
>
> + bool UnorderedDefines = unorderedDefines(MI);
> Counters Result = ZeroCounts;
>
> // For each register affected by this
> @@ -311,8 +327,11 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
> RegInterval Interval = getRegInterval(Op);
> for (unsigned j = Interval.first; j < Interval.second; ++j) {
>
> - if (Op.isDef())
> + if (Op.isDef()) {
> increaseCounters(Result, UsedRegs[j]);
> + if (UnorderedDefines)
> + increaseCounters(Result, DefinedRegs[j]);
> + }
>
> if (Op.isUse())
> increaseCounters(Result, DefinedRegs[j]);
> --
> 1.7.10.4
>
> From 2eb8b21eb99e0e6ff724dbdc293b0aea740e677a Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig at amd.com>
> Date: Thu, 28 Feb 2013 17:09:57 +0100
> Subject: [PATCH 2/5] R600/SI: fix warning about overloaded virtual
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> lib/Target/R600/AMDGPUStructurizeCFG.cpp | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/lib/Target/R600/AMDGPUStructurizeCFG.cpp b/lib/Target/R600/AMDGPUStructurizeCFG.cpp
> index 26f842e..b723433 100644
> --- a/lib/Target/R600/AMDGPUStructurizeCFG.cpp
> +++ b/lib/Target/R600/AMDGPUStructurizeCFG.cpp
> @@ -243,6 +243,7 @@ public:
> initializeRegionInfoPass(*PassRegistry::getPassRegistry());
> }
>
> + using Pass::doInitialization;
> virtual bool doInitialization(Region *R, RGPassManager &RGM);
>
> virtual bool runOnRegion(Region *R, RGPassManager &RGM);
> --
> 1.7.10.4
>
> From 52cc3831804ba26866339e2918735c0c7ce27e6f Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig at amd.com>
> Date: Tue, 26 Feb 2013 14:24:43 +0100
> Subject: [PATCH 3/5] R600/SI: remove GPR*AlignEncode
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
> It's much easier to specify the encoding with tablegen directly.
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h | 9 ------
> lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp | 32 --------------------
> lib/Target/R600/SIInstrFormats.td | 20 ++++++------
> lib/Target/R600/SIInstrInfo.td | 22 ++++----------
> 4 files changed, 16 insertions(+), 67 deletions(-)
>
> diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
> index 8721f80..cd3a7ce 100644
> --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
> +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
> @@ -33,15 +33,6 @@ public:
> SmallVectorImpl<MCFixup> &Fixups) const {
> return 0;
> }
> -
> - virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo,
> - SmallVectorImpl<MCFixup> &Fixups) const {
> - return 0;
> - }
> - virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo,
> - SmallVectorImpl<MCFixup> &Fixups) const {
> - return 0;
> - }
> };
>
> } // End namespace llvm
> diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
> index 6cc0077..e27abcc 100644
> --- a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
> +++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
> @@ -42,9 +42,6 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
> const MCSubtargetInfo &STI;
> MCContext &Ctx;
>
> - /// \brief Encode a sequence of registers with the correct alignment.
> - unsigned GPRAlign(const MCInst &MI, unsigned OpNo, unsigned shift) const;
> -
> /// \brief Can this operand also contain immediate values?
> bool isSrcOperand(const MCInstrDesc &Desc, unsigned OpNo) const;
>
> @@ -65,14 +62,6 @@ public:
> /// \returns the encoding for an MCOperand.
> virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
> SmallVectorImpl<MCFixup> &Fixups) const;
> -
> - /// \brief Encoding for when 2 consecutive registers are used
> - virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo,
> - SmallVectorImpl<MCFixup> &Fixup) const;
> -
> - /// \brief Encoding for when 4 consectuive registers are used
> - virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo,
> - SmallVectorImpl<MCFixup> &Fixup) const;
> };
>
> } // End anonymous namespace
> @@ -212,24 +201,3 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
> return 0;
> }
>
> -//===----------------------------------------------------------------------===//
> -// Custom Operand Encodings
> -//===----------------------------------------------------------------------===//
> -
> -unsigned SIMCCodeEmitter::GPRAlign(const MCInst &MI, unsigned OpNo,
> - unsigned shift) const {
> - unsigned regCode = MRI.getEncodingValue(MI.getOperand(OpNo).getReg());
> - return (regCode & 0xff) >> shift;
> -}
> -
> -unsigned SIMCCodeEmitter::GPR2AlignEncode(const MCInst &MI,
> - unsigned OpNo ,
> - SmallVectorImpl<MCFixup> &Fixup) const {
> - return GPRAlign(MI, OpNo, 1);
> -}
> -
> -unsigned SIMCCodeEmitter::GPR4AlignEncode(const MCInst &MI,
> - unsigned OpNo,
> - SmallVectorImpl<MCFixup> &Fixup) const {
> - return GPRAlign(MI, OpNo, 2);
> -}
> diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
> index fe417d6..3891ddb 100644
> --- a/lib/Target/R600/SIInstrFormats.td
> +++ b/lib/Target/R600/SIInstrFormats.td
> @@ -129,12 +129,12 @@ class SMRD <bits<5> op, bits<1> imm, dag outs, dag ins, string asm,
> list<dag> pattern> : Enc32<outs, ins, asm, pattern> {
>
> bits<7> SDST;
> - bits<6> SBASE;
> + bits<7> SBASE;
> bits<8> OFFSET;
>
> let Inst{7-0} = OFFSET;
> let Inst{8} = imm;
> - let Inst{14-9} = SBASE;
> + let Inst{14-9} = SBASE{6-1};
> let Inst{21-15} = SDST;
> let Inst{26-22} = op;
> let Inst{31-27} = 0x18; //encoding
> @@ -292,7 +292,7 @@ class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
> bits<1> ADDR64;
> bits<1> LDS;
> bits<8> VADDR;
> - bits<5> SRSRC;
> + bits<7> SRSRC;
> bits<1> SLC;
> bits<1> TFE;
> bits<8> SOFFSET;
> @@ -307,7 +307,7 @@ class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
> let Inst{31-26} = 0x38; //encoding
> let Inst{39-32} = VADDR;
> let Inst{47-40} = VDATA;
> - let Inst{52-48} = SRSRC;
> + let Inst{52-48} = SRSRC{6-2};
> let Inst{54} = SLC;
> let Inst{55} = TFE;
> let Inst{63-56} = SOFFSET;
> @@ -330,7 +330,7 @@ class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> :
> bits<4> DFMT;
> bits<3> NFMT;
> bits<8> VADDR;
> - bits<5> SRSRC;
> + bits<7> SRSRC;
> bits<1> SLC;
> bits<1> TFE;
> bits<8> SOFFSET;
> @@ -346,7 +346,7 @@ class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> :
> let Inst{31-26} = 0x3a; //encoding
> let Inst{39-32} = VADDR;
> let Inst{47-40} = VDATA;
> - let Inst{52-48} = SRSRC;
> + let Inst{52-48} = SRSRC{6-2};
> let Inst{54} = SLC;
> let Inst{55} = TFE;
> let Inst{63-56} = SOFFSET;
> @@ -370,8 +370,8 @@ class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
> bits<1> LWE;
> bits<1> SLC;
> bits<8> VADDR;
> - bits<5> SRSRC;
> - bits<5> SSAMP;
> + bits<7> SRSRC;
> + bits<7> SSAMP;
>
> let Inst{11-8} = DMASK;
> let Inst{12} = UNORM;
> @@ -385,8 +385,8 @@ class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
> let Inst{31-26} = 0x3c;
> let Inst{39-32} = VADDR;
> let Inst{47-40} = VDATA;
> - let Inst{52-48} = SRSRC;
> - let Inst{57-53} = SSAMP;
> + let Inst{52-48} = SRSRC{6-2};
> + let Inst{57-53} = SSAMP{6-2};
>
> let VM_CNT = 1;
> let EXP_CNT = 1;
> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
> index d6c3f06..260c651 100644
> --- a/lib/Target/R600/SIInstrInfo.td
> +++ b/lib/Target/R600/SIInstrInfo.td
> @@ -53,16 +53,6 @@ def SIOperand {
> int VCC = 0x6A;
> }
>
> -class GPR4Align <RegisterClass rc> : Operand <vAny> {
> - let EncoderMethod = "GPR4AlignEncode";
> - let MIOperandInfo = (ops rc:$reg);
> -}
> -
> -class GPR2Align <RegisterClass rc> : Operand <iPTR> {
> - let EncoderMethod = "GPR2AlignEncode";
> - let MIOperandInfo = (ops rc:$reg);
> -}
> -
> include "SIInstrFormats.td"
>
> //===----------------------------------------------------------------------===//
> @@ -128,13 +118,13 @@ class SOPK_64 <bits<5> op, string opName, list<dag> pattern> : SOPK <
> multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass dstClass> {
> def _IMM : SMRD <
> op, 1, (outs dstClass:$dst),
> - (ins GPR2Align<SReg_64>:$sbase, i32imm:$offset),
> + (ins SReg_64:$sbase, i32imm:$offset),
> asm#" $dst, $sbase, $offset", []
> >;
>
> def _SGPR : SMRD <
> op, 0, (outs dstClass:$dst),
> - (ins GPR2Align<SReg_64>:$sbase, SReg_32:$soff),
> + (ins SReg_64:$sbase, SReg_32:$soff),
> asm#" $dst, $sbase, $soff", []
> >;
> }
> @@ -276,7 +266,7 @@ class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBU
> (outs),
> (ins regClass:$vdata, i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc,
> i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr,
> - GPR4Align<SReg_128>:$srsrc, i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset),
> + SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset),
> asm#" $vdata, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"
> #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset",
> []> {
> @@ -288,7 +278,7 @@ class MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> : MUBUF
> op,
> (outs regClass:$dst),
> (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
> - i1imm:$lds, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc, i1imm:$slc,
> + i1imm:$lds, VReg_32:$vaddr, SReg_128:$srsrc, i1imm:$slc,
> i1imm:$tfe, SSrc_32:$soffset),
> asm#" $dst, $offset, $offen, $idxen, $glc, $addr64, "
> #"$lds, $vaddr, $srsrc, $slc, $tfe, $soffset",
> @@ -301,7 +291,7 @@ class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF
> op,
> (outs regClass:$dst),
> (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
> - i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc,
> + i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, SReg_128:$srsrc,
> i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset),
> asm#" $dst, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"
> #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset",
> @@ -315,7 +305,7 @@ class MIMG_Load_Helper <bits<7> op, string asm> : MIMG <
> (outs VReg_128:$vdata),
> (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
> i1imm:$tfe, i1imm:$lwe, i1imm:$slc, VReg_32:$vaddr,
> - GPR4Align<SReg_256>:$srsrc, GPR4Align<SReg_128>:$ssamp),
> + SReg_256:$srsrc, SReg_128:$ssamp),
> asm#" $vdata, $dmask, $unorm, $glc, $da, $r128,"
> #" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp",
> []> {
> --
> 1.7.10.4
>
> From f11fdc8819b110bf377431f12aa296e67f77a4d3 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig at amd.com>
> Date: Wed, 27 Feb 2013 12:20:34 +0100
> Subject: [PATCH 4/5] R600/SI: remove S_MOV immediate patterns
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
> They won't match anyway.
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> lib/Target/R600/SIInstructions.td | 14 ++------------
> 1 file changed, 2 insertions(+), 12 deletions(-)
>
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index af116f0..d9d7b4c 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -1305,11 +1305,6 @@ def : Pat <
> /********** ================== **********/
>
> def : Pat <
> - (i1 imm:$imm),
> - (S_MOV_B64 imm:$imm)
> ->;
> -
> -def : Pat <
> (i32 imm:$imm),
> (V_MOV_B32_e32 imm:$imm)
> >;
> @@ -1320,13 +1315,8 @@ def : Pat <
> >;
>
> def : Pat <
> - (i32 imm:$imm),
> - (S_MOV_B32 imm:$imm)
> ->;
> -
> -def : Pat <
> - (f32 fpimm:$imm),
> - (S_MOV_B32 fpimm:$imm)
> + (i1 imm:$imm),
> + (S_MOV_B64 imm:$imm)
> >;
>
> def : Pat <
> --
> 1.7.10.4
>
> From 1a93c3a5bf2454f266005cc37a89774ad5cd4edb Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer at amd.com>
> Date: Fri, 22 Feb 2013 17:02:11 +0100
> Subject: [PATCH 5/5] R600/SI: handle all registers in copyPhysReg v2
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
> v2: based on Michel's patch, but now allows copying of all register sizes.
>
> Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> lib/Target/R600/SIInstrInfo.cpp | 104 +++++++++++++++++++++++++++++++++------
> 1 file changed, 88 insertions(+), 16 deletions(-)
>
> diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
> index d9dbd6a..de2373b 100644
> --- a/lib/Target/R600/SIInstrInfo.cpp
> +++ b/lib/Target/R600/SIInstrInfo.cpp
> @@ -33,36 +33,108 @@ const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const {
>
> void
> SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
> - MachineBasicBlock::iterator MI, DebugLoc DL,
> - unsigned DestReg, unsigned SrcReg,
> - bool KillSrc) const {
> + MachineBasicBlock::iterator MI, DebugLoc DL,
> + unsigned DestReg, unsigned SrcReg,
> + bool KillSrc) const {
> +
> // If we are trying to copy to or from SCC, there is a bug somewhere else in
> // the backend. While it may be theoretically possible to do this, it should
> // never be necessary.
> assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);
>
> - if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
> - assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||
> - AMDGPU::SReg_64RegClass.contains(SrcReg));
> - BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), RI.getSubReg(DestReg, AMDGPU::sub0))
> - .addReg(RI.getSubReg(SrcReg, AMDGPU::sub0), getKillRegState(KillSrc))
> - .addReg(DestReg, RegState::Define | RegState::Implicit);
> - BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), RI.getSubReg(DestReg, AMDGPU::sub1))
> - .addReg(RI.getSubReg(SrcReg, AMDGPU::sub1), getKillRegState(KillSrc));
> + const int16_t Sub0_15[] = {
> + AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
> + AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
> + AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
> + AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 0
> + };
> +
> + const int16_t Sub0_7[] = {
> + AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
> + AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 0
> + };
> +
> + const int16_t Sub0_3[] = {
> + AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0
> + };
> +
> + const int16_t Sub0_1[] = {
> + AMDGPU::sub0, AMDGPU::sub1, 0
> + };
> +
> + unsigned Opcode;
> + const int16_t *SubIndices;
> +
> + if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
> + assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
> + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
> + .addReg(SrcReg, getKillRegState(KillSrc));
> + return;
> +
> } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
> assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
> BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
> .addReg(SrcReg, getKillRegState(KillSrc));
> + return;
> +
> + } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) {
> + assert(AMDGPU::SReg_128RegClass.contains(SrcReg));
> + Opcode = AMDGPU::S_MOV_B32;
> + SubIndices = Sub0_3;
> +
> + } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) {
> + assert(AMDGPU::SReg_256RegClass.contains(SrcReg));
> + Opcode = AMDGPU::S_MOV_B32;
> + SubIndices = Sub0_7;
> +
> + } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) {
> + assert(AMDGPU::SReg_512RegClass.contains(SrcReg));
> + Opcode = AMDGPU::S_MOV_B32;
> + SubIndices = Sub0_15;
> +
> } else if (AMDGPU::VReg_32RegClass.contains(DestReg)) {
> assert(AMDGPU::VReg_32RegClass.contains(SrcReg) ||
> - AMDGPU::SReg_32RegClass.contains(SrcReg));
> + AMDGPU::SReg_32RegClass.contains(SrcReg));
> BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
> .addReg(SrcReg, getKillRegState(KillSrc));
> + return;
> +
> + } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
> + assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||
> + AMDGPU::SReg_64RegClass.contains(SrcReg));
> + Opcode = AMDGPU::V_MOV_B32_e32;
> + SubIndices = Sub0_1;
> +
> + } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
> + assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
> + AMDGPU::SReg_128RegClass.contains(SrcReg));
> + Opcode = AMDGPU::V_MOV_B32_e32;
> + SubIndices = Sub0_3;
> +
> + } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) {
> + assert(AMDGPU::VReg_256RegClass.contains(SrcReg) ||
> + AMDGPU::SReg_256RegClass.contains(SrcReg));
> + Opcode = AMDGPU::V_MOV_B32_e32;
> + SubIndices = Sub0_7;
> +
> + } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) {
> + assert(AMDGPU::VReg_512RegClass.contains(SrcReg) ||
> + AMDGPU::SReg_512RegClass.contains(SrcReg));
> + Opcode = AMDGPU::V_MOV_B32_e32;
> + SubIndices = Sub0_15;
> +
> } else {
> - assert(AMDGPU::SReg_32RegClass.contains(DestReg));
> - assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
> - BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
> - .addReg(SrcReg, getKillRegState(KillSrc));
> + llvm_unreachable("Can't copy register!");
> + }
> +
> + while (unsigned SubIdx = *SubIndices++) {
> + MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
> + get(Opcode), RI.getSubReg(DestReg, SubIdx));
> +
> + Builder.addReg(RI.getSubReg(SrcReg, SubIdx), getKillRegState(KillSrc));
> +
> + if (*SubIndices)
> + Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
> }
> }
>
> --
> 1.7.10.4
>
More information about the llvm-commits
mailing list