[llvm] r204475 - R600/SI: Handle S_MOV_B64 in SIInstrInfo::moveToVALU()

Tom Stellard tom at stellard.net
Mon Mar 24 09:19:42 PDT 2014


On Mon, Mar 24, 2014 at 03:17:11PM +0400, Timur Iskhodzhanov wrote:
> FYI gcc-4.8.2 doesn't like this:
> ../lib/Target/R600/SIInstrInfo.cpp: In static member function ‘static unsigned int llvm::SIInstrInfo::getVALUOp(const llvm::MachineInstr&)’:
> ../lib/Target/R600/SIInstrInfo.cpp:501:41: warning: enumeral mismatch in conditional expression: ‘llvm::TargetOpcode::<anonymous enum>’ vs ‘llvm::AMDGPU::<anonymous enum>’ [-Wenum-compare]
>             TargetOpcode::COPY : AMDGPU::V_MOV_B32_e32;
> 

Should be fixed in r204618.
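
For reference, one common way to silence -Wenum-compare in a conditional that
mixes two anonymous enums is to cast one arm to unsigned so both branches share
an arithmetic type. A minimal sketch of that approach (the actual change made
in r204618 may differ):

  // Hypothetical fix sketch, not necessarily the r204618 patch.
  // Casting one arm to unsigned gives both branches of ?: a common
  // arithmetic type, so gcc no longer compares two anonymous enums.
  case AMDGPU::S_MOV_B32:
    return MI.getOperand(1).isReg() ?
           (unsigned)TargetOpcode::COPY : AMDGPU::V_MOV_B32_e32;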

-Tom

> 
> 
> 2014-03-21 19:51 GMT+04:00 Tom Stellard <thomas.stellard at amd.com>:
> 
> > Author: tstellar
> > Date: Fri Mar 21 10:51:54 2014
> > New Revision: 204475
> >
> > URL: http://llvm.org/viewvc/llvm-project?rev=204475&view=rev
> > Log:
> > R600/SI: Handle S_MOV_B64 in SIInstrInfo::moveToVALU()
> >
> > Added:
> >     llvm/trunk/test/CodeGen/R600/salu-to-valu.ll
> > Modified:
> >     llvm/trunk/lib/Target/R600/SIInstrInfo.cpp
> >
> > Modified: llvm/trunk/lib/Target/R600/SIInstrInfo.cpp
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIInstrInfo.cpp?rev=204475&r1=204474&r2=204475&view=diff
> >
> > ==============================================================================
> > --- llvm/trunk/lib/Target/R600/SIInstrInfo.cpp (original)
> > +++ llvm/trunk/lib/Target/R600/SIInstrInfo.cpp Fri Mar 21 10:51:54 2014
> > @@ -496,6 +496,9 @@ unsigned SIInstrInfo::getVALUOp(const Ma
> >    case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
> >    case AMDGPU::COPY: return AMDGPU::COPY;
> >    case AMDGPU::PHI: return AMDGPU::PHI;
> > +  case AMDGPU::S_MOV_B32:
> > +    return MI.getOperand(1).isReg() ?
> > +           TargetOpcode::COPY : AMDGPU::V_MOV_B32_e32;
> >    case AMDGPU::S_ADD_I32: return AMDGPU::V_ADD_I32_e32;
> >    case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32;
> >    case AMDGPU::S_SUB_I32: return AMDGPU::V_SUB_I32_e32;
> > @@ -680,12 +683,57 @@ void SIInstrInfo::moveToVALU(MachineInst
> >
> >    while (!Worklist.empty()) {
> >      MachineInstr *Inst = Worklist.pop_back_val();
> > +    MachineBasicBlock *MBB = Inst->getParent();
> > +    MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
> > +
> > +    // Handle some special cases
> > +    switch(Inst->getOpcode()) {
> > +      case AMDGPU::S_MOV_B64: {
> > +        DebugLoc DL = Inst->getDebugLoc();
> > +
> > +        // If the source operand is a register we can replace this with a
> > +        // copy
> > +        if (Inst->getOperand(1).isReg()) {
> > +          MachineInstr *Copy = BuildMI(*MBB, Inst, DL,
> > +                                       get(TargetOpcode::COPY))
> > +                                       .addOperand(Inst->getOperand(0))
> > +                                       .addOperand(Inst->getOperand(1));
> > +          Worklist.push_back(Copy);
> > +        } else {
> > +          // Otherwise, we need to split this into two movs, because there is
> > +          // no 64-bit VALU move instruction.
> > +          unsigned LoDst, HiDst, Dst;
> > +          LoDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
> > +          HiDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
> > +          Dst = MRI.createVirtualRegister(
> > +              MRI.getRegClass(Inst->getOperand(0).getReg()));
> > +
> > +          MachineInstr *Lo = BuildMI(*MBB, Inst, DL, get(AMDGPU::S_MOV_B32),
> > +                                     LoDst)
> > +                             .addImm(Inst->getOperand(1).getImm() & 0xFFFFFFFF);
> > +          MachineInstr *Hi = BuildMI(*MBB, Inst, DL, get(AMDGPU::S_MOV_B32),
> > +                                     HiDst)
> > +                                    .addImm(Inst->getOperand(1).getImm() >> 32);
> > +
> > +          BuildMI(*MBB, Inst, DL, get(TargetOpcode::REG_SEQUENCE), Dst)
> > +                  .addReg(LoDst)
> > +                  .addImm(AMDGPU::sub0)
> > +                  .addReg(HiDst)
> > +                  .addImm(AMDGPU::sub1);
> > +
> > +          MRI.replaceRegWith(Inst->getOperand(0).getReg(), Dst);
> > +          Worklist.push_back(Lo);
> > +          Worklist.push_back(Hi);
> > +        }
> > +        Inst->eraseFromParent();
> > +        continue;
> > +      }
> > +    }
> > +
> >      unsigned NewOpcode = getVALUOp(*Inst);
> >      if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
> >        continue;
> >
> > -    MachineRegisterInfo &MRI = Inst->getParent()->getParent()->getRegInfo();
> > -
> >      // Use the new VALU Opcode.
> >      const MCInstrDesc &NewDesc = get(NewOpcode);
> >      Inst->setDesc(NewDesc);
> >
> > Added: llvm/trunk/test/CodeGen/R600/salu-to-valu.ll
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/salu-to-valu.ll?rev=204475&view=auto
> >
> > ==============================================================================
> > --- llvm/trunk/test/CodeGen/R600/salu-to-valu.ll (added)
> > +++ llvm/trunk/test/CodeGen/R600/salu-to-valu.ll Fri Mar 21 10:51:54 2014
> > @@ -0,0 +1,42 @@
> > +; RUN: llc < %s -march=r600 -mcpu=SI  | FileCheck %s
> > +
> > +; In this test both the pointer and the offset operands to the
> > +; BUFFER_LOAD instructions end up being stored in vgprs.  This
> > +; requires us to add the pointer and offset together, store the
> > +; result in the offset operand (vaddr), and then store 0 in an
> > +; sgpr register pair and use that for the pointer operand
> > +; (low 64-bits of srsrc).
> > +
> > +; CHECK-LABEL: @mubuf
> > +; Make sure we aren't using VGPRs for the source operand of S_MOV_B64
> > +; CHECK-NOT: S_MOV_B64 s[{{[0-9]+:[0-9]+}}], v
> > +define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
> > +entry:
> > +  %0 = call i32 @llvm.r600.read.tidig.x() #1
> > +  %1 = call i32 @llvm.r600.read.tidig.y() #1
> > +  %2 = sext i32 %0 to i64
> > +  %3 = sext i32 %1 to i64
> > +  br label %loop
> > +
> > +loop:
> > +  %4 = phi i64 [0, %entry], [%5, %loop]
> > +  %5 = add i64 %2, %4
> > +  %6 = getelementptr i8 addrspace(1)* %in, i64 %5
> > +  %7 = load i8 addrspace(1)* %6, align 1
> > +  %8 = or i64 %5, 1
> > +  %9 = getelementptr i8 addrspace(1)* %in, i64 %8
> > +  %10 = load i8 addrspace(1)* %9, align 1
> > +  %11 = add i8 %7, %10
> > +  %12 = sext i8 %11 to i32
> > +  store i32 %12, i32 addrspace(1)* %out
> > +  %13 = icmp slt i64 %5, 10
> > +  br i1 %13, label %loop, label %done
> > +
> > +done:
> > +  ret void
> > +}
> > +
> > +declare i32 @llvm.r600.read.tidig.x() #1
> > +declare i32 @llvm.r600.read.tidig.y() #1
> > +
> > +attributes #1 = { nounwind readnone }
> >
> >