[llvm] r293551 - Re-commit AMDGPU/GlobalISel: Add support for simple shaders
Mikael Holmén via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 31 05:29:40 PST 2017
On 01/31/2017 02:23 PM, Tom Stellard wrote:
> On Tue, Jan 31, 2017 at 08:53:24AM +0100, Mikael Holmén via llvm-commits wrote:
>> Hi Tom,
>>
>> The new test
>>
>> test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir
>>
>> in this commit fails for me:
>>
>> /data/repo/llvm-patch/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir:57:9:
>> error: expected string not found in input
>> # SIVI: [[ADD_PTR_LO:%[0-9]+]] = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
>> ^
>> <stdin>:217:2: note: scanning from here
>> %48 = COPY %19.sub0
>> ^
>> <stdin>:217:2: note: with variable "PTR_LO" equal to "%47"
>> %48 = COPY %19.sub0
>> ^
>> <stdin>:217:2: note: with variable "K_SUB0" equal to "%56"
>> %48 = COPY %19.sub0
>> ^
>> <stdin>:241:2: note: possible intended match here
>> %29 = S_ADDC_U32 %32, %33, implicit-def %scc, implicit %scc
>> ^
>>
>> Looking at the generated output I see
>>
>> %59 = S_MOV_B32 4294967292
>> %60 = S_MOV_B32 3
>> %16 = REG_SEQUENCE %59, 1, %60, 2
>> %53 = S_ADD_U32 %55, %56, implicit-def %scc
>> %55 = COPY %0.sub0
>> %56 = COPY %16.sub0
>>
>> so the S_ADD_U32 using %55 and %56 is emitted before the COPYs that
>> define %55 and %56?
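>>
>> For reference, I'd expect the COPYs to come before their use, i.e.
>> something like this (reusing the register numbers from the output above):
>>
>>   %55 = COPY %0.sub0
>>   %56 = COPY %16.sub0
>>   %53 = S_ADD_U32 %55, %56, implicit-def %scc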
>>
>> I just ran the test both on top-of-tree and exactly at commit r293551,
>> and it fails in both cases.
>>
>> Any ideas? I suppose it works for you, so something fishy seems to be
>> going on.
>>
>
> Can you send me the full output?
I've attached both the .mir output and the "-print-before-all
-print-after-all -debug" printouts. I hope it helps.
Regards,
Mikael
>
> Thanks,
> Tom
>
>> Regards,
>> Mikael
>>
>>
>>
>> On 01/30/2017 10:56 PM, Tom Stellard via llvm-commits wrote:
>>> Author: tstellar
>>> Date: Mon Jan 30 15:56:46 2017
>>> New Revision: 293551
>>>
>>> URL: http://llvm.org/viewvc/llvm-project?rev=293551&view=rev
>>> Log:
>>> Re-commit AMDGPU/GlobalISel: Add support for simple shaders
>>>
>>> Fix build when global-isel is disabled and fix a warning.
>>>
>>> Summary: We can select constant/global G_LOAD, global G_STORE, and G_GEP.
>>>
>>> Reviewers: qcolombet, MatzeB, t.p.northover, ab, arsenm
>>>
>>> Subscribers: mehdi_amini, vkalintiris, kzhuravl, wdng, nhaehnle, mgorny, yaxunl, tony-tye, modocache, llvm-commits, dberris
>>>
>>> Differential Revision: https://reviews.llvm.org/D26730
>>>
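>>> As a rough illustration (not one of the added tests), this is the kind of
>>> trivial kernel the new path is meant to handle, run through GlobalISel with
>>> something like "llc -march=amdgcn -mcpu=fiji -global-isel":
>>>
>>>   define void @load_store(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
>>>     %val = load i32, i32 addrspace(2)* %in
>>>     store i32 %val, i32 addrspace(1)* %out
>>>     ret void
>>>   }
>>>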
>>> Added:
>>> llvm/trunk/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
>>> llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
>>> llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
>>> llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
>>> llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
>>> llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
>>> llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
>>> llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBanks.td
>>> llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
>>> llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir
>>> llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
>>> llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir
>>> llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/shader-epilogs.ll
>>> llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/smrd.ll
>>> Modified:
>>> llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
>>> llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
>>> llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.h
>>> llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
>>> llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
>>> llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
>>> llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt
>>> llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
>>>
>>> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPU.td?rev=293551&r1=293550&r2=293551&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AMDGPU/AMDGPU.td (original)
>>> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.td Mon Jan 30 15:56:46 2017
>>> @@ -562,5 +562,6 @@ include "Processors.td"
>>> include "AMDGPUInstrInfo.td"
>>> include "AMDGPUIntrinsics.td"
>>> include "AMDGPURegisterInfo.td"
>>> +include "AMDGPURegisterBanks.td"
>>> include "AMDGPUInstructions.td"
>>> include "AMDGPUCallingConv.td"
>>>
>>> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp?rev=293551&r1=293550&r2=293551&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp (original)
>>> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp Mon Jan 30 15:56:46 2017
>>> @@ -14,8 +14,13 @@
>>> //===----------------------------------------------------------------------===//
>>>
>>> #include "AMDGPUCallLowering.h"
>>> +#include "AMDGPU.h"
>>> #include "AMDGPUISelLowering.h"
>>> -
>>> +#include "AMDGPUSubtarget.h"
>>> +#include "SIISelLowering.h"
>>> +#include "SIRegisterInfo.h"
>>> +#include "SIMachineFunctionInfo.h"
>>> +#include "llvm/CodeGen/CallingConvLower.h"
>>> #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
>>> #include "llvm/CodeGen/MachineInstrBuilder.h"
>>>
>>> @@ -30,13 +35,135 @@ AMDGPUCallLowering::AMDGPUCallLowering(c
>>> }
>>>
>>> bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
>>> - const Value *Val, unsigned VReg) const {
>>> + const Value *Val, unsigned VReg) const {
>>> + MIRBuilder.buildInstr(AMDGPU::S_ENDPGM);
>>> return true;
>>> }
>>>
>>> +unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
>>> + Type *ParamTy,
>>> + unsigned Offset) const {
>>> +
>>> + MachineFunction &MF = MIRBuilder.getMF();
>>> + const SIRegisterInfo *TRI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
>>> + MachineRegisterInfo &MRI = MF.getRegInfo();
>>> + const Function &F = *MF.getFunction();
>>> + const DataLayout &DL = F.getParent()->getDataLayout();
>>> + PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
>>> + LLT PtrType(*PtrTy, DL);
>>> + unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
>>> + unsigned KernArgSegmentPtr =
>>> + TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
>>> + unsigned KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);
>>> +
>>> + unsigned OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
>>> + MIRBuilder.buildConstant(OffsetReg, Offset);
>>> +
>>> + MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);
>>> +
>>> + return DstReg;
>>> +}
>>> +
>>> +void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
>>> + Type *ParamTy, unsigned Offset,
>>> + unsigned DstReg) const {
>>> + MachineFunction &MF = MIRBuilder.getMF();
>>> + const Function &F = *MF.getFunction();
>>> + const DataLayout &DL = F.getParent()->getDataLayout();
>>> + PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
>>> + MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
>>> + unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
>>> + unsigned Align = DL.getABITypeAlignment(ParamTy);
>>> + unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);
>>> +
>>> + MachineMemOperand *MMO =
>>> + MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
>>> + MachineMemOperand::MONonTemporal |
>>> + MachineMemOperand::MOInvariant,
>>> + TypeSize, Align);
>>> +
>>> + MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
>>> +}
>>> +
>>> bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
>>> const Function &F,
>>> ArrayRef<unsigned> VRegs) const {
>>> - // TODO: Implement once there are generic loads/stores.
>>> +
>>> + MachineFunction &MF = MIRBuilder.getMF();
>>> + const SISubtarget *Subtarget = static_cast<const SISubtarget *>(&MF.getSubtarget());
>>> + MachineRegisterInfo &MRI = MF.getRegInfo();
>>> + SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
>>> + const SIRegisterInfo *TRI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
>>> + const DataLayout &DL = F.getParent()->getDataLayout();
>>> +
>>> + SmallVector<CCValAssign, 16> ArgLocs;
>>> + CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
>>> +
>>> + // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
>>> + if (Info->hasPrivateSegmentBuffer()) {
>>> + unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
>>> + MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
>>> + CCInfo.AllocateReg(PrivateSegmentBufferReg);
>>> + }
>>> +
>>> + if (Info->hasDispatchPtr()) {
>>> + unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
>>> + // FIXME: Need to add reg as live-in
>>> + CCInfo.AllocateReg(DispatchPtrReg);
>>> + }
>>> +
>>> + if (Info->hasQueuePtr()) {
>>> + unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
>>> + // FIXME: Need to add reg as live-in
>>> + CCInfo.AllocateReg(QueuePtrReg);
>>> + }
>>> +
>>> + if (Info->hasKernargSegmentPtr()) {
>>> + unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
>>> + const LLT P2 = LLT::pointer(2, 64);
>>> + unsigned VReg = MRI.createGenericVirtualRegister(P2);
>>> + MRI.addLiveIn(InputPtrReg, VReg);
>>> + MIRBuilder.getMBB().addLiveIn(InputPtrReg);
>>> + MIRBuilder.buildCopy(VReg, InputPtrReg);
>>> + CCInfo.AllocateReg(InputPtrReg);
>>> + }
>>> +
>>> + if (Info->hasDispatchID()) {
>>> + unsigned DispatchIDReg = Info->addDispatchID(*TRI);
>>> + // FIXME: Need to add reg as live-in
>>> + CCInfo.AllocateReg(DispatchIDReg);
>>> + }
>>> +
>>> + if (Info->hasFlatScratchInit()) {
>>> + unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
>>> + // FIXME: Need to add reg as live-in
>>> + CCInfo.AllocateReg(FlatScratchInitReg);
>>> + }
>>> +
>>> + unsigned NumArgs = F.arg_size();
>>> + Function::const_arg_iterator CurOrigArg = F.arg_begin();
>>> + const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
>>> + for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
>>> + CurOrigArg->getType()->dump();
>>> + MVT ValVT = TLI.getValueType(DL, CurOrigArg->getType()).getSimpleVT();
>>> + ISD::ArgFlagsTy Flags;
>>> + Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));
>>> + CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
>>> + /*IsVarArg=*/false);
>>> + bool Res =
>>> + AssignFn(i, ValVT, ValVT, CCValAssign::Full, Flags, CCInfo);
>>> + assert(!Res && "Call operand has unhandled type");
>>> + (void)Res;
>>> + }
>>> +
>>> + Function::const_arg_iterator Arg = F.arg_begin();
>>> + for (unsigned i = 0; i != NumArgs; ++i, ++Arg) {
>>> + // FIXME: We should be getting DebugInfo from the arguments somehow.
>>> + CCValAssign &VA = ArgLocs[i];
>>> + lowerParameter(MIRBuilder, Arg->getType(),
>>> + VA.getLocMemOffset() +
>>> + Subtarget->getExplicitKernelArgOffset(MF), VRegs[i]);
>>> + }
>>> +
>>> return true;
>>> }
>>>
>>> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.h
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.h?rev=293551&r1=293550&r2=293551&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.h (original)
>>> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.h Mon Jan 30 15:56:46 2017
>>> @@ -22,6 +22,13 @@ namespace llvm {
>>> class AMDGPUTargetLowering;
>>>
>>> class AMDGPUCallLowering: public CallLowering {
>>> +
>>> + unsigned lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy,
>>> + unsigned Offset) const;
>>> +
>>> + void lowerParameter(MachineIRBuilder &MIRBuilder, Type *ParamTy,
>>> + unsigned Offset, unsigned DstReg) const;
>>> +
>>> public:
>>> AMDGPUCallLowering(const AMDGPUTargetLowering &TLI);
>>>
>>> @@ -29,6 +36,7 @@ class AMDGPUCallLowering: public CallLow
>>> unsigned VReg) const override;
>>> bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
>>> ArrayRef<unsigned> VRegs) const override;
>>> + CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
>>> };
>>> } // End of namespace llvm;
>>> #endif
>>>
>>> Added: llvm/trunk/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def?rev=293551&view=auto
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def (added)
>>> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def Mon Jan 30 15:56:46 2017
>>> @@ -0,0 +1,62 @@
>>> +//===- AMDGPUGenRegisterBankInfo.def -----------------------------*- C++ -*-==//
>>> +//
>>> +// The LLVM Compiler Infrastructure
>>> +//
>>> +// This file is distributed under the University of Illinois Open Source
>>> +// License. See LICENSE.TXT for details.
>>> +//
>>> +//===----------------------------------------------------------------------===//
>>> +/// \file
>>> +/// This file defines all the static objects used by AMDGPURegisterBankInfo.
>>> +/// \todo This should be generated by TableGen.
>>> +//===----------------------------------------------------------------------===//
>>> +
>>> +#ifndef LLVM_BUILD_GLOBAL_ISEL
>>> +#error "You shouldn't build this"
>>> +#endif
>>> +
>>> +namespace llvm {
>>> +namespace AMDGPU {
>>> +
>>> +enum PartialMappingIdx {
>>> + None = - 1,
>>> + PM_SGPR32 = 0,
>>> + PM_SGPR64 = 1,
>>> + PM_VGPR32 = 2,
>>> + PM_VGPR64 = 3
>>> +};
>>> +
>>> +const RegisterBankInfo::PartialMapping PartMappings[] {
>>> + // StartIdx, Length, RegBank
>>> + {0, 32, SGPRRegBank},
>>> + {0, 64, SGPRRegBank},
>>> + {0, 32, VGPRRegBank},
>>> + {0, 64, VGPRRegBank}
>>> +};
>>> +
>>> +const RegisterBankInfo::ValueMapping ValMappings[] {
>>> + // SGPR 32-bit
>>> + {&PartMappings[0], 1},
>>> + // SGPR 64-bit
>>> + {&PartMappings[1], 1},
>>> + // VGPR 32-bit
>>> + {&PartMappings[2], 1},
>>> + // VGPR 64-bit
>>> + {&PartMappings[3], 1}
>>> +};
>>> +
>>> +enum ValueMappingIdx {
>>> + SGPRStartIdx = 0,
>>> + VGPRStartIdx = 2
>>> +};
>>> +
>>> +const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID,
>>> + unsigned Size) {
>>> + assert(Size % 32 == 0);
>>> + unsigned Idx = BankID == AMDGPU::SGPRRegBankID ? SGPRStartIdx : VGPRStartIdx;
>>> + Idx += (Size / 32) - 1;
>>> + return &ValMappings[Idx];
>>> +}
>>> +
>>> +} // End AMDGPU namespace.
>>> +} // End llvm namespace.
>>>
>>> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=293551&r1=293550&r2=293551&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
>>> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Mon Jan 30 15:56:46 2017
>>> @@ -15,6 +15,7 @@
>>>
>>> #include "AMDGPUISelLowering.h"
>>> #include "AMDGPU.h"
>>> +#include "AMDGPUCallLowering.h"
>>> #include "AMDGPUFrameLowering.h"
>>> #include "AMDGPUIntrinsicInfo.h"
>>> #include "AMDGPURegisterInfo.h"
>>> @@ -670,6 +671,11 @@ bool AMDGPUTargetLowering::isNarrowingPr
>>> // TargetLowering Callbacks
>>> //===---------------------------------------------------------------------===//
>>>
>>> +CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC,
>>> + bool IsVarArg) const {
>>> + return CC_AMDGPU;
>>> +}
>>> +
>>> /// The SelectionDAGBuilder will automatically promote function arguments
>>> /// with illegal types. However, this does not work for the AMDGPU targets
>>> /// since the function arguments are stored in memory as these illegal types.
>>>
>>> Added: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp?rev=293551&view=auto
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (added)
>>> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp Mon Jan 30 15:56:46 2017
>>> @@ -0,0 +1,418 @@
>>> +//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
>>> +//
>>> +// The LLVM Compiler Infrastructure
>>> +//
>>> +// This file is distributed under the University of Illinois Open Source
>>> +// License. See LICENSE.TXT for details.
>>> +//
>>> +//===----------------------------------------------------------------------===//
>>> +/// \file
>>> +/// This file implements the targeting of the InstructionSelector class for
>>> +/// AMDGPU.
>>> +/// \todo This should be generated by TableGen.
>>> +//===----------------------------------------------------------------------===//
>>> +
>>> +#include "AMDGPUInstructionSelector.h"
>>> +#include "AMDGPUInstrInfo.h"
>>> +#include "AMDGPURegisterBankInfo.h"
>>> +#include "AMDGPURegisterInfo.h"
>>> +#include "AMDGPUSubtarget.h"
>>> +#include "llvm/CodeGen/MachineBasicBlock.h"
>>> +#include "llvm/CodeGen/MachineFunction.h"
>>> +#include "llvm/CodeGen/MachineInstr.h"
>>> +#include "llvm/CodeGen/MachineInstrBuilder.h"
>>> +#include "llvm/CodeGen/MachineRegisterInfo.h"
>>> +#include "llvm/IR/Type.h"
>>> +#include "llvm/Support/Debug.h"
>>> +#include "llvm/Support/raw_ostream.h"
>>> +
>>> +#define DEBUG_TYPE "amdgpu-isel"
>>> +
>>> +using namespace llvm;
>>> +
>>> +AMDGPUInstructionSelector::AMDGPUInstructionSelector(
>>> + const SISubtarget &STI, const AMDGPURegisterBankInfo &RBI)
>>> + : InstructionSelector(), TII(*STI.getInstrInfo()),
>>> + TRI(*STI.getRegisterInfo()), RBI(RBI) {}
>>> +
>>> +MachineOperand
>>> +AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
>>> + unsigned SubIdx) const {
>>> +
>>> + MachineInstr *MI = MO.getParent();
>>> + MachineBasicBlock *BB = MO.getParent()->getParent();
>>> + MachineFunction *MF = BB->getParent();
>>> + MachineRegisterInfo &MRI = MF->getRegInfo();
>>> + unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
>>> +
>>> + if (MO.isReg()) {
>>> + unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
>>> + unsigned Reg = MO.getReg();
>>> + BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
>>> + .addReg(Reg, 0, ComposedSubIdx);
>>> +
>>> + return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
>>> + MO.isKill(), MO.isDead(), MO.isUndef(),
>>> + MO.isEarlyClobber(), 0, MO.isDebug(),
>>> + MO.isInternalRead());
>>> + }
>>> +
>>> + assert(MO.isImm());
>>> +
>>> + APInt Imm(64, MO.getImm());
>>> +
>>> + switch (SubIdx) {
>>> + default:
>>> + llvm_unreachable("do not know to split immediate with this sub index.");
>>> + case AMDGPU::sub0:
>>> + return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
>>> + case AMDGPU::sub1:
>>> + return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
>>> + }
>>> +}
>>> +
>>> +bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
>>> + MachineBasicBlock *BB = I.getParent();
>>> + MachineFunction *MF = BB->getParent();
>>> + MachineRegisterInfo &MRI = MF->getRegInfo();
>>> + unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
>>> + unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
>>> + unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
>>> +
>>> + if (Size != 64)
>>> + return false;
>>> +
>>> + DebugLoc DL = I.getDebugLoc();
>>> +
>>> + BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
>>> + .add(getSubOperand64(I.getOperand(1), AMDGPU::sub0))
>>> + .add(getSubOperand64(I.getOperand(2), AMDGPU::sub0));
>>> +
>>> + BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
>>> + .add(getSubOperand64(I.getOperand(1), AMDGPU::sub1))
>>> + .add(getSubOperand64(I.getOperand(2), AMDGPU::sub1));
>>> +
>>> + BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg())
>>> + .addReg(DstLo)
>>> + .addImm(AMDGPU::sub0)
>>> + .addReg(DstHi)
>>> + .addImm(AMDGPU::sub1);
>>> +
>>> + for (MachineOperand &MO : I.explicit_operands()) {
>>> + if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
>>> + continue;
>>> + RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI);
>>> + }
>>> +
>>> + I.eraseFromParent();
>>> + return true;
>>> +}
>>> +
>>> +bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
>>> + return selectG_ADD(I);
>>> +}
>>> +
>>> +bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
>>> + MachineBasicBlock *BB = I.getParent();
>>> + DebugLoc DL = I.getDebugLoc();
>>> +
>>> + // FIXME: Select store instruction based on address space
>>> + MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(AMDGPU::FLAT_STORE_DWORD))
>>> + .add(I.getOperand(1))
>>> + .add(I.getOperand(0))
>>> + .addImm(0)
>>> + .addImm(0)
>>> + .addImm(0);
>>> +
>>> + // Now that we selected an opcode, we need to constrain the register
>>> + // operands to use appropriate classes.
>>> + bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
>>> +
>>> + I.eraseFromParent();
>>> + return Ret;
>>> +}
>>> +
>>> +bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
>>> + MachineBasicBlock *BB = I.getParent();
>>> + MachineFunction *MF = BB->getParent();
>>> + MachineRegisterInfo &MRI = MF->getRegInfo();
>>> + unsigned DstReg = I.getOperand(0).getReg();
>>> + unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
>>> +
>>> + if (Size == 32) {
>>> + I.setDesc(TII.get(AMDGPU::S_MOV_B32));
>>> + return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
>>> + }
>>> +
>>> + assert(Size == 64);
>>> +
>>> + DebugLoc DL = I.getDebugLoc();
>>> + unsigned LoReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
>>> + unsigned HiReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
>>> + const APInt &Imm = I.getOperand(1).getCImm()->getValue();
>>> +
>>> + BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), LoReg)
>>> + .addImm(Imm.trunc(32).getZExtValue());
>>> +
>>> + BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), HiReg)
>>> + .addImm(Imm.ashr(32).getZExtValue());
>>> +
>>> + BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
>>> + .addReg(LoReg)
>>> + .addImm(AMDGPU::sub0)
>>> + .addReg(HiReg)
>>> + .addImm(AMDGPU::sub1);
>>> + // We can't call constrainSelectedInstRegOperands here, because it doesn't
>>> + // work for target independent opcodes
>>> + I.eraseFromParent();
>>> + return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
>>> +}
>>> +
>>> +static bool isConstant(const MachineInstr &MI) {
>>> + return MI.getOpcode() == TargetOpcode::G_CONSTANT;
>>> +}
>>> +
>>> +void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
>>> + const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {
>>> +
>>> + const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());
>>> +
>>> + assert(PtrMI);
>>> +
>>> + if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
>>> + return;
>>> +
>>> + GEPInfo GEPInfo(*PtrMI);
>>> +
>>> + for (unsigned i = 1, e = 3; i < e; ++i) {
>>> + const MachineOperand &GEPOp = PtrMI->getOperand(i);
>>> + const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
>>> + assert(OpDef);
>>> + if (isConstant(*OpDef)) {
>>> + // FIXME: Is it possible to have multiple Imm parts? Maybe if we
>>> + // are lacking other optimizations.
>>> + assert(GEPInfo.Imm == 0);
>>> + GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
>>> + continue;
>>> + }
>>> + const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
>>> + if (OpBank->getID() == AMDGPU::SGPRRegBankID)
>>> + GEPInfo.SgprParts.push_back(GEPOp.getReg());
>>> + else
>>> + GEPInfo.VgprParts.push_back(GEPOp.getReg());
>>> + }
>>> +
>>> + AddrInfo.push_back(GEPInfo);
>>> + getAddrModeInfo(*PtrMI, MRI, AddrInfo);
>>> +}
>>> +
>>> +static bool isInstrUniform(const MachineInstr &MI) {
>>> + if (!MI.hasOneMemOperand())
>>> + return false;
>>> +
>>> + const MachineMemOperand *MMO = *MI.memoperands_begin();
>>> + const Value *Ptr = MMO->getValue();
>>> +
>>> + // UndefValue means this is a load of a kernel input. These are uniform.
>>> + // Sometimes LDS instructions have constant pointers.
>>> + // If Ptr is null, then that means this mem operand contains a
>>> + // PseudoSourceValue like GOT.
>>> + if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
>>> + isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
>>> + return true;
>>> +
>>> + const Instruction *I = dyn_cast<Instruction>(Ptr);
>>> + return I && I->getMetadata("amdgpu.uniform");
>>> +}
>>> +
>>> +static unsigned getSmrdOpcode(unsigned BaseOpcode, unsigned LoadSize) {
>>> +
>>> + if (LoadSize == 32)
>>> + return BaseOpcode;
>>> +
>>> + switch (BaseOpcode) {
>>> + case AMDGPU::S_LOAD_DWORD_IMM:
>>> + switch (LoadSize) {
>>> + case 64:
>>> + return AMDGPU::S_LOAD_DWORDX2_IMM;
>>> + case 128:
>>> + return AMDGPU::S_LOAD_DWORDX4_IMM;
>>> + case 256:
>>> + return AMDGPU::S_LOAD_DWORDX8_IMM;
>>> + case 512:
>>> + return AMDGPU::S_LOAD_DWORDX16_IMM;
>>> + }
>>> + break;
>>> + case AMDGPU::S_LOAD_DWORD_IMM_ci:
>>> + switch (LoadSize) {
>>> + case 64:
>>> + return AMDGPU::S_LOAD_DWORDX2_IMM_ci;
>>> + case 128:
>>> + return AMDGPU::S_LOAD_DWORDX4_IMM_ci;
>>> + case 256:
>>> + return AMDGPU::S_LOAD_DWORDX8_IMM_ci;
>>> + case 512:
>>> + return AMDGPU::S_LOAD_DWORDX16_IMM_ci;
>>> + }
>>> + break;
>>> + case AMDGPU::S_LOAD_DWORD_SGPR:
>>> + switch (LoadSize) {
>>> + case 64:
>>> + return AMDGPU::S_LOAD_DWORDX2_SGPR;
>>> + case 128:
>>> + return AMDGPU::S_LOAD_DWORDX4_SGPR;
>>> + case 256:
>>> + return AMDGPU::S_LOAD_DWORDX8_SGPR;
>>> + case 512:
>>> + return AMDGPU::S_LOAD_DWORDX16_SGPR;
>>> + }
>>> + break;
>>> + }
>>> + llvm_unreachable("Invalid base smrd opcode or size");
>>> +}
>>> +
>>> +bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
>>> + for (const GEPInfo &GEPInfo : AddrInfo) {
>>> + if (!GEPInfo.VgprParts.empty())
>>> + return true;
>>> + }
>>> + return false;
>>> +}
>>> +
>>> +bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I,
>>> + ArrayRef<GEPInfo> AddrInfo) const {
>>> +
>>> + if (!I.hasOneMemOperand())
>>> + return false;
>>> +
>>> + if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS)
>>> + return false;
>>> +
>>> + if (!isInstrUniform(I))
>>> + return false;
>>> +
>>> + if (hasVgprParts(AddrInfo))
>>> + return false;
>>> +
>>> + MachineBasicBlock *BB = I.getParent();
>>> + MachineFunction *MF = BB->getParent();
>>> + const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
>>> + MachineRegisterInfo &MRI = MF->getRegInfo();
>>> + unsigned DstReg = I.getOperand(0).getReg();
>>> + const DebugLoc &DL = I.getDebugLoc();
>>> + unsigned Opcode;
>>> + unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
>>> +
>>> + if (!AddrInfo.empty() && AddrInfo[0].SgprParts.size() == 1) {
>>> +
>>> + const GEPInfo &GEPInfo = AddrInfo[0];
>>> +
>>> + unsigned PtrReg = GEPInfo.SgprParts[0];
>>> + int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(Subtarget, GEPInfo.Imm);
>>> + if (AMDGPU::isLegalSMRDImmOffset(Subtarget, GEPInfo.Imm)) {
>>> + Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);
>>> +
>>> + MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
>>> + .addReg(PtrReg)
>>> + .addImm(EncodedImm)
>>> + .addImm(0); // glc
>>> + return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
>>> + }
>>> +
>>> + if (Subtarget.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS &&
>>> + isUInt<32>(EncodedImm)) {
>>> + Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM_ci, LoadSize);
>>> + MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
>>> + .addReg(PtrReg)
>>> + .addImm(EncodedImm)
>>> + .addImm(0); // glc
>>> + return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
>>> + }
>>> +
>>> + if (isUInt<32>(GEPInfo.Imm)) {
>>> + Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_SGPR, LoadSize);
>>> + unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
>>> + BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), OffsetReg)
>>> + .addImm(GEPInfo.Imm);
>>> +
>>> + MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
>>> + .addReg(PtrReg)
>>> + .addReg(OffsetReg)
>>> + .addImm(0); // glc
>>> + return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
>>> + }
>>> + }
>>> +
>>> + unsigned PtrReg = I.getOperand(1).getReg();
>>> + Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);
>>> + MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
>>> + .addReg(PtrReg)
>>> + .addImm(0)
>>> + .addImm(0); // glc
>>> + return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
>>> +}
>>> +
>>> +
>>> +bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
>>> + MachineBasicBlock *BB = I.getParent();
>>> + MachineFunction *MF = BB->getParent();
>>> + MachineRegisterInfo &MRI = MF->getRegInfo();
>>> + DebugLoc DL = I.getDebugLoc();
>>> + unsigned DstReg = I.getOperand(0).getReg();
>>> + unsigned PtrReg = I.getOperand(1).getReg();
>>> + unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
>>> + unsigned Opcode;
>>> +
>>> + SmallVector<GEPInfo, 4> AddrInfo;
>>> +
>>> + getAddrModeInfo(I, MRI, AddrInfo);
>>> +
>>> + if (selectSMRD(I, AddrInfo)) {
>>> + I.eraseFromParent();
>>> + return true;
>>> + }
>>> +
>>> + switch (LoadSize) {
>>> + default:
>>> + llvm_unreachable("Load size not supported\n");
>>> + case 32:
>>> + Opcode = AMDGPU::FLAT_LOAD_DWORD;
>>> + break;
>>> + case 64:
>>> + Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
>>> + break;
>>> + }
>>> +
>>> + MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
>>> + .add(I.getOperand(0))
>>> + .addReg(PtrReg)
>>> + .addImm(0)
>>> + .addImm(0)
>>> + .addImm(0);
>>> +
>>> + bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
>>> + I.eraseFromParent();
>>> + return Ret;
>>> +}
>>> +
>>> +bool AMDGPUInstructionSelector::select(MachineInstr &I) const {
>>> +
>>> + if (!isPreISelGenericOpcode(I.getOpcode()))
>>> + return true;
>>> +
>>> + switch (I.getOpcode()) {
>>> + default:
>>> + break;
>>> + case TargetOpcode::G_ADD:
>>> + return selectG_ADD(I);
>>> + case TargetOpcode::G_CONSTANT:
>>> + return selectG_CONSTANT(I);
>>> + case TargetOpcode::G_GEP:
>>> + return selectG_GEP(I);
>>> + case TargetOpcode::G_LOAD:
>>> + return selectG_LOAD(I);
>>> + case TargetOpcode::G_STORE:
>>> + return selectG_STORE(I);
>>> + }
>>> + return false;
>>> +}
>>>
>>> Added: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h?rev=293551&view=auto
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h (added)
>>> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h Mon Jan 30 15:56:46 2017
>>> @@ -0,0 +1,65 @@
>>> +//===- AMDGPUInstructionSelector --------------------------------*- C++ -*-==//
>>> +//
>>> +// The LLVM Compiler Infrastructure
>>> +//
>>> +// This file is distributed under the University of Illinois Open Source
>>> +// License. See LICENSE.TXT for details.
>>> +//
>>> +//===----------------------------------------------------------------------===//
>>> +/// \file
>>> +/// This file declares the targeting of the InstructionSelector class for
>>> +/// AMDGPU.
>>> +//===----------------------------------------------------------------------===//
>>> +
>>> +#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H
>>> +#define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H
>>> +
>>> +#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
>>> +#include "llvm/ADT/ArrayRef.h"
>>> +#include "llvm/ADT/SmallVector.h"
>>> +
>>> +namespace llvm {
>>> +
>>> +class AMDGPUInstrInfo;
>>> +class AMDGPURegisterBankInfo;
>>> +class MachineInstr;
>>> +class MachineOperand;
>>> +class MachineRegisterInfo;
>>> +class SIInstrInfo;
>>> +class SIRegisterInfo;
>>> +class SISubtarget;
>>> +
>>> +class AMDGPUInstructionSelector : public InstructionSelector {
>>> +public:
>>> + AMDGPUInstructionSelector(const SISubtarget &STI,
>>> + const AMDGPURegisterBankInfo &RBI);
>>> +
>>> + bool select(MachineInstr &I) const override;
>>> +
>>> +private:
>>> + struct GEPInfo {
>>> + const MachineInstr &GEP;
>>> + SmallVector<unsigned, 2> SgprParts;
>>> + SmallVector<unsigned, 2> VgprParts;
>>> + int64_t Imm;
>>> + GEPInfo(const MachineInstr &GEP) : GEP(GEP), Imm(0) { }
>>> + };
>>> +
>>> + MachineOperand getSubOperand64(MachineOperand &MO, unsigned SubIdx) const;
>>> + bool selectG_CONSTANT(MachineInstr &I) const;
>>> + bool selectG_ADD(MachineInstr &I) const;
>>> + bool selectG_GEP(MachineInstr &I) const;
>>> + bool hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const;
>>> + void getAddrModeInfo(const MachineInstr &Load, const MachineRegisterInfo &MRI,
>>> + SmallVectorImpl<GEPInfo> &AddrInfo) const;
>>> + bool selectSMRD(MachineInstr &I, ArrayRef<GEPInfo> AddrInfo) const;
>>> + bool selectG_LOAD(MachineInstr &I) const;
>>> + bool selectG_STORE(MachineInstr &I) const;
>>> +
>>> + const SIInstrInfo &TII;
>>> + const SIRegisterInfo &TRI;
>>> + const AMDGPURegisterBankInfo &RBI;
>>> +};
>>> +
>>> +} // End llvm namespace.
>>> +#endif
>>>
>>> Added: llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp?rev=293551&view=auto
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (added)
>>> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp Mon Jan 30 15:56:46 2017
>>> @@ -0,0 +1,62 @@
>>> +//===- AMDGPULegalizerInfo.cpp -----------------------------------*- C++ -*-==//
>>> +//
>>> +// The LLVM Compiler Infrastructure
>>> +//
>>> +// This file is distributed under the University of Illinois Open Source
>>> +// License. See LICENSE.TXT for details.
>>> +//
>>> +//===----------------------------------------------------------------------===//
>>> +/// \file
>>> +/// This file implements the targeting of the MachineLegalizer class for
>>> +/// AMDGPU.
>>> +/// \todo This should be generated by TableGen.
>>> +//===----------------------------------------------------------------------===//
>>> +
>>> +#include "AMDGPULegalizerInfo.h"
>>> +#include "llvm/CodeGen/ValueTypes.h"
>>> +#include "llvm/IR/Type.h"
>>> +#include "llvm/IR/DerivedTypes.h"
>>> +#include "llvm/Target/TargetOpcodes.h"
>>> +#include "llvm/Support/Debug.h"
>>> +
>>> +using namespace llvm;
>>> +
>>> +#ifndef LLVM_BUILD_GLOBAL_ISEL
>>> +#error "You shouldn't build this"
>>> +#endif
>>> +
>>> +AMDGPULegalizerInfo::AMDGPULegalizerInfo() {
>>> + using namespace TargetOpcode;
>>> +
>>> + const LLT S32 = LLT::scalar(32);
>>> + const LLT S64 = LLT::scalar(64);
>>> + const LLT P1 = LLT::pointer(1, 64);
>>> + const LLT P2 = LLT::pointer(2, 64);
>>> +
>>> + setAction({G_CONSTANT, S64}, Legal);
>>> +
>>> + setAction({G_GEP, P1}, Legal);
>>> + setAction({G_GEP, P2}, Legal);
>>> + setAction({G_GEP, 1, S64}, Legal);
>>> +
>>> + setAction({G_LOAD, P1}, Legal);
>>> + setAction({G_LOAD, P2}, Legal);
>>> + setAction({G_LOAD, S32}, Legal);
>>> + setAction({G_LOAD, 1, P1}, Legal);
>>> + setAction({G_LOAD, 1, P2}, Legal);
>>> +
>>> + setAction({G_STORE, S32}, Legal);
>>> + setAction({G_STORE, 1, P1}, Legal);
>>> +
>>> + // FIXME: When RegBankSelect inserts copies, it will only create new
>>> + // registers with scalar types. This means we can end up with
>>> + // G_LOAD/G_STORE/G_GEP instructions with scalar types for their pointer
>>> + // operands. In assert builds, the instruction selector will assert
>>> + // if it sees a generic instruction which isn't legal, so we need to
>>> + // tell it that scalar types are legal for pointer operands
>>> + setAction({G_GEP, S64}, Legal);
>>> + setAction({G_LOAD, 1, S64}, Legal);
>>> + setAction({G_STORE, 1, S64}, Legal);
>>> +
>>> + computeTables();
>>> +}
>>>
>>> Added: llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.h?rev=293551&view=auto
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.h (added)
>>> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.h Mon Jan 30 15:56:46 2017
>>> @@ -0,0 +1,30 @@
>>> +//===- AMDGPULegalizerInfo ---------------------------------------*- C++ -*-==//
>>> +//
>>> +// The LLVM Compiler Infrastructure
>>> +//
>>> +// This file is distributed under the University of Illinois Open Source
>>> +// License. See LICENSE.TXT for details.
>>> +//
>>> +//===----------------------------------------------------------------------===//
>>> +/// \file
>>> +/// This file declares the targeting of the MachineLegalizer class for
>>> +/// AMDGPU.
>>> +/// \todo This should be generated by TableGen.
>>> +//===----------------------------------------------------------------------===//
>>> +
>>> +#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINELEGALIZER_H
>>> +#define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINELEGALIZER_H
>>> +
>>> +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
>>> +
>>> +namespace llvm {
>>> +
>>> +class LLVMContext;
>>> +
>>> +/// This class provides the information for the target register banks.
>>> +class AMDGPULegalizerInfo : public LegalizerInfo {
>>> +public:
>>> + AMDGPULegalizerInfo();
>>> +};
>>> +} // End llvm namespace.
>>> +#endif
>>>
>>> Added: llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp?rev=293551&view=auto
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (added)
>>> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp Mon Jan 30 15:56:46 2017
>>> @@ -0,0 +1,228 @@
>>> +//===- AMDGPURegisterBankInfo.cpp -------------------------------*- C++ -*-==//
>>> +//
>>> +// The LLVM Compiler Infrastructure
>>> +//
>>> +// This file is distributed under the University of Illinois Open Source
>>> +// License. See LICENSE.TXT for details.
>>> +//
>>> +//===----------------------------------------------------------------------===//
>>> +/// \file
>>> +/// This file implements the targeting of the RegisterBankInfo class for
>>> +/// AMDGPU.
>>> +/// \todo This should be generated by TableGen.
>>> +//===----------------------------------------------------------------------===//
>>> +
>>> +#include "AMDGPURegisterBankInfo.h"
>>> +#include "AMDGPUInstrInfo.h"
>>> +#include "SIRegisterInfo.h"
>>> +#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
>>> +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
>>> +#include "llvm/IR/Constants.h"
>>> +#include "llvm/Target/TargetRegisterInfo.h"
>>> +#include "llvm/Target/TargetSubtargetInfo.h"
>>> +
>>> +#define GET_TARGET_REGBANK_IMPL
>>> +#include "AMDGPUGenRegisterBank.inc"
>>> +
>>> +// This file will be TableGen'ed at some point.
>>> +#include "AMDGPUGenRegisterBankInfo.def"
>>> +
>>> +using namespace llvm;
>>> +
>>> +#ifndef LLVM_BUILD_GLOBAL_ISEL
>>> +#error "You shouldn't build this"
>>> +#endif
>>> +
>>> +AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI)
>>> + : AMDGPUGenRegisterBankInfo(),
>>> + TRI(static_cast<const SIRegisterInfo*>(&TRI)) {
>>> +
>>> + // HACK: Until this is fully tablegen'd
>>> + static bool AlreadyInit = false;
>>> + if (AlreadyInit)
>>> + return;
>>> +
>>> + AlreadyInit = true;
>>> +
>>> + const RegisterBank &RBSGPR = getRegBank(AMDGPU::SGPRRegBankID);
>>> + assert(&RBSGPR == &AMDGPU::SGPRRegBank);
>>> +
>>> + const RegisterBank &RBVGPR = getRegBank(AMDGPU::VGPRRegBankID);
>>> + assert(&RBVGPR == &AMDGPU::VGPRRegBank);
>>> +
>>> +}
>>> +
>>> +unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &A,
>>> + const RegisterBank &B,
>>> + unsigned Size) const {
>>> + return RegisterBankInfo::copyCost(A, B, Size);
>>> +}
>>> +
>>> +const RegisterBank &AMDGPURegisterBankInfo::getRegBankFromRegClass(
>>> + const TargetRegisterClass &RC) const {
>>> +
>>> + if (TRI->isSGPRClass(&RC))
>>> + return getRegBank(AMDGPU::SGPRRegBankID);
>>> +
>>> + return getRegBank(AMDGPU::VGPRRegBankID);
>>> +}
>>> +
>>> +RegisterBankInfo::InstructionMappings
>>> +AMDGPURegisterBankInfo::getInstrAlternativeMappings(
>>> + const MachineInstr &MI) const {
>>> +
>>> + const MachineFunction &MF = *MI.getParent()->getParent();
>>> + const MachineRegisterInfo &MRI = MF.getRegInfo();
>>> +
>>> + unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
>>> +
>>> + InstructionMappings AltMappings;
>>> + switch (MI.getOpcode()) {
>>> + case TargetOpcode::G_LOAD: {
>>> + // FIXME: Should we be hard coding the size for these mappings?
>>> + InstructionMapping SSMapping(1, 1,
>>> + getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
>>> + AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
>>> + 2); // Num Operands
>>> + AltMappings.emplace_back(std::move(SSMapping));
>>> +
>>> + InstructionMapping VVMapping(2, 1,
>>> + getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
>>> + AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64)}),
>>> + 2); // Num Operands
>>> + AltMappings.emplace_back(std::move(VVMapping));
>>> +
>>> + // FIXME: Should this be the pointer size (64 bits) or the size of the
>>> + // register that will hold the buffer resource (128 bits)?
>>> + InstructionMapping VSMapping(3, 1,
>>> + getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
>>> + AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
>>> + 2); // Num Operands
>>> + AltMappings.emplace_back(std::move(VSMapping));
>>> +
>>> + return AltMappings;
>>> +
>>> + }
>>> + default:
>>> + break;
>>> + }
>>> + return RegisterBankInfo::getInstrAlternativeMappings(MI);
>>> +}
>>> +
>>> +void AMDGPURegisterBankInfo::applyMappingImpl(
>>> + const OperandsMapper &OpdMapper) const {
>>> + return applyDefaultMapping(OpdMapper);
>>> +}
>>> +
>>> +static bool isInstrUniform(const MachineInstr &MI) {
>>> + if (!MI.hasOneMemOperand())
>>> + return false;
>>> +
>>> + const MachineMemOperand *MMO = *MI.memoperands_begin();
>>> + return AMDGPU::isUniformMMO(MMO);
>>> +}
>>> +
>>> +RegisterBankInfo::InstructionMapping
>>> +AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
>>> +
>>> + const MachineFunction &MF = *MI.getParent()->getParent();
>>> + const MachineRegisterInfo &MRI = MF.getRegInfo();
>>> + RegisterBankInfo::InstructionMapping Mapping =
>>> + InstructionMapping{1, 1, nullptr, MI.getNumOperands()};
>>> + SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
>>> + unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
>>> + unsigned PtrSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
>>> +
>>> + const ValueMapping *ValMapping;
>>> + const ValueMapping *PtrMapping;
>>> +
>>> + if (isInstrUniform(MI)) {
>>> + // We have a uniform instruction so we want to use an SMRD load
>>> + ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
>>> + PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize);
>>> + } else {
>>> + ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
>>> + // FIXME: What would happen if we used SGPRRegBankID here?
>>> + PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize);
>>> + }
>>> +
>>> + OpdsMapping[0] = ValMapping;
>>> + OpdsMapping[1] = PtrMapping;
>>> + Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
>>> + return Mapping;
>>> +
>>> + // FIXME: Do we want to add a mapping for FLAT load, or should we just
>>> + // handle that during instruction selection?
>>> +}
>>> +
>>> +RegisterBankInfo::InstructionMapping
>>> +AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
>>> + RegisterBankInfo::InstructionMapping Mapping = getInstrMappingImpl(MI);
>>> +
>>> + if (Mapping.isValid())
>>> + return Mapping;
>>> +
>>> + const MachineFunction &MF = *MI.getParent()->getParent();
>>> + const MachineRegisterInfo &MRI = MF.getRegInfo();
>>> + Mapping = InstructionMapping{1, 1, nullptr, MI.getNumOperands()};
>>> + SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
>>> +
>>> + switch (MI.getOpcode()) {
>>> + default: break;
>>> + case AMDGPU::G_CONSTANT: {
>>> + unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
>>> + OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
>>> + Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
>>> + return Mapping;
>>> + }
>>> + case AMDGPU::G_GEP: {
>>> + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
>>> + if (!MI.getOperand(i).isReg())
>>> + continue;
>>> +
>>> + unsigned Size = MRI.getType(MI.getOperand(i).getReg()).getSizeInBits();
>>> + OpdsMapping[i] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
>>> + }
>>> + Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
>>> + return Mapping;
>>> + }
>>> + case AMDGPU::G_STORE: {
>>> + assert(MI.getOperand(0).isReg());
>>> + unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
>>> + // FIXME: We need to specify a different reg bank once scalar stores
>>> + // are supported.
>>> + const ValueMapping *ValMapping =
>>> + AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
>>> + // FIXME: Depending on the type of store, the pointer could be in
>>> + // the SGPR Reg bank.
>>> + // FIXME: Pointer size should be based on the address space.
>>> + const ValueMapping *PtrMapping =
>>> + AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
>>> +
>>> + OpdsMapping[0] = ValMapping;
>>> + OpdsMapping[1] = PtrMapping;
>>> + Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
>>> + return Mapping;
>>> + }
>>> +
>>> + case AMDGPU::G_LOAD:
>>> + return getInstrMappingForLoad(MI);
>>> + }
>>> +
>>> + unsigned BankID = AMDGPU::SGPRRegBankID;
>>> +
>>> + Mapping = InstructionMapping{1, 1, nullptr, MI.getNumOperands()};
>>> + unsigned Size = 0;
>>> + for (unsigned Idx = 0; Idx < MI.getNumOperands(); ++Idx) {
>>> + // If the operand is not a register default to the size of the previous
>>> + // operand.
>>> + // FIXME: Can't we pull the types from the MachineInstr rather than the
>>> + // operands.
>>> + if (MI.getOperand(Idx).isReg())
>>> + Size = getSizeInBits(MI.getOperand(Idx).getReg(), MRI, *TRI);
>>> + OpdsMapping.push_back(AMDGPU::getValueMapping(BankID, Size));
>>> + }
>>> + Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
>>> +
>>> + return Mapping;
>>> +}
>>>
>>> Added: llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h?rev=293551&view=auto
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h (added)
>>> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h Mon Jan 30 15:56:46 2017
>>> @@ -0,0 +1,65 @@
>>> +//===- AMDGPURegisterBankInfo -----------------------------------*- C++ -*-==//
>>> +//
>>> +// The LLVM Compiler Infrastructure
>>> +//
>>> +// This file is distributed under the University of Illinois Open Source
>>> +// License. See LICENSE.TXT for details.
>>> +//
>>> +//===----------------------------------------------------------------------===//
>>> +/// \file
>>> +/// This file declares the targeting of the RegisterBankInfo class for AMDGPU.
>>> +/// \todo This should be generated by TableGen.
>>> +//===----------------------------------------------------------------------===//
>>> +
>>> +#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERBANKINFO_H
>>> +#define LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERBANKINFO_H
>>> +
>>> +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
>>> +
>>> +namespace llvm {
>>> +
>>> +class SIRegisterInfo;
>>> +class TargetRegisterInfo;
>>> +
>>> +namespace AMDGPU {
>>> +enum {
>>> + SGPRRegBankID = 0,
>>> + VGPRRegBankID = 1,
>>> + NumRegisterBanks
>>> +};
>>> +} // End AMDGPU namespace.
>>> +
>>> +/// This class provides the information for the target register banks.
>>> +class AMDGPUGenRegisterBankInfo : public RegisterBankInfo {
>>> +
>>> +protected:
>>> +
>>> +#define GET_TARGET_REGBANK_CLASS
>>> +#include "AMDGPUGenRegisterBank.inc"
>>> +
>>> +};
>>> +class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
>>> + const SIRegisterInfo *TRI;
>>> +
>>> + /// See RegisterBankInfo::applyMapping.
>>> + void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
>>> +
>>> + RegisterBankInfo::InstructionMapping
>>> + getInstrMappingForLoad(const MachineInstr &MI) const;
>>> +
>>> +public:
>>> + AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI);
>>> +
>>> + unsigned copyCost(const RegisterBank &A, const RegisterBank &B,
>>> + unsigned Size) const override;
>>> +
>>> + const RegisterBank &
>>> + getRegBankFromRegClass(const TargetRegisterClass &RC) const override;
>>> +
>>> + InstructionMappings
>>> + getInstrAlternativeMappings(const MachineInstr &MI) const override;
>>> +
>>> + InstructionMapping getInstrMapping(const MachineInstr &MI) const override;
>>> +};
>>> +} // End llvm namespace.
>>> +#endif
>>>
>>> Added: llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBanks.td
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBanks.td?rev=293551&view=auto
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBanks.td (added)
>>> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBanks.td Mon Jan 30 15:56:46 2017
>>> @@ -0,0 +1,16 @@
>>> +//=- AMDGPURegisterBank.td - Describe the AMDGPU Banks -------*- tablegen -*-=//
>>> +//
>>> +// The LLVM Compiler Infrastructure
>>> +//
>>> +// This file is distributed under the University of Illinois Open Source
>>> +// License. See LICENSE.TXT for details.
>>> +//
>>> +//===----------------------------------------------------------------------===//
>>> +
>>> +def SGPRRegBank : RegisterBank<"SGPR",
>>> + [SReg_32, SReg_64, SReg_128, SReg_256, SReg_512]
>>> +>;
>>> +
>>> +def VGPRRegBank : RegisterBank<"VGPR",
>>> + [VGPR_32, VReg_64, VReg_96, VReg_128, VReg_256, VReg_512]
>>> +>;
>>>
>>> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=293551&r1=293550&r2=293551&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
>>> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Mon Jan 30 15:56:46 2017
>>> @@ -517,6 +517,21 @@ public:
>>> return GISel->getCallLowering();
>>> }
>>>
>>> + const InstructionSelector *getInstructionSelector() const override {
>>> + assert(GISel && "Access to GlobalISel APIs not set");
>>> + return GISel->getInstructionSelector();
>>> + }
>>> +
>>> + const LegalizerInfo *getLegalizerInfo() const override {
>>> + assert(GISel && "Access to GlobalISel APIs not set");
>>> + return GISel->getLegalizerInfo();
>>> + }
>>> +
>>> + const RegisterBankInfo *getRegBankInfo() const override {
>>> + assert(GISel && "Access to GlobalISel APIs not set");
>>> + return GISel->getRegBankInfo();
>>> + }
>>> +
>>> const SIRegisterInfo *getRegisterInfo() const override {
>>> return &InstrInfo.getRegisterInfo();
>>> }
>>>
>>> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp?rev=293551&r1=293550&r2=293551&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (original)
>>> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp Mon Jan 30 15:56:46 2017
>>> @@ -16,18 +16,20 @@
>>> #include "AMDGPUTargetMachine.h"
>>> #include "AMDGPU.h"
>>> #include "AMDGPUCallLowering.h"
>>> +#include "AMDGPUInstructionSelector.h"
>>> +#include "AMDGPULegalizerInfo.h"
>>> +#ifdef LLVM_BUILD_GLOBAL_ISEL
>>> +#include "AMDGPURegisterBankInfo.h"
>>> +#endif
>>> #include "AMDGPUTargetObjectFile.h"
>>> #include "AMDGPUTargetTransformInfo.h"
>>> #include "GCNSchedStrategy.h"
>>> #include "R600MachineScheduler.h"
>>> #include "SIMachineScheduler.h"
>>> -#include "llvm/ADT/SmallString.h"
>>> -#include "llvm/ADT/STLExtras.h"
>>> -#include "llvm/ADT/StringRef.h"
>>> -#include "llvm/ADT/Triple.h"
>>> -#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
>>> +#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
>>> #include "llvm/CodeGen/GlobalISel/IRTranslator.h"
>>> -#include "llvm/CodeGen/MachineScheduler.h"
>>> +#include "llvm/CodeGen/GlobalISel/Legalizer.h"
>>> +#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
>>> #include "llvm/CodeGen/Passes.h"
>>> #include "llvm/CodeGen/TargetPassConfig.h"
>>> #include "llvm/Support/TargetRegistry.h"
>>> @@ -287,9 +289,21 @@ namespace {
>>>
>>> struct SIGISelActualAccessor : public GISelAccessor {
>>> std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
>>> + std::unique_ptr<InstructionSelector> InstSelector;
>>> + std::unique_ptr<LegalizerInfo> Legalizer;
>>> + std::unique_ptr<RegisterBankInfo> RegBankInfo;
>>> const AMDGPUCallLowering *getCallLowering() const override {
>>> return CallLoweringInfo.get();
>>> }
>>> + const InstructionSelector *getInstructionSelector() const override {
>>> + return InstSelector.get();
>>> + }
>>> + const LegalizerInfo *getLegalizerInfo() const override {
>>> + return Legalizer.get();
>>> + }
>>> + const RegisterBankInfo *getRegBankInfo() const override {
>>> + return RegBankInfo.get();
>>> + }
>>> };
>>>
>>> } // end anonymous namespace
>>> @@ -323,6 +337,11 @@ const SISubtarget *GCNTargetMachine::get
>>> SIGISelActualAccessor *GISel = new SIGISelActualAccessor();
>>> GISel->CallLoweringInfo.reset(
>>> new AMDGPUCallLowering(*I->getTargetLowering()));
>>> + GISel->Legalizer.reset(new AMDGPULegalizerInfo());
>>> +
>>> + GISel->RegBankInfo.reset(new AMDGPURegisterBankInfo(*I->getRegisterInfo()));
>>> + GISel->InstSelector.reset(new AMDGPUInstructionSelector(*I,
>>> + *static_cast<AMDGPURegisterBankInfo*>(GISel->RegBankInfo.get())));
>>> #endif
>>>
>>> I->setGISelAccessor(*GISel);
>>> @@ -623,16 +642,20 @@ bool GCNPassConfig::addIRTranslator() {
>>> }
>>>
>>> bool GCNPassConfig::addLegalizeMachineIR() {
>>> + addPass(new Legalizer());
>>> return false;
>>> }
>>>
>>> bool GCNPassConfig::addRegBankSelect() {
>>> + addPass(new RegBankSelect());
>>> return false;
>>> }
>>>
>>> bool GCNPassConfig::addGlobalInstructionSelect() {
>>> + addPass(new InstructionSelect());
>>> return false;
>>> }
>>> +
>>> #endif
>>>
>>> void GCNPassConfig::addPreRegAlloc() {
>>>
>>> Modified: llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt?rev=293551&r1=293550&r2=293551&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt (original)
>>> +++ llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt Mon Jan 30 15:56:46 2017
>>> @@ -12,11 +12,17 @@ tablegen(LLVM AMDGPUGenAsmWriter.inc -ge
>>> tablegen(LLVM AMDGPUGenAsmMatcher.inc -gen-asm-matcher)
>>> tablegen(LLVM AMDGPUGenDisassemblerTables.inc -gen-disassembler)
>>> tablegen(LLVM AMDGPUGenMCPseudoLowering.inc -gen-pseudo-lowering)
>>> +if(LLVM_BUILD_GLOBAL_ISEL)
>>> + tablegen(LLVM AMDGPUGenRegisterBank.inc -gen-register-bank)
>>> +endif()
>>> add_public_tablegen_target(AMDGPUCommonTableGen)
>>>
>>> # List of all GlobalISel files.
>>> set(GLOBAL_ISEL_FILES
>>> AMDGPUCallLowering.cpp
>>> + AMDGPUInstructionSelector.cpp
>>> + AMDGPULegalizerInfo.cpp
>>> + AMDGPURegisterBankInfo.cpp
>>> )
>>>
>>> # Add GlobalISel files to the dependencies if the user wants to build it.
>>>
>>> Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp?rev=293551&r1=293550&r2=293551&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (original)
>>> +++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Mon Jan 30 15:56:46 2017
>>> @@ -10,10 +10,10 @@
>>> #include "AMDGPU.h"
>>> #include "SIDefines.h"
>>> #include "llvm/CodeGen/MachineMemOperand.h"
>>> -#include "llvm/IR/LLVMContext.h"
>>> #include "llvm/IR/Constants.h"
>>> #include "llvm/IR/Function.h"
>>> #include "llvm/IR/GlobalValue.h"
>>> +#include "llvm/IR/LLVMContext.h"
>>> #include "llvm/MC/MCContext.h"
>>> #include "llvm/MC/MCInstrInfo.h"
>>> #include "llvm/MC/MCRegisterInfo.h"
>>>
>>> Added: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir?rev=293551&view=auto
>>> ==============================================================================
>>> --- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir (added)
>>> +++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir Mon Jan 30 15:56:46 2017
>>> @@ -0,0 +1,27 @@
>>> +# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
>>> +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
>>> +
>>> +# REQUIRES: global-isel
>>> +
>>> +--- |
>>> + define void @global_addrspace(i32 addrspace(1)* %global0) { ret void }
>>> +...
>>> +---
>>> +
>>> +name: global_addrspace
>>> +legalized: true
>>> +regBankSelected: true
>>> +
>>> +# GCN: global_addrspace
>>> +# GCN: [[PTR:%[0-9]+]] = COPY %vgpr0_vgpr1
>>> +# GCN: FLAT_LOAD_DWORD [[PTR]], 0, 0, 0
>>> +
>>> +body: |
>>> + bb.0:
>>> + liveins: %vgpr0_vgpr1
>>> +
>>> + %0:vgpr(p1) = COPY %vgpr0_vgpr1
>>> + %1:vgpr(s32) = G_LOAD %0 :: (load 4 from %ir.global0)
>>> +
>>> +...
>>> +---
>>>
>>> Added: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir?rev=293551&view=auto
>>> ==============================================================================
>>> --- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir (added)
>>> +++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir Mon Jan 30 15:56:46 2017
>>> @@ -0,0 +1,143 @@
>>> +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN,SI,SICI,SIVI
>>> +# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN,CI,SICI
>>> +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN,VI,SIVI
>>> +
>>> +# REQUIRES: global-isel
>>> +
>>> +--- |
>>> + define void @smrd_imm(i32 addrspace(2)* %const0) { ret void }
>>> +...
>>> +---
>>> +
>>> +name: smrd_imm
>>> +legalized: true
>>> +regBankSelected: true
>>> +
>>> +# GCN: body:
>>> +# GCN: [[PTR:%[0-9]+]] = COPY %sgpr0_sgpr1
>>> +
>>> +# Immediate offset:
>>> +# SICI: S_LOAD_DWORD_IMM [[PTR]], 1, 0
>>> +# VI: S_LOAD_DWORD_IMM [[PTR]], 4, 0
>>> +
>>> +# Max immediate offset for SI
>>> +# SICI: S_LOAD_DWORD_IMM [[PTR]], 255, 0
>>> +# VI: S_LOAD_DWORD_IMM [[PTR]], 1020, 0
>>> +
>>> +# Immediate overflow for SI
>>> +# FIXME: The immediate gets selected twice, once into the
>>> +# S_LOAD_DWORD instruction and once just as a normal constant.
>>> +# SI: S_MOV_B32 1024
>>> +# SI: [[K1024:%[0-9]+]] = S_MOV_B32 1024
>>> +# SI: S_LOAD_DWORD_SGPR [[PTR]], [[K1024]], 0
>>> +# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 256, 0
>>> +# VI: S_LOAD_DWORD_IMM [[PTR]], 1024, 0
>>> +
>>> +# Max immediate offset for VI
>>> +# SI: S_MOV_B32 1048572
>>> +# SI: [[K1048572:%[0-9]+]] = S_MOV_B32 1048572
>>> +# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262143
>>> +# VI: S_LOAD_DWORD_IMM [[PTR]], 1048572
>>> +
>>> +#
>>> +# Immediate overflow for VI
>>> +# FIXME: The immediate gets selected twice, once into the
>>> +# S_LOAD_DWORD instruction and once just as a normal constant.
>>> +# SIVI: S_MOV_B32 1048576
>>> +# SIVI: [[K1048576:%[0-9]+]] = S_MOV_B32 1048576
>>> +# SIVI: S_LOAD_DWORD_SGPR [[PTR]], [[K1048576]], 0
>>> +# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262144, 0
>>> +
>>> +# Max immediate for CI
>>> +# SIVI: [[K_LO:%[0-9]+]] = S_MOV_B32 4294967292
>>> +# SIVI: [[K_HI:%[0-9]+]] = S_MOV_B32 3
>>> +# SIVI: [[K:%[0-9]+]] = REG_SEQUENCE [[K_LO]], 1, [[K_HI]], 2
>>> +# SIVI: [[K_SUB0:%[0-9]+]] = COPY [[K]].sub0
>>> +# SIVI: [[PTR_LO:%[0-9]+]] = COPY [[PTR]].sub0
>>> +# SIVI: [[ADD_PTR_LO:%[0-9]+]] = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
>>> +# SIVI: [[K_SUB1:%[0-9]+]] = COPY [[K]].sub1
>>> +# SIVI: [[PTR_HI:%[0-9]+]] = COPY [[PTR]].sub1
>>> +# SIVI: [[ADD_PTR_HI:%[0-9]+]] = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
>>> +# SIVI: [[ADD_PTR:%[0-9]+]] = REG_SEQUENCE [[ADD_PTR_LO]], 1, [[ADD_PTR_HI]], 2
>>> +# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
>>> +# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 4294967295, 0
>>> +
>>> +# Immediate overflow for CI
>>> +# GCN: [[K_LO:%[0-9]+]] = S_MOV_B32 0
>>> +# GCN: [[K_HI:%[0-9]+]] = S_MOV_B32 4
>>> +# GCN: [[K:%[0-9]+]] = REG_SEQUENCE [[K_LO]], 1, [[K_HI]], 2
>>> +# GCN: [[K_SUB0:%[0-9]+]] = COPY [[K]].sub0
>>> +# GCN: [[PTR_LO:%[0-9]+]] = COPY [[PTR]].sub0
>>> +# GCN: [[ADD_PTR_LO:%[0-9]+]] = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
>>> +# GCN: [[K_SUB1:%[0-9]+]] = COPY [[K]].sub1
>>> +# GCN: [[PTR_HI:%[0-9]+]] = COPY [[PTR]].sub1
>>> +# GCN: [[ADD_PTR_HI:%[0-9]+]] = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
>>> +# GCN: [[ADD_PTR:%[0-9]+]] = REG_SEQUENCE [[ADD_PTR_LO]], 1, [[ADD_PTR_HI]], 2
>>> +# GCN: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
>>> +
>>> +# Max 32-bit byte offset
>>> +# FIXME: The immediate gets selected twice, once into the
>>> +# S_LOAD_DWORD instruction and once just as a normal constant.
>>> +# SIVI: S_MOV_B32 4294967292
>>> +# SIVI: [[K4294967292:%[0-9]+]] = S_MOV_B32 4294967292
>>> +# SIVI: S_LOAD_DWORD_SGPR [[PTR]], [[K4294967292]], 0
>>> +# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741823, 0
>>> +
>>> +# Overflow 32-bit byte offset
>>> +# SIVI: [[K_LO:%[0-9]+]] = S_MOV_B32 0
>>> +# SIVI: [[K_HI:%[0-9]+]] = S_MOV_B32 1
>>> +# SIVI: [[K:%[0-9]+]] = REG_SEQUENCE [[K_LO]], 1, [[K_HI]], 2
>>> +# SIVI: [[K_SUB0:%[0-9]+]] = COPY [[K]].sub0
>>> +# SIVI: [[PTR_LO:%[0-9]+]] = COPY [[PTR]].sub0
>>> +# SIVI: [[ADD_PTR_LO:%[0-9]+]] = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
>>> +# SIVI: [[K_SUB1:%[0-9]+]] = COPY [[K]].sub1
>>> +# SIVI: [[PTR_HI:%[0-9]+]] = COPY [[PTR]].sub1
>>> +# SIVI: [[ADD_PTR_HI:%[0-9]+]] = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
>>> +# SIVI: [[ADD_PTR:%[0-9]+]] = REG_SEQUENCE [[ADD_PTR_LO]], 1, [[ADD_PTR_HI]], 2
>>> +# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
>>> +# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741824, 0
>>> +
>>> +body: |
>>> + bb.0:
>>> + liveins: %sgpr0_sgpr1
>>> +
>>> + %0:sgpr(p2) = COPY %sgpr0_sgpr1
>>> +
>>> + %1:sgpr(s64) = G_CONSTANT i64 4
>>> + %2:sgpr(p2) = G_GEP %0, %1
>>> + %3:sgpr(s32) = G_LOAD %2 :: (load 4 from %ir.const0)
>>> +
>>> + %4:sgpr(s64) = G_CONSTANT i64 1020
>>> + %5:sgpr(p2) = G_GEP %0, %4
>>> + %6:sgpr(s32) = G_LOAD %5 :: (load 4 from %ir.const0)
>>> +
>>> + %7:sgpr(s64) = G_CONSTANT i64 1024
>>> + %8:sgpr(p2) = G_GEP %0, %7
>>> + %9:sgpr(s32) = G_LOAD %8 :: (load 4 from %ir.const0)
>>> +
>>> + %10:sgpr(s64) = G_CONSTANT i64 1048572
>>> + %11:sgpr(p2) = G_GEP %0, %10
>>> + %12:sgpr(s32) = G_LOAD %11 :: (load 4 from %ir.const0)
>>> +
>>> + %13:sgpr(s64) = G_CONSTANT i64 1048576
>>> + %14:sgpr(p2) = G_GEP %0, %13
>>> + %15:sgpr(s32) = G_LOAD %14 :: (load 4 from %ir.const0)
>>> +
>>> + %16:sgpr(s64) = G_CONSTANT i64 17179869180
>>> + %17:sgpr(p2) = G_GEP %0, %16
>>> + %18:sgpr(s32) = G_LOAD %17 :: (load 4 from %ir.const0)
>>> +
>>> + %19:sgpr(s64) = G_CONSTANT i64 17179869184
>>> + %20:sgpr(p2) = G_GEP %0, %19
>>> + %21:sgpr(s32) = G_LOAD %20 :: (load 4 from %ir.const0)
>>> +
>>> + %22:sgpr(s64) = G_CONSTANT i64 4294967292
>>> + %23:sgpr(p2) = G_GEP %0, %22
>>> + %24:sgpr(s32) = G_LOAD %23 :: (load 4 from %ir.const0)
>>> +
>>> + %25:sgpr(s64) = G_CONSTANT i64 4294967296
>>> + %26:sgpr(p2) = G_GEP %0, %25
>>> + %27:sgpr(s32) = G_LOAD %26 :: (load 4 from %ir.const0)
>>> +
>>> +...
>>> +---
>>>
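The check blocks above boil down to how each generation encodes the SMRD offset: SI has an 8-bit immediate counted in dwords, CI adds a 32-bit literal immediate also counted in dwords, and VI switches to a 20-bit immediate counted in bytes; anything that does not fit is materialized into an SGPR and selected as S_LOAD_DWORD_SGPR. A minimal standalone sketch of that rule (my own summary of the ISA limits, not code from the patch):

  #include <cstdint>
  #include <optional>

  enum class Gen { SI, CI, VI };

  // Value that would go into the SMRD immediate field for a given byte offset,
  // or nullopt if the offset has to be placed in an SGPR instead.
  std::optional<uint64_t> smrdImmOffset(Gen G, uint64_t ByteOffset) {
    switch (G) {
    case Gen::SI: // 8-bit immediate, in dwords
      if (ByteOffset % 4 == 0 && ByteOffset / 4 <= 255)
        return ByteOffset / 4;
      return std::nullopt;
    case Gen::CI: // 32-bit literal immediate, in dwords
      if (ByteOffset % 4 == 0 && ByteOffset / 4 <= UINT32_MAX)
        return ByteOffset / 4;
      return std::nullopt;
    case Gen::VI: // 20-bit immediate, in bytes
      if (ByteOffset <= 0xFFFFF)
        return ByteOffset;
      return std::nullopt;
    }
    return std::nullopt;
  }

Under that model a byte offset of 1020 encodes as 255 on SI/CI and as 1020 on VI, 1024 already overflows SI's field (hence the S_MOV_B32 1024 plus S_LOAD_DWORD_SGPR checks), and 1048576 overflows VI's field as well, which is what the SIVI fallback checks exercise.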
>>> Added: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir?rev=293551&view=auto
>>> ==============================================================================
>>> --- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir (added)
>>> +++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir Mon Jan 30 15:56:46 2017
>>> @@ -0,0 +1,29 @@
>>> +# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
>>> +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
>>> +
>>> +# REQUIRES: global-isel
>>> +
>>> +--- |
>>> + define void @global_addrspace(i32 addrspace(1)* %global0) { ret void }
>>> +...
>>> +---
>>> +
>>> +name: global_addrspace
>>> +legalized: true
>>> +regBankSelected: true
>>> +
>>> +# GCN: global_addrspace
>>> +# GCN: [[PTR:%[0-9]+]] = COPY %vgpr0_vgpr1
>>> +# GCN: [[VAL:%[0-9]+]] = COPY %vgpr2
>>> +# GCN: FLAT_STORE_DWORD [[PTR]], [[VAL]], 0, 0, 0
>>> +
>>> +body: |
>>> + bb.0:
>>> + liveins: %vgpr0_vgpr1, %vgpr2
>>> +
>>> + %0:vgpr(p1) = COPY %vgpr0_vgpr1
>>> + %1:vgpr(s32) = COPY %vgpr2
>>> + G_STORE %1, %0 :: (store 4 into %ir.global0)
>>> +
>>> +...
>>> +---
>>>
>>> Added: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir?rev=293551&view=auto
>>> ==============================================================================
>>> --- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir (added)
>>> +++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir Mon Jan 30 15:56:46 2017
>>> @@ -0,0 +1,69 @@
>>> +# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=regbankselect -global-isel %s -verify-machineinstrs -o - | FileCheck %s
>>> +
>>> +# REQUIRES: global-isel
>>> +
>>> +--- |
>>> + define void @load_constant(i32 addrspace(2)* %ptr0) { ret void }
>>> + define void @load_global_uniform(i32 addrspace(1)* %ptr1) {
>>> + %tmp0 = load i32, i32 addrspace(1)* %ptr1
>>> + ret void
>>> + }
>>> + define void @load_global_non_uniform(i32 addrspace(1)* %ptr2) {
>>> + %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0
>>> + %tmp1 = getelementptr i32, i32 addrspace(1)* %ptr2, i32 %tmp0
>>> + %tmp2 = load i32, i32 addrspace(1)* %tmp1
>>> + ret void
>>> + }
>>> + declare i32 @llvm.amdgcn.workitem.id.x() #0
>>> + attributes #0 = { nounwind readnone }
>>> +...
>>> +
>>> +---
>>> +name: load_constant
>>> +legalized: true
>>> +
>>> +# CHECK-LABEL: name: load_constant
>>> +# CHECK: registers:
>>> +# CHECK: - { id: 0, class: sgpr }
>>> +# CHECK: - { id: 1, class: sgpr }
>>> +
>>> +body: |
>>> + bb.0:
>>> + liveins: %sgpr0_sgpr1
>>> + %0:_(p2) = COPY %sgpr0_sgpr1
>>> + %1:_(s32) = G_LOAD %0 :: (load 4 from %ir.ptr0)
>>> +...
>>> +
>>> +---
>>> +name: load_global_uniform
>>> +legalized: true
>>> +
>>> +# CHECK-LABEL: name: load_global_uniform
>>> +# CHECK: registers:
>>> +# CHECK: - { id: 0, class: sgpr }
>>> +# CHECK: - { id: 1, class: sgpr }
>>> +
>>> +body: |
>>> + bb.0:
>>> + liveins: %sgpr0_sgpr1
>>> + %0:_(p1) = COPY %sgpr0_sgpr1
>>> + %1:_(s32) = G_LOAD %0 :: (load 4 from %ir.ptr1)
>>> +...
>>> +
>>> +---
>>> +name: load_global_non_uniform
>>> +legalized: true
>>> +
>>> +# CHECK-LABEL: name: load_global_non_uniform
>>> +# CHECK: registers:
>>> +# CHECK: - { id: 0, class: sgpr }
>>> +# CHECK: - { id: 1, class: vgpr }
>>> +# CHECK: - { id: 2, class: vgpr }
>>> +
>>> +
>>> +body: |
>>> + bb.0:
>>> + liveins: %sgpr0_sgpr1
>>> + %0:_(p1) = COPY %sgpr0_sgpr1
>>> + %1:_(s32) = G_LOAD %0 :: (load 4 from %ir.tmp1)
>>> +...
>>>
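The three functions above pin down when a load's result gets the scalar bank: the constant-address-space load and the global load through an untouched kernel argument both stay in sgpr, while the load whose address is offset by llvm.amdgcn.workitem.id.x is per-lane and is mapped to vgpr. A rough standalone illustration of that rule (my own sketch, not the AMDGPURegisterBankInfo code from this patch):

  #include <cstdio>

  // Hypothetical model of the bank choice the checks above expect; the real
  // decision lives in AMDGPURegisterBankInfo and is made per machine operand.
  enum class Bank { SGPR, VGPR };

  // A load result can stay on the scalar unit only if its address is uniform
  // across the wave; anything derived from llvm.amdgcn.workitem.id.x differs
  // per lane and has to go to vector registers.
  Bank bankForLoadResult(bool AddrDependsOnWorkItemId) {
    return AddrDependsOnWorkItemId ? Bank::VGPR : Bank::SGPR;
  }

  int main() {
    printf("load_constant:           %s\n",
           bankForLoadResult(false) == Bank::SGPR ? "sgpr" : "vgpr");
    printf("load_global_uniform:     %s\n",
           bankForLoadResult(false) == Bank::SGPR ? "sgpr" : "vgpr");
    printf("load_global_non_uniform: %s\n",
           bankForLoadResult(true) == Bank::SGPR ? "sgpr" : "vgpr");
  }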
>>> Added: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/shader-epilogs.ll
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/shader-epilogs.ll?rev=293551&view=auto
>>> ==============================================================================
>>> --- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/shader-epilogs.ll (added)
>>> +++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/shader-epilogs.ll Mon Jan 30 15:56:46 2017
>>> @@ -0,0 +1,11 @@
>>> +; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=GCN %s
>>> +
>>> +; REQUIRES: global-isel
>>> +
>>> +; GCN-LABEL: vs_epilog
>>> +; GCN: s_endpgm
>>> +
>>> +define amdgpu_vs void @vs_epilog() {
>>> +main_body:
>>> + ret void
>>> +}
>>>
>>> Added: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/smrd.ll
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/smrd.ll?rev=293551&view=auto
>>> ==============================================================================
>>> --- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/smrd.ll (added)
>>> +++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/smrd.ll Mon Jan 30 15:56:46 2017
>>> @@ -0,0 +1,89 @@
>>> +; FIXME: Need to add support for mubuf stores to enable this on SI.
>>> +; XUN: llc < %s -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=SIVI %s
>>> +; RUN: llc < %s -march=amdgcn -mcpu=bonaire -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=CI --check-prefix=GCN %s
>>> +; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=SIVI %s
>>> +
>>> +; REQUIRES: global-isel
>>> +
>>> +; SMRD load with an immediate offset.
>>> +; GCN-LABEL: {{^}}smrd0:
>>> +; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
>>> +; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4
>>> +define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
>>> +entry:
>>> + %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 1
>>> + %1 = load i32, i32 addrspace(2)* %0
>>> + store i32 %1, i32 addrspace(1)* %out
>>> + ret void
>>> +}
>>> +
>>> +; SMRD load with the largest possible immediate offset.
>>> +; GCN-LABEL: {{^}}smrd1:
>>> +; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff,0x{{[0-9]+[137]}}
>>> +; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
>>> +define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
>>> +entry:
>>> + %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 255
>>> + %1 = load i32, i32 addrspace(2)* %0
>>> + store i32 %1, i32 addrspace(1)* %out
>>> + ret void
>>> +}
>>> +
>>> +; SMRD load with an offset greater than the largest possible immediate.
>>> +; GCN-LABEL: {{^}}smrd2:
>>> +; SI: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
>>> +; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
>>> +; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
>>> +; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
>>> +; GCN: s_endpgm
>>> +define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
>>> +entry:
>>> + %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 256
>>> + %1 = load i32, i32 addrspace(2)* %0
>>> + store i32 %1, i32 addrspace(1)* %out
>>> + ret void
>>> +}
>>> +
>>> +; SMRD load with a 64-bit offset
>>> +; GCN-LABEL: {{^}}smrd3:
>>> +; FIXME: There are too many copies here because we don't fold immediates
>>> +; through REG_SEQUENCE
>>> +; XSI: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0xb ; encoding: [0x0b
>>> +; TODO: Add VI checks
>>> +; XGCN: s_endpgm
>>> +define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
>>> +entry:
>>> + %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 4294967296 ; 2 ^ 32
>>> + %1 = load i32, i32 addrspace(2)* %0
>>> + store i32 %1, i32 addrspace(1)* %out
>>> + ret void
>>> +}
>>> +
>>> +; SMRD load with the largest possible immediate offset on VI
>>> +; GCN-LABEL: {{^}}smrd4:
>>> +; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc
>>> +; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
>>> +; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
>>> +; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
>>> +define void @smrd4(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
>>> +entry:
>>> + %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262143
>>> + %1 = load i32, i32 addrspace(2)* %0
>>> + store i32 %1, i32 addrspace(1)* %out
>>> + ret void
>>> +}
>>> +
>>> +; SMRD load with an offset greater than the largest possible immediate on VI
>>> +; GCN-LABEL: {{^}}smrd5:
>>> +; SIVI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000
>>> +; SIVI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
>>> +; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
>>> +; GCN: s_endpgm
>>> +define void @smrd5(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
>>> +entry:
>>> + %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262144
>>> + %1 = load i32, i32 addrspace(2)* %0
>>> + store i32 %1, i32 addrspace(1)* %out
>>> + ret void
>>> +}
>>> +
>>>
>>>
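For the .ll version the expected immediates follow from the gep arithmetic: indexing an i32 by N is a byte offset of 4*N, so smrd0's index 1 is 4 bytes (0x1 dwords on SI/CI, 0x4 bytes on VI), smrd1's index 255 is 1020 bytes (0xff dwords vs. 0x3fc bytes), and smrd2's index 256 is 1024 bytes, which no longer fits SI's 8-bit dword field and is moved into an SGPR first (the s_movk_i32 0x400). The same arithmetic gives 0xffffc/0x3ffff for index 262143 in smrd4 and 0x100000/0x40000 for index 262144 in smrd5.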
-------------- next part --------------
--- |
; ModuleID = '/data/repo/llvm-patch/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir'
source_filename = "/data/repo/llvm-patch/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir"
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
define void @smrd_imm(i32 addrspace(2)* %const0) #0 {
ret void
}
attributes #0 = { "target-cpu"="tahiti" }
...
---
name: smrd_imm
alignment: 0
exposesReturnsTwice: false
legalized: true
regBankSelected: true
selected: true
tracksRegLiveness: false
registers:
- { id: 0, class: sreg_64 }
- { id: 1, class: sreg_64 }
- { id: 2, class: sreg_64 }
- { id: 3, class: sreg_32_xm0_xexec }
- { id: 4, class: sreg_64 }
- { id: 5, class: sreg_64 }
- { id: 6, class: sreg_32_xm0_xexec }
- { id: 7, class: sreg_64 }
- { id: 8, class: sreg_64 }
- { id: 9, class: sreg_32_xm0_xexec }
- { id: 10, class: sreg_64 }
- { id: 11, class: sreg_64 }
- { id: 12, class: sreg_32_xm0_xexec }
- { id: 13, class: sreg_64 }
- { id: 14, class: sreg_64 }
- { id: 15, class: sreg_32_xm0_xexec }
- { id: 16, class: sreg_64 }
- { id: 17, class: sreg_64 }
- { id: 18, class: sreg_32_xm0_xexec }
- { id: 19, class: sreg_64 }
- { id: 20, class: sreg_64 }
- { id: 21, class: sreg_32_xm0_xexec }
- { id: 22, class: sreg_64 }
- { id: 23, class: sreg_64 }
- { id: 24, class: sreg_32_xm0_xexec }
- { id: 25, class: sreg_64 }
- { id: 26, class: sreg_64 }
- { id: 27, class: sreg_32_xm0_xexec }
- { id: 28, class: sreg_32 }
- { id: 29, class: sreg_32 }
- { id: 30, class: sgpr_32 }
- { id: 31, class: sgpr_32 }
- { id: 32, class: sgpr_32 }
- { id: 33, class: sgpr_32 }
- { id: 34, class: sreg_32 }
- { id: 35, class: sreg_32 }
- { id: 36, class: sreg_32 }
- { id: 37, class: sreg_32 }
- { id: 38, class: sreg_32 }
- { id: 39, class: sgpr_32 }
- { id: 40, class: sgpr_32 }
- { id: 41, class: sgpr_32 }
- { id: 42, class: sgpr_32 }
- { id: 43, class: sreg_32 }
- { id: 44, class: sreg_32 }
- { id: 45, class: sreg_32 }
- { id: 46, class: sreg_32 }
- { id: 47, class: sgpr_32 }
- { id: 48, class: sgpr_32 }
- { id: 49, class: sgpr_32 }
- { id: 50, class: sgpr_32 }
- { id: 51, class: sreg_32 }
- { id: 52, class: sreg_32 }
- { id: 53, class: sreg_32 }
- { id: 54, class: sreg_32 }
- { id: 55, class: sgpr_32 }
- { id: 56, class: sgpr_32 }
- { id: 57, class: sgpr_32 }
- { id: 58, class: sgpr_32 }
- { id: 59, class: sreg_32 }
- { id: 60, class: sreg_32 }
- { id: 61, class: sreg_32 }
- { id: 62, class: sreg_32 }
- { id: 63, class: sreg_32 }
- { id: 64, class: sgpr_32 }
- { id: 65, class: sgpr_32 }
- { id: 66, class: sgpr_32 }
- { id: 67, class: sgpr_32 }
- { id: 68, class: sreg_32 }
- { id: 69, class: sreg_32 }
- { id: 70, class: sreg_32 }
- { id: 71, class: sreg_32 }
- { id: 72, class: sreg_32 }
- { id: 73, class: sgpr_32 }
- { id: 74, class: sgpr_32 }
- { id: 75, class: sgpr_32 }
- { id: 76, class: sgpr_32 }
- { id: 77, class: sreg_32 }
- { id: 78, class: sreg_32 }
- { id: 79, class: sreg_32 }
- { id: 80, class: sreg_32 }
- { id: 81, class: sreg_32 }
- { id: 82, class: sgpr_32 }
- { id: 83, class: sgpr_32 }
- { id: 84, class: sgpr_32 }
- { id: 85, class: sgpr_32 }
- { id: 86, class: sreg_32 }
- { id: 87, class: sreg_32 }
- { id: 88, class: sreg_32 }
- { id: 89, class: sreg_32 }
- { id: 90, class: sgpr_32 }
- { id: 91, class: sgpr_32 }
- { id: 92, class: sgpr_32 }
- { id: 93, class: sgpr_32 }
- { id: 94, class: sreg_32 }
- { id: 95, class: sreg_32 }
- { id: 96, class: sreg_32 }
- { id: 97, class: sreg_32 }
- { id: 98, class: sgpr_32 }
- { id: 99, class: sgpr_32 }
- { id: 100, class: sgpr_32 }
- { id: 101, class: sgpr_32 }
- { id: 102, class: sreg_32 }
- { id: 103, class: sreg_32 }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 0
adjustsStack: false
hasCalls: false
maxCallFrameSize: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
body: |
bb.0:
%0 = COPY %sgpr0_sgpr1
%102 = S_MOV_B32 4
%103 = S_MOV_B32 0
%1 = REG_SEQUENCE %102, 1, %103, 2
%96 = S_ADD_U32 %98, %99, implicit-def %scc
%98 = COPY %0.sub0
%99 = COPY %1.sub0
%97 = S_ADDC_U32 %100, %101, implicit-def %scc, implicit %scc
%100 = COPY %0.sub1
%101 = COPY %1.sub1
%2 = REG_SEQUENCE %96, 1, %97, 2
%3 = S_LOAD_DWORD_IMM %0, 1, 0
%94 = S_MOV_B32 1020
%95 = S_MOV_B32 0
%4 = REG_SEQUENCE %94, 1, %95, 2
%88 = S_ADD_U32 %90, %91, implicit-def %scc
%90 = COPY %0.sub0
%91 = COPY %4.sub0
%89 = S_ADDC_U32 %92, %93, implicit-def %scc, implicit %scc
%92 = COPY %0.sub1
%93 = COPY %4.sub1
%5 = REG_SEQUENCE %88, 1, %89, 2
%6 = S_LOAD_DWORD_IMM %0, 255, 0
%86 = S_MOV_B32 1024
%87 = S_MOV_B32 0
%7 = REG_SEQUENCE %86, 1, %87, 2
%80 = S_ADD_U32 %82, %83, implicit-def %scc
%82 = COPY %0.sub0
%83 = COPY %7.sub0
%81 = S_ADDC_U32 %84, %85, implicit-def %scc, implicit %scc
%84 = COPY %0.sub1
%85 = COPY %7.sub1
%8 = REG_SEQUENCE %80, 1, %81, 2
%79 = S_MOV_B32 1024
%9 = S_LOAD_DWORD_SGPR %0, %79, 0
%77 = S_MOV_B32 1048572
%78 = S_MOV_B32 0
%10 = REG_SEQUENCE %77, 1, %78, 2
%71 = S_ADD_U32 %73, %74, implicit-def %scc
%73 = COPY %0.sub0
%74 = COPY %10.sub0
%72 = S_ADDC_U32 %75, %76, implicit-def %scc, implicit %scc
%75 = COPY %0.sub1
%76 = COPY %10.sub1
%11 = REG_SEQUENCE %71, 1, %72, 2
%70 = S_MOV_B32 1048572
%12 = S_LOAD_DWORD_SGPR %0, %70, 0
%68 = S_MOV_B32 1048576
%69 = S_MOV_B32 0
%13 = REG_SEQUENCE %68, 1, %69, 2
%62 = S_ADD_U32 %64, %65, implicit-def %scc
%64 = COPY %0.sub0
%65 = COPY %13.sub0
%63 = S_ADDC_U32 %66, %67, implicit-def %scc, implicit %scc
%66 = COPY %0.sub1
%67 = COPY %13.sub1
%14 = REG_SEQUENCE %62, 1, %63, 2
%61 = S_MOV_B32 1048576
%15 = S_LOAD_DWORD_SGPR %0, %61, 0
%59 = S_MOV_B32 4294967292
%60 = S_MOV_B32 3
%16 = REG_SEQUENCE %59, 1, %60, 2
%53 = S_ADD_U32 %55, %56, implicit-def %scc
%55 = COPY %0.sub0
%56 = COPY %16.sub0
%54 = S_ADDC_U32 %57, %58, implicit-def %scc, implicit %scc
%57 = COPY %0.sub1
%58 = COPY %16.sub1
%17 = REG_SEQUENCE %53, 1, %54, 2
%18 = S_LOAD_DWORD_IMM %17, 0, 0
%51 = S_MOV_B32 0
%52 = S_MOV_B32 4
%19 = REG_SEQUENCE %51, 1, %52, 2
%45 = S_ADD_U32 %47, %48, implicit-def %scc
%47 = COPY %0.sub0
%48 = COPY %19.sub0
%46 = S_ADDC_U32 %49, %50, implicit-def %scc, implicit %scc
%49 = COPY %0.sub1
%50 = COPY %19.sub1
%20 = REG_SEQUENCE %45, 1, %46, 2
%21 = S_LOAD_DWORD_IMM %20, 0, 0
%43 = S_MOV_B32 4294967292
%44 = S_MOV_B32 0
%22 = REG_SEQUENCE %43, 1, %44, 2
%37 = S_ADD_U32 %39, %40, implicit-def %scc
%39 = COPY %0.sub0
%40 = COPY %22.sub0
%38 = S_ADDC_U32 %41, %42, implicit-def %scc, implicit %scc
%41 = COPY %0.sub1
%42 = COPY %22.sub1
%23 = REG_SEQUENCE %37, 1, %38, 2
%36 = S_MOV_B32 4294967292
%24 = S_LOAD_DWORD_SGPR %0, %36, 0
%34 = S_MOV_B32 0
%35 = S_MOV_B32 1
%25 = REG_SEQUENCE %34, 1, %35, 2
%28 = S_ADD_U32 %30, %31, implicit-def %scc
%30 = COPY %0.sub0
%31 = COPY %25.sub0
%29 = S_ADDC_U32 %32, %33, implicit-def %scc, implicit %scc
%32 = COPY %0.sub1
%33 = COPY %25.sub1
%26 = REG_SEQUENCE %28, 1, %29, 2
%27 = S_LOAD_DWORD_IMM %26, 0, 0
...
-------------- next part --------------
A non-text attachment was scrubbed...
Name: foo.log
Type: text/x-log
Size: 43218 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170131/f0c09cbb/attachment.bin>