[llvm] r363299 - [AMDGPU] gfx1010 base changes for wave32
Kostya Serebryany via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 13 13:17:02 PDT 2019
This breaks the build for me:
ninja: Entering directory `/usr/local/google/home/kcc/llvm-build'
[7/7] Building AMDGPUGenDAGISel.inc...
FAILED: lib/Target/AMDGPU/AMDGPUGenDAGISel.inc
cd /usr/local/google/home/kcc/llvm-build &&
/usr/local/google/home/kcc/llvm-build/bin/llvm-tblgen -gen-dag-isel -I
/usr/local/google/home/kcc/llvm/llvm/lib/Target/AMDGPU -I
/usr/local/google/home/kcc/llvm/llvm/include -I
/usr/local/google/home/kcc/llvm/llvm/lib/Target
/usr/local/google/home/kcc/llvm/llvm/lib/Target/AMDGPU/AMDGPU.td -o
lib/Target/AMDGPU/AMDGPUGenDAGISel.inc -d
lib/Target/AMDGPU/AMDGPUGenDAGISel.inc.d
Type set is empty for each HW mode:
possible type contradiction in the pattern below (use -print-records with
llvm-tblgen to see all expanded records).
anonymous_3351: (AMDGPUsetcc:{ *:[] } i32:{ *:[i32] }:$src0, i32:{
*:[i32] }:$src1, (cond:{ *:[Other] })<<P:Predicate_COND_EQ>>)
UNREACHABLE executed at
/usr/local/google/home/kcc/llvm/llvm/utils/TableGen/CodeGenDAGPatterns.cpp:821!
On Thu, Jun 13, 2019 at 12:15 PM Stanislav Mekhanoshin via llvm-commits <
llvm-commits at lists.llvm.org> wrote:
> Author: rampitec
> Date: Thu Jun 13 12:18:29 2019
> New Revision: 363299
>
> URL: http://llvm.org/viewvc/llvm-project?rev=363299&view=rev
> Log:
> [AMDGPU] gfx1010 base changes for wave32
>
> Differential Revision: https://reviews.llvm.org/D63293
>
> Modified:
> llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td
> llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
> llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
> llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
> llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
> llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
> llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
> llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td
> llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
> llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
> llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td
> llvm/trunk/lib/Target/AMDGPU/VOPCInstructions.td
>
> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td?rev=363299&r1=363298&r2=363299&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td (original)
> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td Thu Jun 13 12:18:29
> 2019
> @@ -69,9 +69,11 @@ class PredicateControl {
> Predicate SubtargetPredicate = TruePredicate;
> list<Predicate> AssemblerPredicates = [];
> Predicate AssemblerPredicate = TruePredicate;
> + Predicate WaveSizePredicate = TruePredicate;
> list<Predicate> OtherPredicates = [];
> list<Predicate> Predicates = !listconcat([SubtargetPredicate,
> - AssemblerPredicate],
> + AssemblerPredicate,
> + WaveSizePredicate],
> AssemblerPredicates,
> OtherPredicates);
> }
>
> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=363299&r1=363298&r2=363299&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Thu Jun 13 12:18:29
> 2019
> @@ -94,6 +94,16 @@ GCNSubtarget::initializeSubtargetDepende
>
> FullFS += "+enable-prt-strict-null,"; // This is overridden by a
> disable in FS
>
> + // Disable mutually exclusive bits.
> + if (FS.find_lower("+wavefrontsize") != StringRef::npos) {
> + if (FS.find_lower("wavefrontsize16") == StringRef::npos)
> + FullFS += "-wavefrontsize16,";
> + if (FS.find_lower("wavefrontsize32") == StringRef::npos)
> + FullFS += "-wavefrontsize32,";
> + if (FS.find_lower("wavefrontsize64") == StringRef::npos)
> + FullFS += "-wavefrontsize64,";
> + }
> +
> FullFS += FS;
>
> ParseSubtargetFeatures(GPU, FullFS);
>
> Modified: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp?rev=363299&r1=363298&r2=363299&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (original)
> +++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp Thu Jun 13
> 12:18:29 2019
> @@ -375,6 +375,8 @@ public:
> return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
> }
>
> + bool isBoolReg() const;
> +
> bool isSCSrcF16() const {
> return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
> }
> @@ -616,6 +618,10 @@ public:
>
> void addRegOperands(MCInst &Inst, unsigned N) const;
>
> + void addBoolRegOperands(MCInst &Inst, unsigned N) const {
> + addRegOperands(Inst, N);
> + }
> +
> void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
> if (isRegKind())
> addRegOperands(Inst, N);
> @@ -881,6 +887,8 @@ private:
> /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
> /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is
> reserved.
> /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
> + /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32
> kernel
> + /// descriptor field, if valid.
> /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
> /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
> /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
> @@ -889,9 +897,10 @@ private:
> /// \param SGPRBlocks [out] Result SGPR block count.
> bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
> bool FlatScrUsed, bool XNACKUsed,
> - unsigned NextFreeVGPR, SMRange VGPRRange,
> - unsigned NextFreeSGPR, SMRange SGPRRange,
> - unsigned &VGPRBlocks, unsigned &SGPRBlocks);
> + Optional<bool> EnableWavefrontSize32, unsigned
> NextFreeVGPR,
> + SMRange VGPRRange, unsigned NextFreeSGPR,
> + SMRange SGPRRange, unsigned &VGPRBlocks,
> + unsigned &SGPRBlocks);
> bool ParseDirectiveAMDGCNTarget();
> bool ParseDirectiveAMDHSAKernel();
> bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
> @@ -1159,6 +1168,7 @@ private:
> bool validateMIMGDim(const MCInst &Inst);
> bool validateLdsDirect(const MCInst &Inst);
> bool validateOpSel(const MCInst &Inst);
> + bool validateVccOperand(unsigned Reg) const;
> bool validateVOP3Literal(const MCInst &Inst) const;
> bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
> bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
> @@ -1190,6 +1200,7 @@ public:
> OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
> OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
> OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
> + OperandMatchResultTy parseBoolReg(OperandVector &Operands);
>
> bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
> const unsigned MinVal,
> @@ -1479,6 +1490,11 @@ bool AMDGPUOperand::isSDWAInt32Operand()
> return isSDWAOperand(MVT::i32);
> }
>
> +bool AMDGPUOperand::isBoolReg() const {
> + return AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ?
> + isSCSrcB64() : isSCSrcB32();
> +}
> +
> uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned
> Size) const
> {
> assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
> @@ -3030,6 +3046,13 @@ bool AMDGPUAsmParser::validateOpSel(cons
> return true;
> }
>
> +// Check if VCC register matches wavefront size
> +bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
> + auto FB = getFeatureBits();
> + return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
> + (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
> +}
> +
> // VOP3 literal is only allowed in GFX10+ and only one can be used
> bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
> unsigned Opcode = Inst.getOpcode();
> @@ -3267,9 +3290,9 @@ bool AMDGPUAsmParser::OutOfRangeError(SM
>
> bool AMDGPUAsmParser::calculateGPRBlocks(
> const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
> - bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange,
> - unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks,
> - unsigned &SGPRBlocks) {
> + bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned
> NextFreeVGPR,
> + SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
> + unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
> // TODO(scott.linder): These calculations are duplicated from
> // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
> IsaVersion Version = getIsaVersion(getSTI().getCPU());
> @@ -3298,7 +3321,8 @@ bool AMDGPUAsmParser::calculateGPRBlocks
> NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
> }
>
> - VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
> + VGPRBlocks =
> + IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs,
> EnableWavefrontSize32);
> SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
>
> return false;
> @@ -3329,6 +3353,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDH
> bool ReserveVCC = true;
> bool ReserveFlatScr = true;
> bool ReserveXNACK = hasXNACK();
> + Optional<bool> EnableWavefrontSize32;
>
> while (true) {
> while (getLexer().is(AsmToken::EndOfStatement))
> @@ -3547,8 +3572,9 @@ bool AMDGPUAsmParser::ParseDirectiveAMDH
> unsigned VGPRBlocks;
> unsigned SGPRBlocks;
> if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
> - ReserveXNACK, NextFreeVGPR, VGPRRange,
> NextFreeSGPR,
> - SGPRRange, VGPRBlocks, SGPRBlocks))
> + ReserveXNACK, EnableWavefrontSize32,
> NextFreeVGPR,
> + VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
> + SGPRBlocks))
> return true;
>
> if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
> @@ -5384,6 +5410,15 @@ AMDGPUAsmParser::parseSOppBrTarget(Opera
> }
>
>
> //===----------------------------------------------------------------------===//
> +// Boolean holding registers
>
> +//===----------------------------------------------------------------------===//
> +
> +OperandMatchResultTy
> +AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
> + return parseReg(Operands);
> +}
> +
>
> +//===----------------------------------------------------------------------===//
> // mubuf
>
> //===----------------------------------------------------------------------===//
>
> @@ -6294,7 +6329,7 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Ins
> }
> AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
> // Add the register arguments
> - if (Op.isReg() && Op.getReg() == AMDGPU::VCC) {
> + if (Op.isReg() && validateVccOperand(Op.getReg())) {
> // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
> // Skip it.
> continue;
> @@ -6437,7 +6472,8 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &In
>
> for (unsigned E = Operands.size(); I != E; ++I) {
> AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
> - if (skipVcc && !skippedVcc && Op.isReg() && Op.getReg() ==
> AMDGPU::VCC) {
> + if (skipVcc && !skippedVcc && Op.isReg() &&
> + (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
> // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
> // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
> // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
>
> Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp?rev=363299&r1=363298&r2=363299&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
> (original)
> +++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp Thu
> Jun 13 12:18:29 2019
> @@ -442,6 +442,7 @@ void AMDGPUInstPrinter::printVOPDst(cons
>
> printOperand(MI, OpNo, STI, O);
>
> + // Print default vcc/vcc_lo operand.
> switch (MI->getOpcode()) {
> default: break;
>
> @@ -589,7 +590,8 @@ void AMDGPUInstPrinter::printDefaultVccO
> raw_ostream &O) {
> if (OpNo > 0)
> O << ", ";
> - printRegOperand(AMDGPU::VCC, O, MRI);
> + printRegOperand(STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ?
> + AMDGPU::VCC : AMDGPU::VCC_LO, O, MRI);
> if (OpNo == 0)
> O << ", ";
> }
> @@ -597,6 +599,7 @@ void AMDGPUInstPrinter::printDefaultVccO
> void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
> const MCSubtargetInfo &STI,
> raw_ostream &O) {
> + // Print default vcc/vcc_lo operand of VOPC.
> const MCInstrDesc &Desc = MII.get(MI->getOpcode());
> if (OpNo == 0 && (Desc.TSFlags & SIInstrFlags::VOPC) &&
> (Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC) ||
> @@ -680,6 +683,7 @@ void AMDGPUInstPrinter::printOperand(con
> O << "/*INV_OP*/";
> }
>
> + // Print default vcc/vcc_lo operand of v_cndmask_b32_e32.
> switch (MI->getOpcode()) {
> default: break;
>
> @@ -749,6 +753,7 @@ void AMDGPUInstPrinter::printOperandAndI
> if (InputModifiers & SISrcMods::SEXT)
> O << ')';
>
> + // Print default vcc/vcc_lo operand of VOP2b.
> switch (MI->getOpcode()) {
> default: break;
>
>
> Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp?rev=363299&r1=363298&r2=363299&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
> (original)
> +++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp Thu Jun
> 13 12:18:29 2019
> @@ -389,7 +389,7 @@ SIMCCodeEmitter::getSDWAVopcDstEncoding(
> const MCOperand &MO = MI.getOperand(OpNo);
>
> unsigned Reg = MO.getReg();
> - if (Reg != AMDGPU::VCC) {
> + if (Reg != AMDGPU::VCC && Reg != AMDGPU::VCC_LO) {
> RegEnc |= MRI.getEncodingValue(Reg);
> RegEnc &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
> RegEnc |= SDWA9EncValues::VOPC_DST_VCC_MASK;
>
> Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=363299&r1=363298&r2=363299&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
> +++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Thu Jun 13 12:18:29 2019
> @@ -6,6 +6,11 @@
> //
>
> //===----------------------------------------------------------------------===//
>
> +def isWave32 : Predicate<"Subtarget->getWavefrontSize() == 32">,
> + AssemblerPredicate <"FeatureWavefrontSize32">;
> +def isWave64 : Predicate<"Subtarget->getWavefrontSize() == 64">,
> + AssemblerPredicate <"FeatureWavefrontSize64">;
> +
> def DisableInst : Predicate <"false">,
> AssemblerPredicate<"FeatureDisable">;
>
> class GCNPredicateControl : PredicateControl {
>
> Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=363299&r1=363298&r2=363299&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
> +++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Thu Jun 13 12:18:29 2019
> @@ -188,9 +188,18 @@ class WrapTerminatorInst<SOP_Pseudo base
> let CodeSize = base_inst.CodeSize;
> }
>
> +let WaveSizePredicate = isWave64 in {
> def S_MOV_B64_term : WrapTerminatorInst<S_MOV_B64>;
> def S_XOR_B64_term : WrapTerminatorInst<S_XOR_B64>;
> def S_ANDN2_B64_term : WrapTerminatorInst<S_ANDN2_B64>;
> +}
> +
> +let WaveSizePredicate = isWave32 in {
> +def S_MOV_B32_term : WrapTerminatorInst<S_MOV_B32>;
> +def S_XOR_B32_term : WrapTerminatorInst<S_XOR_B32>;
> +def S_OR_B32_term : WrapTerminatorInst<S_OR_B32>;
> +def S_ANDN2_B32_term : WrapTerminatorInst<S_ANDN2_B32>;
> +}
>
> def WAVE_BARRIER : SPseudoInstSI<(outs), (ins),
> [(int_amdgcn_wave_barrier)]> {
> @@ -343,6 +352,15 @@ def SI_INIT_EXEC : SPseudoInstSI <
> let Defs = [EXEC];
> let usesCustomInserter = 1;
> let isAsCheapAsAMove = 1;
> + let WaveSizePredicate = isWave64;
> +}
> +
> +def SI_INIT_EXEC_LO : SPseudoInstSI <
> + (outs), (ins i32imm:$src), []> {
> + let Defs = [EXEC_LO];
> + let usesCustomInserter = 1;
> + let isAsCheapAsAMove = 1;
> + let WaveSizePredicate = isWave32;
> }
>
> def SI_INIT_EXEC_FROM_INPUT : SPseudoInstSI <
>
> Modified: llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td?rev=363299&r1=363298&r2=363299&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td (original)
> +++ llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td Thu Jun 13 12:18:29
> 2019
> @@ -275,6 +275,21 @@ let SubtargetPredicate = isGFX9Plus in {
> } // End SubtargetPredicate = isGFX9Plus
>
> let SubtargetPredicate = isGFX10Plus in {
> + let hasSideEffects = 1, Defs = [EXEC, SCC], Uses = [EXEC] in {
> + def S_AND_SAVEEXEC_B32 : SOP1_32<"s_and_saveexec_b32">;
> + def S_OR_SAVEEXEC_B32 : SOP1_32<"s_or_saveexec_b32">;
> + def S_XOR_SAVEEXEC_B32 : SOP1_32<"s_xor_saveexec_b32">;
> + def S_ANDN2_SAVEEXEC_B32 : SOP1_32<"s_andn2_saveexec_b32">;
> + def S_ORN2_SAVEEXEC_B32 : SOP1_32<"s_orn2_saveexec_b32">;
> + def S_NAND_SAVEEXEC_B32 : SOP1_32<"s_nand_saveexec_b32">;
> + def S_NOR_SAVEEXEC_B32 : SOP1_32<"s_nor_saveexec_b32">;
> + def S_XNOR_SAVEEXEC_B32 : SOP1_32<"s_xnor_saveexec_b32">;
> + def S_ANDN1_SAVEEXEC_B32 : SOP1_32<"s_andn1_saveexec_b32">;
> + def S_ORN1_SAVEEXEC_B32 : SOP1_32<"s_orn1_saveexec_b32">;
> + def S_ANDN1_WREXEC_B32 : SOP1_32<"s_andn1_wrexec_b32">;
> + def S_ANDN2_WREXEC_B32 : SOP1_32<"s_andn2_wrexec_b32">;
> + } // End hasSideEffects = 1, Defs = [EXEC, SCC], Uses = [EXEC]
> +
> let Uses = [M0] in {
> def S_MOVRELSD_2_B32 : SOP1_32<"s_movrelsd_2_b32">;
> } // End Uses = [M0]
> @@ -782,6 +797,9 @@ let SubtargetPredicate = isGFX10Plus in
> let has_sdst = 0;
> }
>
> + def S_SUBVECTOR_LOOP_BEGIN : SOPK_32_BR<"s_subvector_loop_begin">;
> + def S_SUBVECTOR_LOOP_END : SOPK_32_BR<"s_subvector_loop_end">;
> +
> def S_WAITCNT_VSCNT : SOPK_WAITCNT<"s_waitcnt_vscnt">;
> def S_WAITCNT_VMCNT : SOPK_WAITCNT<"s_waitcnt_vmcnt">;
> def S_WAITCNT_EXPCNT : SOPK_WAITCNT<"s_waitcnt_expcnt">;
> @@ -1215,6 +1233,18 @@ defm S_ORN1_SAVEEXEC_B64 : SOP1_Real_
> defm S_ANDN1_WREXEC_B64 : SOP1_Real_gfx10<0x039>;
> defm S_ANDN2_WREXEC_B64 : SOP1_Real_gfx10<0x03a>;
> defm S_BITREPLICATE_B64_B32 : SOP1_Real_gfx10<0x03b>;
> +defm S_AND_SAVEEXEC_B32 : SOP1_Real_gfx10<0x03c>;
> +defm S_OR_SAVEEXEC_B32 : SOP1_Real_gfx10<0x03d>;
> +defm S_XOR_SAVEEXEC_B32 : SOP1_Real_gfx10<0x03e>;
> +defm S_ANDN2_SAVEEXEC_B32 : SOP1_Real_gfx10<0x03f>;
> +defm S_ORN2_SAVEEXEC_B32 : SOP1_Real_gfx10<0x040>;
> +defm S_NAND_SAVEEXEC_B32 : SOP1_Real_gfx10<0x041>;
> +defm S_NOR_SAVEEXEC_B32 : SOP1_Real_gfx10<0x042>;
> +defm S_XNOR_SAVEEXEC_B32 : SOP1_Real_gfx10<0x043>;
> +defm S_ANDN1_SAVEEXEC_B32 : SOP1_Real_gfx10<0x044>;
> +defm S_ORN1_SAVEEXEC_B32 : SOP1_Real_gfx10<0x045>;
> +defm S_ANDN1_WREXEC_B32 : SOP1_Real_gfx10<0x046>;
> +defm S_ANDN2_WREXEC_B32 : SOP1_Real_gfx10<0x047>;
> defm S_MOVRELSD_2_B32 : SOP1_Real_gfx10<0x049>;
>
>
> //===----------------------------------------------------------------------===//
> @@ -1382,6 +1412,8 @@ defm S_WAITCNT_VSCNT : SOPK_Real3
> defm S_WAITCNT_VMCNT : SOPK_Real32_gfx10<0x018>;
> defm S_WAITCNT_EXPCNT : SOPK_Real32_gfx10<0x019>;
> defm S_WAITCNT_LGKMCNT : SOPK_Real32_gfx10<0x01a>;
> +defm S_SUBVECTOR_LOOP_BEGIN : SOPK_Real32_gfx10<0x01b>;
> +defm S_SUBVECTOR_LOOP_END : SOPK_Real32_gfx10<0x01c>;
>
>
> //===----------------------------------------------------------------------===//
> // SOPK - GFX6, GFX7.
>
> Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp?rev=363299&r1=363298&r2=363299&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (original)
> +++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Thu Jun 13
> 12:18:29 2019
> @@ -380,12 +380,17 @@ unsigned getNumSGPRBlocks(const MCSubtar
> return NumSGPRs / getSGPREncodingGranule(STI) - 1;
> }
>
> -unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI) {
> - return 4;
> +unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
> + Optional<bool> EnableWavefrontSize32) {
> + bool IsWave32 = EnableWavefrontSize32 ?
> + *EnableWavefrontSize32 :
> + STI->getFeatureBits().test(FeatureWavefrontSize32);
> + return IsWave32 ? 8 : 4;
> }
>
> -unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI) {
> - return getVGPRAllocGranule(STI);
> +unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
> + Optional<bool> EnableWavefrontSize32) {
> + return getVGPRAllocGranule(STI, EnableWavefrontSize32);
> }
>
> unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
> @@ -416,10 +421,12 @@ unsigned getMaxNumVGPRs(const MCSubtarge
> return std::min(MaxNumVGPRs, AddressableNumVGPRs);
> }
>
> -unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs) {
> - NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(STI));
> +unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
> + Optional<bool> EnableWavefrontSize32) {
> + NumVGPRs = alignTo(std::max(1u, NumVGPRs),
> + getVGPREncodingGranule(STI, EnableWavefrontSize32));
> // VGPRBlocks is actual number of VGPR blocks minus 1.
> - return NumVGPRs / getVGPREncodingGranule(STI) - 1;
> + return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) -
> 1;
> }
>
> } // end namespace IsaInfo
> @@ -437,7 +444,6 @@ void initDefaultAMDKernelCodeT(amd_kerne
> Header.amd_machine_version_minor = Version.Minor;
> Header.amd_machine_version_stepping = Version.Stepping;
> Header.kernel_code_entry_byte_offset = sizeof(Header);
> - // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
> Header.wavefront_size = 6;
>
> // If the code object does not support indirect functions, then the
> value must
>
> Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h?rev=363299&r1=363298&r2=363299&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (original)
> +++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h Thu Jun 13
> 12:18:29 2019
> @@ -150,10 +150,18 @@ unsigned getNumExtraSGPRs(const MCSubtar
> unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
>
> /// \returns VGPR allocation granularity for given subtarget \p STI.
> -unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI);
> +///
> +/// For subtargets which support it, \p EnableWavefrontSize32 should match
> +/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
> +unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
> + Optional<bool> EnableWavefrontSize32 = None);
>
> /// \returns VGPR encoding granularity for given subtarget \p STI.
> -unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI);
> +///
> +/// For subtargets which support it, \p EnableWavefrontSize32 should match
> +/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
> +unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
> + Optional<bool> EnableWavefrontSize32 =
> None);
>
> /// \returns Total number of VGPRs for given subtarget \p STI.
> unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
> @@ -171,7 +179,11 @@ unsigned getMaxNumVGPRs(const MCSubtarge
>
> /// \returns Number of VGPR blocks needed for given subtarget \p STI when
> /// \p NumVGPRs are used.
> -unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
> +///
> +/// For subtargets which support it, \p EnableWavefrontSize32 should
> match the
> +/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
> +unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs,
> + Optional<bool> EnableWavefrontSize32 = None);
>
> } // end namespace IsaInfo
>
>
> Modified: llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td?rev=363299&r1=363298&r2=363299&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td (original)
> +++ llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td Thu Jun 13 12:18:29
> 2019
> @@ -199,7 +199,12 @@ class VOP2bInstAlias <VOP2_Pseudo ps, In
> }
>
> multiclass VOP2bInstAliases<VOP2_Pseudo ps, VOP2_Real inst, string
> OpName> {
> + let WaveSizePredicate = isWave32 in {
> + def : VOP2bInstAlias<ps, inst, OpName, "vcc_lo">;
> + }
> + let WaveSizePredicate = isWave64 in {
> def : VOP2bInstAlias<ps, inst, OpName, "vcc">;
> + }
> }
>
> multiclass VOP2eInst <string opName,
> @@ -234,7 +239,12 @@ class VOP2eInstAlias <VOP2_Pseudo ps, In
> }
>
> multiclass VOP2eInstAliases<VOP2_Pseudo ps, VOP2_Real inst> {
> + let WaveSizePredicate = isWave32 in {
> + def : VOP2eInstAlias<ps, inst, "vcc_lo">;
> + }
> + let WaveSizePredicate = isWave64 in {
> def : VOP2eInstAlias<ps, inst, "vcc">;
> + }
> }
>
> class VOP_MADAK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
> @@ -953,6 +963,30 @@ let AssemblerPredicate = isGFX10Plus, De
> let DecoderNamespace = "DPP8";
> }
>
> + let WaveSizePredicate = isWave32 in {
> + def _sdwa_w32_gfx10 :
> + Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
> + VOP2_SDWA9Ae<op{5-0},
> !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
> + VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
> + let AsmString = asmName # !subst("vcc", "vcc_lo",
> Ps.AsmOperands);
> + let isAsmParserOnly = 1;
> + let DecoderNamespace = "SDWA10";
> + }
> + def _dpp_w32_gfx10 :
> + VOP2_DPP16<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
> + string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
> + let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP);
> + let isAsmParserOnly = 1;
> + }
> + def _dpp8_w32_gfx10 :
> + VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
> + string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
> + let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8);
> + let isAsmParserOnly = 1;
> + }
> + } // End WaveSizePredicate = isWave32
> +
> + let WaveSizePredicate = isWave64 in {
> def _sdwa_w64_gfx10 :
> Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
> VOP2_SDWA9Ae<op{5-0},
> !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
> @@ -973,6 +1007,7 @@ let AssemblerPredicate = isGFX10Plus, De
> let AsmString = asmName # AsmDPP8;
> let isAsmParserOnly = 1;
> }
> + } // End WaveSizePredicate = isWave64
> }
>
> //===----------------------------- VOP3Only
> -----------------------------===//
>
> Modified: llvm/trunk/lib/Target/AMDGPU/VOPCInstructions.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VOPCInstructions.td?rev=363299&r1=363298&r2=363299&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/VOPCInstructions.td (original)
> +++ llvm/trunk/lib/Target/AMDGPU/VOPCInstructions.td Thu Jun 13 12:18:29
> 2019
> @@ -165,9 +165,16 @@ class VOPCInstAlias <VOP3_Pseudo ps, Ins
> multiclass VOPCInstAliases <string OpName, string Arch> {
> def : VOPCInstAlias <!cast<VOP3_Pseudo>(OpName#"_e64"),
> !cast<Instruction>(OpName#"_e32_"#Arch)>;
> + let WaveSizePredicate = isWave32 in {
> + def : VOPCInstAlias <!cast<VOP3_Pseudo>(OpName#"_e64"),
> + !cast<Instruction>(OpName#"_e32_"#Arch),
> + "vcc_lo,
> "#!cast<VOP3_Pseudo>(OpName#"_e64").Pfl.Asm32>;
> + }
> + let WaveSizePredicate = isWave64 in {
> def : VOPCInstAlias <!cast<VOP3_Pseudo>(OpName#"_e64"),
> !cast<Instruction>(OpName#"_e32_"#Arch),
> "vcc,
> "#!cast<VOP3_Pseudo>(OpName#"_e64").Pfl.Asm32>;
> + }
> }
>
> multiclass VOPCXInstAliases <string OpName, string Arch> {
> @@ -740,10 +747,17 @@ defm V_CMPX_CLASS_F16 : VOPCX_CLASS_F16
> // We need to use COPY_TO_REGCLASS to w/a the problem when
> ReplaceAllUsesWith()
> // complaints it cannot replace i1 <-> i64/i32 if node was not morphed in
> place.
> multiclass ICMP_Pattern <PatLeaf cond, Instruction inst, ValueType vt> {
> + let WaveSizePredicate = isWave64 in
> def : GCNPat <
> (i64 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
> (i64 (COPY_TO_REGCLASS (inst $src0, $src1), SReg_64))
> >;
> +
> + let WaveSizePredicate = isWave32 in
> + def : GCNPat <
> + (i32 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
> + (i32 (COPY_TO_REGCLASS (inst $src0, $src1), SReg_32))
> + >;
> }
>
> defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U32_e64, i32>;
> @@ -780,12 +794,21 @@ defm : ICMP_Pattern <COND_SLT, V_CMP_LT_
> defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I16_e64, i16>;
>
> multiclass FCMP_Pattern <PatLeaf cond, Instruction inst, ValueType vt> {
> + let WaveSizePredicate = isWave64 in
> def : GCNPat <
> (i64 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
> (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
> (i64 (COPY_TO_REGCLASS (inst $src0_modifiers, $src0, $src1_modifiers,
> $src1,
> DSTCLAMP.NONE), SReg_64))
> >;
> +
> + let WaveSizePredicate = isWave32 in
> + def : GCNPat <
> + (i32 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
> + (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
> + (i32 (COPY_TO_REGCLASS (inst $src0_modifiers, $src0, $src1_modifiers,
> $src1,
> + DSTCLAMP.NONE), SReg_32))
> + >;
> }
>
> defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F32_e64, f32>;
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190613/778eca5b/attachment-0001.html>
More information about the llvm-commits
mailing list