[llvm] r363299 - [AMDGPU] gfx1010 base changes for wave32

Thu Jun 13 23:34:32 PDT 2019

Hi,

Ok, so they're about to be used then.

Then I'll ignore this and hope for those patches to be submitted soon. :)

Thanks,
Mikael

On 2019-06-14 08:29, Mekhanoshin, Stanislav wrote:
> Sorry about that. It has to be used here: https://reviews.llvm.org/D63204
> Which in turn waits for this: https://reviews.llvm.org/D63205
> 
> I really apologize, this is a long stack of dependencies in the chain. I have missed the warning as clang doesn't issue one.
> 
> Stas
> --- Original message ---
> From: Mikael Holmén mikael.holmen at ericsson.com
> Sent: June 13, 2019 23:13:12
> To: Mekhanoshin, Stanislav Stanislav.Mekhanoshin at amd.com, llvm-commits at lists.llvm.org
> Subject: Re: [llvm] r363299 - [AMDGPU] gfx1010 base changes for wave32
> 
>> [CAUTION: External Email]
>>
>> Hi,
>>
>> gcc (7.4) warns on unused functions with this patch:
>>
>> ../lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp:5417:1: error:
>> 'llvm::OperandMatchResultTy
>> {anonymous}::AMDGPUAsmParser::parseBoolReg(llvm::OperandVector&)'
>> defined but not used [-Werror=unused-function]
>>   AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
>>   ^~~~~~~~~~~~~~~
>> ../lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp:1493:6: error: 'bool
>> {anonymous}::AMDGPUOperand::isBoolReg() const' defined but not used
>> [-Werror=unused-function]
>>   bool AMDGPUOperand::isBoolReg() const {
>>        ^~~~~~~~~~~~~
>>
>> Are you about to use parseBoolReg(OperandVector &Operands) and
>> isBoolReg() in upcoming patches or can they be removed so we get a
>> warning free build also with gcc?
>>
>> Regards,
>> Mikael
>>
>> On 2019-06-13 21:18, Stanislav Mekhanoshin via llvm-commits wrote:
>>> Author: rampitec
>>> Date: Thu Jun 13 12:18:29 2019
>>> New Revision: 363299
>>>
>>> URL: http://llvm.org/viewvc/llvm-project?rev=363299&view=rev
>>> Log:
>>> [AMDGPU] gfx1010 base changes for wave32
>>>
>>> Differential Revision: https://reviews.llvm.org/D63293
>>>
>>> Modified:
>>> llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td
>>> llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
>>> llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
>>> llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
>>> llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
>>> llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
>>> llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
>>> llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td
>>> llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
>>> llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
>>> llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td
>>> llvm/trunk/lib/Target/AMDGPU/VOPCInstructions.td
>>>
>>> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td?rev=363299&r1=363298&r2=363299&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td (original)
>>> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td Thu Jun 13 12:18:29 2019
>>> @@ -69,9 +69,11 @@ class PredicateControl {
>>> Predicate SubtargetPredicate = TruePredicate;
>>> list<Predicate> AssemblerPredicates = [];
>>> Predicate AssemblerPredicate = TruePredicate;
>>> +  Predicate WaveSizePredicate = TruePredicate;
>>> list<Predicate> OtherPredicates = [];
>>> list<Predicate> Predicates = !listconcat([SubtargetPredicate,
>>> -                                            AssemblerPredicate],
>>> +                                            AssemblerPredicate,
>>> +                                            WaveSizePredicate],
>>>                                       AssemblerPredicates,
>>>                                       OtherPredicates);
>>> }
>>>
>>> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=363299&r1=363298&r2=363299&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
>>> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Thu Jun 13 12:18:29 2019
>>> @@ -94,6 +94,16 @@ GCNSubtarget::initializeSubtargetDepende
>>>
>>> FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS
>>>
>>> +  // Disable mutually exclusive bits.
>>> +  if (FS.find_lower("+wavefrontsize") != StringRef::npos) {
>>> +    if (FS.find_lower("wavefrontsize16") == StringRef::npos)
>>> +      FullFS += "-wavefrontsize16,";
>>> +    if (FS.find_lower("wavefrontsize32") == StringRef::npos)
>>> +      FullFS += "-wavefrontsize32,";
>>> +    if (FS.find_lower("wavefrontsize64") == StringRef::npos)
>>> +      FullFS += "-wavefrontsize64,";
>>> +  }
>>> +
>>> FullFS += FS;
>>>
>>> ParseSubtargetFeatures(GPU, FullFS);
>>>
>>> Modified: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp?rev=363299&r1=363298&r2=363299&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (original)
>>> +++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp Thu Jun 13 12:18:29 2019
>>> @@ -375,6 +375,8 @@ public:
>>> return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
>>> }
>>>
>>> +  bool isBoolReg() const;
>>> +
>>> bool isSCSrcF16() const {
>>> return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
>>> }
>>> @@ -616,6 +618,10 @@ public:
>>>
>>> void addRegOperands(MCInst &Inst, unsigned N) const;
>>>
>>> +  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
>>> +    addRegOperands(Inst, N);
>>> +  }
>>> +
>>> void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
>>> if (isRegKind())
>>> addRegOperands(Inst, N);
>>> @@ -881,6 +887,8 @@ private:
>>> /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
>>> /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
>>> /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
>>> +  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
>>> +  /// descriptor field, if valid.
>>> /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
>>> /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
>>> /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
>>> @@ -889,9 +897,10 @@ private:
>>> /// \param SGPRBlocks [out] Result SGPR block count.
>>> bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
>>>                     bool FlatScrUsed, bool XNACKUsed,
>>> -                          unsigned NextFreeVGPR, SMRange VGPRRange,
>>> -                          unsigned NextFreeSGPR, SMRange SGPRRange,
>>> -                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
>>> +                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
>>> +                          SMRange VGPRRange, unsigned NextFreeSGPR,
>>> +                          SMRange SGPRRange, unsigned &VGPRBlocks,
>>> +                          unsigned &SGPRBlocks);
>>> bool ParseDirectiveAMDGCNTarget();
>>> bool ParseDirectiveAMDHSAKernel();
>>> bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
>>> @@ -1159,6 +1168,7 @@ private:
>>> bool validateMIMGDim(const MCInst &Inst);
>>> bool validateLdsDirect(const MCInst &Inst);
>>> bool validateOpSel(const MCInst &Inst);
>>> +  bool validateVccOperand(unsigned Reg) const;
>>> bool validateVOP3Literal(const MCInst &Inst) const;
>>> bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
>>> bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
>>> @@ -1190,6 +1200,7 @@ public:
>>> OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
>>> OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
>>> OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
>>> +  OperandMatchResultTy parseBoolReg(OperandVector &Operands);
>>>
>>> bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
>>>                       const unsigned MinVal,
>>> @@ -1479,6 +1490,11 @@ bool AMDGPUOperand::isSDWAInt32Operand()
>>> return isSDWAOperand(MVT::i32);
>>> }
>>>
>>> +bool AMDGPUOperand::isBoolReg() const {
>>> +  return AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ?
>>> +    isSCSrcB64() : isSCSrcB32();
>>> +}
>>> +
>>> uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
>>> {
>>> assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
>>> @@ -3030,6 +3046,13 @@ bool AMDGPUAsmParser::validateOpSel(cons
>>> return true;
>>> }
>>>
>>> +// Check if VCC register matches wavefront size
>>> +bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
>>> +  auto FB = getFeatureBits();
>>> +  return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
>>> +    (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
>>> +}
>>> +
>>> // VOP3 literal is only allowed in GFX10+ and only one can be used
>>> bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
>>> unsigned Opcode = Inst.getOpcode();
>>> @@ -3267,9 +3290,9 @@ bool AMDGPUAsmParser::OutOfRangeError(SM
>>>
>>> bool AMDGPUAsmParser::calculateGPRBlocks(
>>> const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
>>> -    bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange,
>>> -    unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks,
>>> -    unsigned &SGPRBlocks) {
>>> +    bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
>>> +    SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
>>> +    unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
>>> // TODO(scott.linder): These calculations are duplicated from
>>> // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
>>> IsaVersion Version = getIsaVersion(getSTI().getCPU());
>>> @@ -3298,7 +3321,8 @@ bool AMDGPUAsmParser::calculateGPRBlocks
>>> NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
>>> }
>>>
>>> -  VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
>>> +  VGPRBlocks =
>>> +      IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
>>> SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
>>>
>>> return false;
>>> @@ -3329,6 +3353,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDH
>>> bool ReserveVCC = true;
>>> bool ReserveFlatScr = true;
>>> bool ReserveXNACK = hasXNACK();
>>> +  Optional<bool> EnableWavefrontSize32;
>>>
>>> while (true) {
>>> while (getLexer().is(AsmToken::EndOfStatement))
>>> @@ -3547,8 +3572,9 @@ bool AMDGPUAsmParser::ParseDirectiveAMDH
>>> unsigned VGPRBlocks;
>>> unsigned SGPRBlocks;
>>> if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
>>> -                         ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR,
>>> -                         SGPRRange, VGPRBlocks, SGPRBlocks))
>>> +                         ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
>>> +                         VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
>>> +                         SGPRBlocks))
>>> return true;
>>>
>>> if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
>>> @@ -5384,6 +5410,15 @@ AMDGPUAsmParser::parseSOppBrTarget(Opera
>>> }
>>>
>>> //===----------------------------------------------------------------------===//
>>> +// Boolean holding registers
>>> +//===----------------------------------------------------------------------===//
>>> +
>>> +OperandMatchResultTy
>>> +AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
>>> +  return parseReg(Operands);
>>> +}
>>> +
>>> +//===----------------------------------------------------------------------===//
>>> // mubuf
>>> //===----------------------------------------------------------------------===//
>>>
>>> @@ -6294,7 +6329,7 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Ins
>>> }
>>> AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
>>> // Add the register arguments
>>> -    if (Op.isReg() && Op.getReg() == AMDGPU::VCC) {
>>> +    if (Op.isReg() && validateVccOperand(Op.getReg())) {
>>> // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
>>> // Skip it.
>>> continue;
>>> @@ -6437,7 +6472,8 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &In
>>>
>>> for (unsigned E = Operands.size(); I != E; ++I) {
>>> AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
>>> -    if (skipVcc && !skippedVcc && Op.isReg() && Op.getReg() == AMDGPU::VCC) {
>>> +    if (skipVcc && !skippedVcc && Op.isReg() &&
>>> +        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
>>> // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
>>> // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
>>> // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
>>>
>>> Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp?rev=363299&r1=363298&r2=363299&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp (original)
>>> +++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp Thu Jun 13 12:18:29 2019
>>> @@ -442,6 +442,7 @@ void AMDGPUInstPrinter::printVOPDst(cons
>>>
>>> printOperand(MI, OpNo, STI, O);
>>>
>>> +  // Print default vcc/vcc_lo operand.
>>> switch (MI->getOpcode()) {
>>> default: break;
>>>
>>> @@ -589,7 +590,8 @@ void AMDGPUInstPrinter::printDefaultVccO
>>>                                          raw_ostream &O) {
>>> if (OpNo > 0)
>>> O << ", ";
>>> -  printRegOperand(AMDGPU::VCC, O, MRI);
>>> +  printRegOperand(STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ?
>>> +                  AMDGPU::VCC : AMDGPU::VCC_LO, O, MRI);
>>> if (OpNo == 0)
>>> O << ", ";
>>> }
>>> @@ -597,6 +599,7 @@ void AMDGPUInstPrinter::printDefaultVccO
>>> void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
>>>                                const MCSubtargetInfo &STI,
>>>                                raw_ostream &O) {
>>> +  // Print default vcc/vcc_lo operand of VOPC.
>>> const MCInstrDesc &Desc = MII.get(MI->getOpcode());
>>> if (OpNo == 0 && (Desc.TSFlags & SIInstrFlags::VOPC) &&
>>> (Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC) ||
>>> @@ -680,6 +683,7 @@ void AMDGPUInstPrinter::printOperand(con
>>> O << "/*INV_OP*/";
>>> }
>>>
>>> +  // Print default vcc/vcc_lo operand of v_cndmask_b32_e32.
>>> switch (MI->getOpcode()) {
>>> default: break;
>>>
>>> @@ -749,6 +753,7 @@ void AMDGPUInstPrinter::printOperandAndI
>>> if (InputModifiers & SISrcMods::SEXT)
>>> O << ')';
>>>
>>> +  // Print default vcc/vcc_lo operand of VOP2b.
>>> switch (MI->getOpcode()) {
>>> default: break;
>>>
>>>
>>> Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp?rev=363299&r1=363298&r2=363299&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp (original)
>>> +++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp Thu Jun 13 12:18:29 2019
>>> @@ -389,7 +389,7 @@ SIMCCodeEmitter::getSDWAVopcDstEncoding(
>>> const MCOperand &MO = MI.getOperand(OpNo);
>>>
>>> unsigned Reg = MO.getReg();
>>> -  if (Reg != AMDGPU::VCC) {
>>> +  if (Reg != AMDGPU::VCC && Reg != AMDGPU::VCC_LO) {
>>> RegEnc |= MRI.getEncodingValue(Reg);
>>> RegEnc &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
>>> RegEnc |= SDWA9EncValues::VOPC_DST_VCC_MASK;
>>>
>>> Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=363299&r1=363298&r2=363299&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
>>> +++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Thu Jun 13 12:18:29 2019
>>> @@ -6,6 +6,11 @@
>>> //
>>> //===----------------------------------------------------------------------===//
>>>
>>> +def isWave32 : Predicate<"Subtarget->getWavefrontSize() == 32">,
>>> +  AssemblerPredicate <"FeatureWavefrontSize32">;
>>> +def isWave64 : Predicate<"Subtarget->getWavefrontSize() == 64">,
>>> +  AssemblerPredicate <"FeatureWavefrontSize64">;
>>> +
>>> def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">;
>>>
>>> class GCNPredicateControl : PredicateControl {
>>>
>>> Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=363299&r1=363298&r2=363299&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
>>> +++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Thu Jun 13 12:18:29 2019
>>> @@ -188,9 +188,18 @@ class WrapTerminatorInst<SOP_Pseudo base
>>> let CodeSize = base_inst.CodeSize;
>>> }
>>>
>>> +let WaveSizePredicate = isWave64 in {
>>> def S_MOV_B64_term : WrapTerminatorInst<S_MOV_B64>;
>>> def S_XOR_B64_term : WrapTerminatorInst<S_XOR_B64>;
>>> def S_ANDN2_B64_term : WrapTerminatorInst<S_ANDN2_B64>;
>>> +}
>>> +
>>> +let WaveSizePredicate = isWave32 in {
>>> +def S_MOV_B32_term : WrapTerminatorInst<S_MOV_B32>;
>>> +def S_XOR_B32_term : WrapTerminatorInst<S_XOR_B32>;
>>> +def S_OR_B32_term : WrapTerminatorInst<S_OR_B32>;
>>> +def S_ANDN2_B32_term : WrapTerminatorInst<S_ANDN2_B32>;
>>> +}
>>>
>>> def WAVE_BARRIER : SPseudoInstSI<(outs), (ins),
>>> [(int_amdgcn_wave_barrier)]> {
>>> @@ -343,6 +352,15 @@ def SI_INIT_EXEC : SPseudoInstSI <
>>> let Defs = [EXEC];
>>> let usesCustomInserter = 1;
>>> let isAsCheapAsAMove = 1;
>>> +  let WaveSizePredicate = isWave64;
>>> +}
>>> +
>>> +def SI_INIT_EXEC_LO : SPseudoInstSI <
>>> +  (outs), (ins i32imm:$src), []> {
>>> +  let Defs = [EXEC_LO];
>>> +  let usesCustomInserter = 1;
>>> +  let isAsCheapAsAMove = 1;
>>> +  let WaveSizePredicate = isWave32;
>>> }
>>>
>>> def SI_INIT_EXEC_FROM_INPUT : SPseudoInstSI <
>>>
>>> Modified: llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td?rev=363299&r1=363298&r2=363299&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td (original)
>>> +++ llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td Thu Jun 13 12:18:29 2019
>>> @@ -275,6 +275,21 @@ let SubtargetPredicate = isGFX9Plus in {
>>> } // End SubtargetPredicate = isGFX9Plus
>>>
>>> let SubtargetPredicate = isGFX10Plus in {
>>> +  let hasSideEffects = 1, Defs = [EXEC, SCC], Uses = [EXEC] in {
>>> +    def S_AND_SAVEEXEC_B32   : SOP1_32<"s_and_saveexec_b32">;
>>> +    def S_OR_SAVEEXEC_B32    : SOP1_32<"s_or_saveexec_b32">;
>>> +    def S_XOR_SAVEEXEC_B32   : SOP1_32<"s_xor_saveexec_b32">;
>>> +    def S_ANDN2_SAVEEXEC_B32 : SOP1_32<"s_andn2_saveexec_b32">;
>>> +    def S_ORN2_SAVEEXEC_B32  : SOP1_32<"s_orn2_saveexec_b32">;
>>> +    def S_NAND_SAVEEXEC_B32  : SOP1_32<"s_nand_saveexec_b32">;
>>> +    def S_NOR_SAVEEXEC_B32   : SOP1_32<"s_nor_saveexec_b32">;
>>> +    def S_XNOR_SAVEEXEC_B32  : SOP1_32<"s_xnor_saveexec_b32">;
>>> +    def S_ANDN1_SAVEEXEC_B32 : SOP1_32<"s_andn1_saveexec_b32">;
>>> +    def S_ORN1_SAVEEXEC_B32  : SOP1_32<"s_orn1_saveexec_b32">;
>>> +    def S_ANDN1_WREXEC_B32   : SOP1_32<"s_andn1_wrexec_b32">;
>>> +    def S_ANDN2_WREXEC_B32   : SOP1_32<"s_andn2_wrexec_b32">;
>>> +  } // End hasSideEffects = 1, Defs = [EXEC, SCC], Uses = [EXEC]
>>> +
>>> let Uses = [M0] in {
>>> def S_MOVRELSD_2_B32 : SOP1_32<"s_movrelsd_2_b32">;
>>> } // End Uses = [M0]
>>> @@ -782,6 +797,9 @@ let SubtargetPredicate = isGFX10Plus in
>>> let has_sdst = 0;
>>> }
>>>
>>> +  def S_SUBVECTOR_LOOP_BEGIN : SOPK_32_BR<"s_subvector_loop_begin">;
>>> +  def S_SUBVECTOR_LOOP_END   : SOPK_32_BR<"s_subvector_loop_end">;
>>> +
>>> def S_WAITCNT_VSCNT   : SOPK_WAITCNT<"s_waitcnt_vscnt">;
>>> def S_WAITCNT_VMCNT   : SOPK_WAITCNT<"s_waitcnt_vmcnt">;
>>> def S_WAITCNT_EXPCNT  : SOPK_WAITCNT<"s_waitcnt_expcnt">;
>>> @@ -1215,6 +1233,18 @@ defm S_ORN1_SAVEEXEC_B64    : SOP1_Real_
>>> defm S_ANDN1_WREXEC_B64     : SOP1_Real_gfx10<0x039>;
>>> defm S_ANDN2_WREXEC_B64     : SOP1_Real_gfx10<0x03a>;
>>> defm S_BITREPLICATE_B64_B32 : SOP1_Real_gfx10<0x03b>;
>>> +defm S_AND_SAVEEXEC_B32     : SOP1_Real_gfx10<0x03c>;
>>> +defm S_OR_SAVEEXEC_B32      : SOP1_Real_gfx10<0x03d>;
>>> +defm S_XOR_SAVEEXEC_B32     : SOP1_Real_gfx10<0x03e>;
>>> +defm S_ANDN2_SAVEEXEC_B32   : SOP1_Real_gfx10<0x03f>;
>>> +defm S_ORN2_SAVEEXEC_B32    : SOP1_Real_gfx10<0x040>;
>>> +defm S_NAND_SAVEEXEC_B32    : SOP1_Real_gfx10<0x041>;
>>> +defm S_NOR_SAVEEXEC_B32     : SOP1_Real_gfx10<0x042>;
>>> +defm S_XNOR_SAVEEXEC_B32    : SOP1_Real_gfx10<0x043>;
>>> +defm S_ANDN1_SAVEEXEC_B32   : SOP1_Real_gfx10<0x044>;
>>> +defm S_ORN1_SAVEEXEC_B32    : SOP1_Real_gfx10<0x045>;
>>> +defm S_ANDN1_WREXEC_B32     : SOP1_Real_gfx10<0x046>;
>>> +defm S_ANDN2_WREXEC_B32     : SOP1_Real_gfx10<0x047>;
>>> defm S_MOVRELSD_2_B32       : SOP1_Real_gfx10<0x049>;
>>>
>>> //===----------------------------------------------------------------------===//
>>> @@ -1382,6 +1412,8 @@ defm S_WAITCNT_VSCNT        : SOPK_Real3
>>> defm S_WAITCNT_VMCNT        : SOPK_Real32_gfx10<0x018>;
>>> defm S_WAITCNT_EXPCNT       : SOPK_Real32_gfx10<0x019>;
>>> defm S_WAITCNT_LGKMCNT      : SOPK_Real32_gfx10<0x01a>;
>>> +defm S_SUBVECTOR_LOOP_BEGIN : SOPK_Real32_gfx10<0x01b>;
>>> +defm S_SUBVECTOR_LOOP_END   : SOPK_Real32_gfx10<0x01c>;
>>>
>>> //===----------------------------------------------------------------------===//
>>> // SOPK - GFX6, GFX7.
>>>
>>> Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp?rev=363299&r1=363298&r2=363299&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (original)
>>> +++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Thu Jun 13 12:18:29 2019
>>> @@ -380,12 +380,17 @@ unsigned getNumSGPRBlocks(const MCSubtar
>>> return NumSGPRs / getSGPREncodingGranule(STI) - 1;
>>> }
>>>
>>> -unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI) {
>>> -  return 4;
>>> +unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
>>> +                             Optional<bool> EnableWavefrontSize32) {
>>> +  bool IsWave32 = EnableWavefrontSize32 ?
>>> +      *EnableWavefrontSize32 :
>>> +      STI->getFeatureBits().test(FeatureWavefrontSize32);
>>> +  return IsWave32 ? 8 : 4;
>>> }
>>>
>>> -unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI) {
>>> -  return getVGPRAllocGranule(STI);
>>> +unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
>>> +                                Optional<bool> EnableWavefrontSize32) {
>>> +  return getVGPRAllocGranule(STI, EnableWavefrontSize32);
>>> }
>>>
>>> unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
>>> @@ -416,10 +421,12 @@ unsigned getMaxNumVGPRs(const MCSubtarge
>>> return std::min(MaxNumVGPRs, AddressableNumVGPRs);
>>> }
>>>
>>> -unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs) {
>>> -  NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(STI));
>>> +unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
>>> +                          Optional<bool> EnableWavefrontSize32) {
>>> +  NumVGPRs = alignTo(std::max(1u, NumVGPRs),
>>> +                     getVGPREncodingGranule(STI, EnableWavefrontSize32));
>>> // VGPRBlocks is actual number of VGPR blocks minus 1.
>>> -  return NumVGPRs / getVGPREncodingGranule(STI) - 1;
>>> +  return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1;
>>> }
>>>
>>> } // end namespace IsaInfo
>>> @@ -437,7 +444,6 @@ void initDefaultAMDKernelCodeT(amd_kerne
>>> Header.amd_machine_version_minor = Version.Minor;
>>> Header.amd_machine_version_stepping = Version.Stepping;
>>> Header.kernel_code_entry_byte_offset = sizeof(Header);
>>> -  // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
>>> Header.wavefront_size = 6;
>>>
>>> // If the code object does not support indirect functions, then the value must
>>>
>>> Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h?rev=363299&r1=363298&r2=363299&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (original)
>>> +++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h Thu Jun 13 12:18:29 2019
>>> @@ -150,10 +150,18 @@ unsigned getNumExtraSGPRs(const MCSubtar
>>> unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
>>>
>>> /// \returns VGPR allocation granularity for given subtarget \p STI.
>>> -unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI);
>>> +///
>>> +/// For subtargets which support it, \p EnableWavefrontSize32 should match
>>> +/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
>>> +unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
>>> +                             Optional<bool> EnableWavefrontSize32 = None);
>>>
>>> /// \returns VGPR encoding granularity for given subtarget \p STI.
>>> -unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI);
>>> +///
>>> +/// For subtargets which support it, \p EnableWavefrontSize32 should match
>>> +/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
>>> +unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
>>> +                                Optional<bool> EnableWavefrontSize32 = None);
>>>
>>> /// \returns Total number of VGPRs for given subtarget \p STI.
>>> unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
>>> @@ -171,7 +179,11 @@ unsigned getMaxNumVGPRs(const MCSubtarge
>>>
>>> /// \returns Number of VGPR blocks needed for given subtarget \p STI when
>>> /// \p NumVGPRs are used.
>>> -unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
>>> +///
>>> +/// For subtargets which support it, \p EnableWavefrontSize32 should match the
>>> +/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
>>> +unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs,
>>> +                          Optional<bool> EnableWavefrontSize32 = None);
>>>
>>> } // end namespace IsaInfo
>>>
>>>
>>> Modified: llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td?rev=363299&r1=363298&r2=363299&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td (original)
>>> +++ llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td Thu Jun 13 12:18:29 2019
>>> @@ -199,7 +199,12 @@ class VOP2bInstAlias <VOP2_Pseudo ps, In
>>> }
>>>
>>> multiclass VOP2bInstAliases<VOP2_Pseudo ps, VOP2_Real inst, string OpName> {
>>> +  let WaveSizePredicate = isWave32 in {
>>> +    def : VOP2bInstAlias<ps, inst, OpName, "vcc_lo">;
>>> +  }
>>> +  let WaveSizePredicate = isWave64 in {
>>> def : VOP2bInstAlias<ps, inst, OpName, "vcc">;
>>> +  }
>>> }
>>>
>>> multiclass VOP2eInst <string opName,
>>> @@ -234,7 +239,12 @@ class VOP2eInstAlias <VOP2_Pseudo ps, In
>>> }
>>>
>>> multiclass VOP2eInstAliases<VOP2_Pseudo ps, VOP2_Real inst> {
>>> +  let WaveSizePredicate = isWave32 in {
>>> +    def : VOP2eInstAlias<ps, inst, "vcc_lo">;
>>> +  }
>>> +  let WaveSizePredicate = isWave64 in {
>>> def : VOP2eInstAlias<ps, inst, "vcc">;
>>> +  }
>>> }
>>>
>>> class VOP_MADAK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
>>> @@ -953,6 +963,30 @@ let AssemblerPredicate = isGFX10Plus, De
>>>   let DecoderNamespace = "DPP8";
>>> }
>>>
>>> +    let WaveSizePredicate = isWave32 in {
>>> +      def _sdwa_w32_gfx10 :
>>> +        Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
>>> +        VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
>>> +          VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
>>> +          let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands);
>>> +          let isAsmParserOnly = 1;
>>> +          let DecoderNamespace = "SDWA10";
>>> +        }
>>> +      def _dpp_w32_gfx10 :
>>> +        VOP2_DPP16<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
>>> +          string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
>>> +          let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP);
>>> +          let isAsmParserOnly = 1;
>>> +        }
>>> +      def _dpp8_w32_gfx10 :
>>> +        VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
>>> +          string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
>>> +          let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8);
>>> +          let isAsmParserOnly = 1;
>>> +        }
>>> +    } // End WaveSizePredicate = isWave32
>>> +
>>> +    let WaveSizePredicate = isWave64 in {
>>> def _sdwa_w64_gfx10 :
>>>   Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
>>>   VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
>>> @@ -973,6 +1007,7 @@ let AssemblerPredicate = isGFX10Plus, De
>>>     let AsmString = asmName # AsmDPP8;
>>>     let isAsmParserOnly = 1;
>>>   }
>>> +    } // End WaveSizePredicate = isWave64
>>> }
>>>
>>> //===----------------------------- VOP3Only -----------------------------===//
>>>
>>> Modified: llvm/trunk/lib/Target/AMDGPU/VOPCInstructions.td
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/VOPCInstructions.td?rev=363299&r1=363298&r2=363299&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AMDGPU/VOPCInstructions.td (original)
>>> +++ llvm/trunk/lib/Target/AMDGPU/VOPCInstructions.td Thu Jun 13 12:18:29 2019
>>> @@ -165,9 +165,16 @@ class VOPCInstAlias <VOP3_Pseudo ps, Ins
>>> multiclass VOPCInstAliases <string OpName, string Arch> {
>>> def : VOPCInstAlias <!cast<VOP3_Pseudo>(OpName#"_e64"),
>>>                  !cast<Instruction>(OpName#"_e32_"#Arch)>;
>>> +  let WaveSizePredicate = isWave32 in {
>>> +    def : VOPCInstAlias <!cast<VOP3_Pseudo>(OpName#"_e64"),
>>> +                         !cast<Instruction>(OpName#"_e32_"#Arch),
>>> +                         "vcc_lo, "#!cast<VOP3_Pseudo>(OpName#"_e64").Pfl.Asm32>;
>>> +  }
>>> +  let WaveSizePredicate = isWave64 in {
>>> def : VOPCInstAlias <!cast<VOP3_Pseudo>(OpName#"_e64"),
>>>                    !cast<Instruction>(OpName#"_e32_"#Arch),
>>>                    "vcc, "#!cast<VOP3_Pseudo>(OpName#"_e64").Pfl.Asm32>;
>>> +  }
>>> }
>>>
>>> multiclass VOPCXInstAliases <string OpName, string Arch> {
>>> @@ -740,10 +747,17 @@ defm V_CMPX_CLASS_F16 : VOPCX_CLASS_F16
>>> // We need to use COPY_TO_REGCLASS to w/a the problem when ReplaceAllUsesWith()
>>> // complaints it cannot replace i1 <-> i64/i32 if node was not morphed in place.
>>> multiclass ICMP_Pattern <PatLeaf cond, Instruction inst, ValueType vt> {
>>> +  let WaveSizePredicate = isWave64 in
>>> def : GCNPat <
>>> (i64 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
>>> (i64 (COPY_TO_REGCLASS (inst $src0, $src1), SReg_64))
>>> >;
>>> +
>>> +  let WaveSizePredicate = isWave32 in
>>> +  def : GCNPat <
>>> +    (i32 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
>>> +    (i32 (COPY_TO_REGCLASS (inst $src0, $src1), SReg_32))
>>> +  >;
>>> }
>>>
>>> defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U32_e64, i32>;
>>> @@ -780,12 +794,21 @@ defm : ICMP_Pattern <COND_SLT, V_CMP_LT_
>>> defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I16_e64, i16>;
>>>
>>> multiclass FCMP_Pattern <PatLeaf cond, Instruction inst, ValueType vt> {
>>> +  let WaveSizePredicate = isWave64 in
>>> def : GCNPat <
>>> (i64 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
>>>            (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
>>> (i64 (COPY_TO_REGCLASS (inst $src0_modifiers, $src0, $src1_modifiers, $src1,
>>>                      DSTCLAMP.NONE), SReg_64))
>>> >;
>>> +
>>> +  let WaveSizePredicate = isWave32 in
>>> +  def : GCNPat <
>>> +    (i32 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
>>> +                 (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
>>> +    (i32 (COPY_TO_REGCLASS (inst $src0_modifiers, $src0, $src1_modifiers, $src1,
>>> +                           DSTCLAMP.NONE), SReg_32))
>>> +  >;
>>> }
>>>
>>> defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F32_e64, f32>;
>>>
>>>
>>> _______________________________________________
>>> llvm-commits mailing list
>>> llvm-commits at lists.llvm.org
>>> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>