[llvm] r292688 - [AMDGPU] Fix some Clang-tidy modernize and Include What You Use warnings; other minor fixes (NFC).

Michael Kuperstein via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 20 17:33:34 PST 2017


Breaks for me on a linux non-sanitizer build as well.

On Fri, Jan 20, 2017 at 5:27 PM, Mike Aizatsky via llvm-commits <
llvm-commits at lists.llvm.org> wrote:

> Eugene,
>
> Looks like this change breaks sanitizer-windows bot:
> http://lab.llvm.org:8011/builders/sanitizer-windows/builds/4908
>
> C:\PROGRA~2\MICROS~1.0\VC\bin\AMD64_~2\cl.exe   /nologo /TP
> -DGTEST_HAS_RTTI=0 -DUNICODE -D_CRT_NONSTDC_NO_DEPRECATE
> -D_CRT_NONSTDC_NO_WARNINGS -D_CRT_SECURE_NO_DEPRECATE
> -D_CRT_SECURE_NO_WARNINGS -D_HAS_EXCEPTIONS=0 -D_SCL_SECURE_NO_DEPRECATE
> -D_SCL_SECURE_NO_WARNINGS -D_UNICODE -D__STDC_CONSTANT_MACROS
> -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -Ilib\Target\AMDGPU
> -IC:\b\slave\sanitizer-windows\llvm\lib\Target\AMDGPU -Iinclude
> -IC:\b\slave\sanitizer-windows\llvm\include /DWIN32 /D_WINDOWS   /W4
> -wd4141 -wd4146 -wd4180 -wd4244 -wd4258 -wd4267 -wd4291 -wd4345 -wd4351
> -wd4355 -wd4456 -wd4457 -wd4458 -wd4459 -wd4503 -wd4624 -wd4722 -wd4800
> -wd4100 -wd4127 -wd4512 -wd4505 -wd4610 -wd4510 -wd4702 -wd4245 -wd4706
> -wd4310 -wd4701 -wd4703 -wd4389 -wd4611 -wd4805 -wd4204 -wd4577 -wd4091
> -wd4592 -wd4319 -wd4324 -w14062 -we4238 /Zc:inline /Zc:strictStrings /Oi
> /Zc:rvalueCast /MD /O2 /Ob2   -UNDEBUG  /EHs-c- /GR- /showIncludes
> /Folib\Target\AMDGPU\CMakeFiles\LLVMAMDGPUCodeGen.
> dir\AMDILCFGStructurizer.cpp.obj /Fdlib\Target\AMDGPU\
> CMakeFiles\LLVMAMDGPUCodeGen.dir\ /FS -c C:\b\slave\sanitizer-windows\
> llvm\lib\Target\AMDGPU\AMDILCFGStructurizer.cpp
> C:\b\slave\sanitizer-windows\llvm\lib\Target\AMDGPU\AMDILCFGStructurizer.cpp(689):
> error C2027: use of undefined type 'llvm::MachineJumpTableInfo'
> C:\b\slave\sanitizer-windows\llvm\include\llvm/Target/TargetLowering.h(74):
> note: see declaration of 'llvm::MachineJumpTableInfo'
> C:\b\slave\sanitizer-windows\llvm\lib\Target\AMDGPU\AMDILCFGStructurizer.cpp(689):
> error C2227: left of '->isEmpty' must point to class/struct/union/generic
> type
> 3271149.631 [9/14/3] Building CXX object lib\Target\AMDGPU\CMakeFiles\
> LLVMAMDGPUCodeGen.dir\SIInsertSkips.cpp.obj
>
>
> On Fri, Jan 20, 2017 at 5:04 PM Eugene Zelenko via llvm-commits <
> llvm-commits at lists.llvm.org> wrote:
>
>> Author: eugenezelenko
>> Date: Fri Jan 20 18:53:49 2017
>> New Revision: 292688
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=292688&view=rev
>> Log:
>> [AMDGPU] Fix some Clang-tidy modernize and Include What You Use warnings;
>> other minor fixes (NFC).
>>
>> Modified:
>>     llvm/trunk/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
>>     llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
>>     llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
>>     llvm/trunk/lib/Target/AMDGPU/SIInsertSkips.cpp
>>     llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp
>>     llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
>>     llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h
>>     llvm/trunk/lib/Target/AMDGPU/SIMachineScheduler.h
>>
>> Modified: llvm/trunk/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/
>> AMDGPU/AMDILCFGStructurizer.cpp?rev=292688&r1=292687&r2=292688&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp Fri Jan 20
>> 18:53:49 2017
>> @@ -9,27 +9,39 @@
>>  //==-------------------------------------------------------
>> ----------------===//
>>
>>  #include "AMDGPU.h"
>> -#include "AMDGPUInstrInfo.h"
>>  #include "AMDGPUSubtarget.h"
>>  #include "R600InstrInfo.h"
>> +#include "R600RegisterInfo.h"
>>  #include "llvm/ADT/DepthFirstIterator.h"
>>  #include "llvm/ADT/SCCIterator.h"
>> +#include "llvm/ADT/SmallPtrSet.h"
>>  #include "llvm/ADT/SmallVector.h"
>>  #include "llvm/ADT/Statistic.h"
>> +#include "llvm/ADT/StringRef.h"
>> +#include "llvm/CodeGen/MachineBasicBlock.h"
>>  #include "llvm/CodeGen/MachineDominators.h"
>>  #include "llvm/CodeGen/MachineFunction.h"
>>  #include "llvm/CodeGen/MachineFunctionPass.h"
>> +#include "llvm/CodeGen/MachineInstr.h"
>>  #include "llvm/CodeGen/MachineInstrBuilder.h"
>> -#include "llvm/CodeGen/MachineJumpTableInfo.h"
>>  #include "llvm/CodeGen/MachineLoopInfo.h"
>> +#include "llvm/CodeGen/MachineOperand.h"
>>  #include "llvm/CodeGen/MachinePostDominators.h"
>>  #include "llvm/CodeGen/MachineRegisterInfo.h"
>> -#include "llvm/IR/Dominators.h"
>> +#include "llvm/CodeGen/MachineValueType.h"
>> +#include "llvm/IR/DebugLoc.h"
>> +#include "llvm/IR/LLVMContext.h"
>> +#include "llvm/Pass.h"
>>  #include "llvm/Support/Debug.h"
>> +#include "llvm/Support/ErrorHandling.h"
>>  #include "llvm/Support/raw_ostream.h"
>> -#include "llvm/Target/TargetInstrInfo.h"
>> -#include "llvm/Target/TargetMachine.h"
>> +#include <cassert>
>> +#include <cstddef>
>>  #include <deque>
>> +#include <iterator>
>> +#include <map>
>> +#include <utility>
>> +#include <vector>
>>
>>  using namespace llvm;
>>
>> @@ -53,15 +65,19 @@ STATISTIC(numClonedBlock,           "CFG
>>  STATISTIC(numClonedInstr,           "CFGStructurizer cloned
>> instructions");
>>
>>  namespace llvm {
>> +
>>    void initializeAMDGPUCFGStructurizerPass(PassRegistry&);
>> -}
>> +
>> +} // end namespace llvm
>> +
>> +namespace {
>>
>>  //===------------------------------------------------------
>> ----------------===//
>>  //
>>  // Miscellaneous utility for CFGStructurizer.
>>  //
>>  //===------------------------------------------------------
>> ----------------===//
>> -namespace {
>> +
>>  #define SHOWNEWINSTR(i) \
>>    DEBUG(dbgs() << "New instr: " << *i << "\n");
>>
>> @@ -92,25 +108,19 @@ void ReverseVector(SmallVectorImpl<NodeT
>>    }
>>  }
>>
>> -} // end anonymous namespace
>> -
>>  //===------------------------------------------------------
>> ----------------===//
>>  //
>>  // supporting data structure for CFGStructurizer
>>  //
>>  //===------------------------------------------------------
>> ----------------===//
>>
>> -
>> -namespace {
>> -
>>  class BlockInformation {
>>  public:
>> -  bool IsRetired;
>> -  int  SccNum;
>> -  BlockInformation() : IsRetired(false), SccNum(INVALIDSCCNUM) {}
>> -};
>> +  bool IsRetired = false;
>> +  int SccNum = INVALIDSCCNUM;
>>
>> -} // end anonymous namespace
>> +  BlockInformation() = default;
>> +};
>>
>>  //===------------------------------------------------------
>> ----------------===//
>>  //
>> @@ -118,7 +128,6 @@ public:
>>  //
>>  //===------------------------------------------------------
>> ----------------===//
>>
>> -namespace {
>>  class AMDGPUCFGStructurizer : public MachineFunctionPass {
>>  public:
>>    typedef SmallVector<MachineBasicBlock *, 32> MBBVector;
>> @@ -133,8 +142,7 @@ public:
>>
>>    static char ID;
>>
>> -  AMDGPUCFGStructurizer() :
>> -      MachineFunctionPass(ID), TII(nullptr), TRI(nullptr) {
>> +  AMDGPUCFGStructurizer() : MachineFunctionPass(ID) {
>>      initializeAMDGPUCFGStructurizerPass(*PassRegistry::
>> getPassRegistry());
>>    }
>>
>> @@ -167,7 +175,7 @@ public:
>>      MLI = &getAnalysis<MachineLoopInfo>();
>>      DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
>>      MDT = &getAnalysis<MachineDominatorTree>();
>> -    DEBUG(MDT->print(dbgs(), (const llvm::Module*)nullptr););
>> +    DEBUG(MDT->print(dbgs(), (const Module*)nullptr););
>>      PDT = &getAnalysis<MachinePostDominatorTree>();
>>      DEBUG(PDT->print(dbgs()););
>>      prepare();
>> @@ -180,8 +188,8 @@ protected:
>>    MachineDominatorTree *MDT;
>>    MachinePostDominatorTree *PDT;
>>    MachineLoopInfo *MLI;
>> -  const R600InstrInfo *TII;
>> -  const R600RegisterInfo *TRI;
>> +  const R600InstrInfo *TII = nullptr;
>> +  const R600RegisterInfo *TRI = nullptr;
>>
>>    // PRINT FUNCTIONS
>>    /// Print the ordered Blocks.
>> @@ -198,6 +206,7 @@ protected:
>>        }
>>      }
>>    }
>> +
>>    static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) {
>>      for (MachineLoop::iterator iter = LoopInfo.begin(),
>>           iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) {
>> @@ -263,7 +272,6 @@ protected:
>>        MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk);
>>    static void wrapup(MachineBasicBlock *MBB);
>>
>> -
>>    int patternMatch(MachineBasicBlock *MBB);
>>    int patternMatchGroup(MachineBasicBlock *MBB);
>>    int serialPatternMatch(MachineBasicBlock *MBB);
>> @@ -328,7 +336,6 @@ protected:
>>    void recordSccnum(MachineBasicBlock *MBB, int SCCNum);
>>    void retireBlock(MachineBasicBlock *MBB);
>>
>> -
>>  private:
>>    MBBInfoMap BlockInfoMap;
>>    LoopLandInfoMap LLInfoMap;
>> @@ -337,6 +344,10 @@ private:
>>    SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks;
>>  };
>>
>> +char AMDGPUCFGStructurizer::ID = 0;
>> +
>> +} // end anonymous namespace
>> +
>>  int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const {
>>    MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
>>    if (It == BlockInfoMap.end())
>> @@ -379,6 +390,7 @@ bool AMDGPUCFGStructurizer::isActiveLoop
>>    }
>>    return false;
>>  }
>> +
>>  AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
>>      MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
>>      bool AllowSideEntry) const {
>> @@ -697,10 +709,8 @@ void AMDGPUCFGStructurizer::wrapup(Machi
>>     // (jumpTableInfo->isEmpty() == false) { need to clean the jump
>> table, but
>>     // there isn't such an interface yet.  alternatively, replace all the
>> other
>>     // blocks in the jump table with the entryBlk //}
>> -
>>  }
>>
>> -
>>  bool AMDGPUCFGStructurizer::prepare() {
>>    bool Changed = false;
>>
>> @@ -748,7 +758,6 @@ bool AMDGPUCFGStructurizer::prepare() {
>>  }
>>
>>  bool AMDGPUCFGStructurizer::run() {
>> -
>>    //Assume reducible CFG...
>>    DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n");
>>
>> @@ -886,8 +895,6 @@ bool AMDGPUCFGStructurizer::run() {
>>    return true;
>>  }
>>
>> -
>> -
>>  void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
>>    int SccNum = 0;
>>    MachineBasicBlock *MBB;
>> @@ -941,7 +948,6 @@ int AMDGPUCFGStructurizer::patternMatchG
>>    return NumMatch;
>>  }
>>
>> -
>>  int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
>>    if (MBB->succ_size() != 1)
>>      return 0;
>> @@ -1039,7 +1045,7 @@ int AMDGPUCFGStructurizer::loopendPatter
>>      for (MachineLoop *ML : depth_first(It))
>>        NestedLoops.push_front(ML);
>>
>> -  if (NestedLoops.size() == 0)
>> +  if (NestedLoops.empty())
>>      return 0;
>>
>>    // Process nested loop outside->inside (we did push_front),
>> @@ -1074,7 +1080,7 @@ int AMDGPUCFGStructurizer::mergeLoop(Mac
>>    MachineBasicBlock *ExitBlk = *ExitBlks.begin();
>>    assert(ExitBlk && "Loop has several exit block");
>>    MBBVector LatchBlks;
>> -  typedef GraphTraits<Inverse<MachineBasicBlock*> > InvMBBTraits;
>> +  typedef GraphTraits<Inverse<MachineBasicBlock*>> InvMBBTraits;
>>    InvMBBTraits::ChildIteratorType PI = InvMBBTraits::child_begin(
>> LoopHeader),
>>        PE = InvMBBTraits::child_end(LoopHeader);
>>    for (; PI != PE; PI++) {
>> @@ -1217,7 +1223,7 @@ void AMDGPUCFGStructurizer::showImproveS
>>      }
>>    }
>>
>> -    dbgs() << "\n";
>> +  dbgs() << "\n";
>>  }
>>
>>  int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock
>> *HeadMBB,
>> @@ -1478,7 +1484,6 @@ void AMDGPUCFGStructurizer::mergeIfthene
>>
>>    if (LandMBB && TrueMBB && FalseMBB)
>>      MBB->addSuccessor(LandMBB);
>> -
>>  }
>>
>>  void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock
>> *DstBlk,
>> @@ -1491,7 +1496,6 @@ void AMDGPUCFGStructurizer::mergeLooplan
>>    DstBlk->replaceSuccessor(DstBlk, LandMBB);
>>  }
>>
>> -
>>  void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock
>> *ExitingMBB,
>>      MachineBasicBlock *LandMBB) {
>>    DEBUG(dbgs() << "loopbreakPattern exiting = BB" <<
>> ExitingMBB->getNumber()
>> @@ -1727,11 +1731,6 @@ void AMDGPUCFGStructurizer::retireBlock(
>>           && "can't retire block yet");
>>  }
>>
>> -char AMDGPUCFGStructurizer::ID = 0;
>> -
>> -} // end anonymous namespace
>> -
>> -
>>  INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer",
>>                        "AMDGPU CFG Structurizer", false, false)
>>  INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
>>
>> Modified: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/
>> AMDGPU/AsmParser/AMDGPUAsmParser.cpp?rev=292688&r1=292687&r2=292688&
>> view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp Fri Jan
>> 20 18:53:49 2017
>> @@ -16,6 +16,7 @@
>>  #include "Utils/AMDGPUAsmUtils.h"
>>  #include "llvm/ADT/APFloat.h"
>>  #include "llvm/ADT/APInt.h"
>> +#include "llvm/ADT/ArrayRef.h"
>>  #include "llvm/ADT/SmallBitVector.h"
>>  #include "llvm/ADT/SmallString.h"
>>  #include "llvm/ADT/STLExtras.h"
>> @@ -39,15 +40,12 @@
>>  #include "llvm/MC/MCSubtargetInfo.h"
>>  #include "llvm/MC/MCSymbol.h"
>>  #include "llvm/Support/Casting.h"
>> -#include "llvm/Support/Debug.h"
>>  #include "llvm/Support/ELF.h"
>>  #include "llvm/Support/ErrorHandling.h"
>>  #include "llvm/Support/MathExtras.h"
>>  #include "llvm/Support/raw_ostream.h"
>>  #include "llvm/Support/SMLoc.h"
>>  #include "llvm/Support/TargetRegistry.h"
>> -#include "llvm/Support/raw_ostream.h"
>> -#include "llvm/Support/MathExtras.h"
>>  #include <algorithm>
>>  #include <cassert>
>>  #include <cstdint>
>> @@ -56,7 +54,6 @@
>>  #include <map>
>>  #include <memory>
>>  #include <string>
>> -#include <vector>
>>
>>  using namespace llvm;
>>  using namespace llvm::AMDGPU;
>> @@ -695,9 +692,9 @@ raw_ostream &operator <<(raw_ostream &OS
>>  // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
>>  // .amdgpu_hsa_kernel or at EOF.
>>  class KernelScopeInfo {
>> -  int SgprIndexUnusedMin;
>> -  int VgprIndexUnusedMin;
>> -  MCContext *Ctx;
>> +  int SgprIndexUnusedMin = -1;
>> +  int VgprIndexUnusedMin = -1;
>> +  MCContext *Ctx = nullptr;
>>
>>    void usesSgprAt(int i) {
>>      if (i >= SgprIndexUnusedMin) {
>> @@ -708,6 +705,7 @@ class KernelScopeInfo {
>>        }
>>      }
>>    }
>> +
>>    void usesVgprAt(int i) {
>>      if (i >= VgprIndexUnusedMin) {
>>        VgprIndexUnusedMin = ++i;
>> @@ -717,14 +715,16 @@ class KernelScopeInfo {
>>        }
>>      }
>>    }
>> +
>>  public:
>> -  KernelScopeInfo() : SgprIndexUnusedMin(-1), VgprIndexUnusedMin(-1),
>> Ctx(nullptr)
>> -  {}
>> +  KernelScopeInfo() = default;
>> +
>>    void initialize(MCContext &Context) {
>>      Ctx = &Context;
>>      usesSgprAt(SgprIndexUnusedMin = -1);
>>      usesVgprAt(VgprIndexUnusedMin = -1);
>>    }
>> +
>>    void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
>> unsigned RegWidth) {
>>      switch (RegKind) {
>>        case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
>> @@ -738,9 +738,9 @@ class AMDGPUAsmParser : public MCTargetA
>>    const MCInstrInfo &MII;
>>    MCAsmParser &Parser;
>>
>> -  unsigned ForcedEncodingSize;
>> -  bool ForcedDPP;
>> -  bool ForcedSDWA;
>> +  unsigned ForcedEncodingSize = 0;
>> +  bool ForcedDPP = false;
>> +  bool ForcedSDWA = false;
>>    KernelScopeInfo KernelScope;
>>
>>    /// @name Auto-generated Match Functions
>> @@ -779,10 +779,7 @@ public:
>>    AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
>>                 const MCInstrInfo &MII,
>>                 const MCTargetOptions &Options)
>> -      : MCTargetAsmParser(Options, STI), MII(MII), Parser(_Parser),
>> -        ForcedEncodingSize(0),
>> -        ForcedDPP(false),
>> -        ForcedSDWA(false) {
>> +      : MCTargetAsmParser(Options, STI), MII(MII), Parser(_Parser) {
>>      MCAsmParserExtension::Initialize(Parser);
>>
>>      if (getSTI().getFeatureBits().none()) {
>> @@ -1043,7 +1040,6 @@ bool AMDGPUOperand::isInlinableImm(MVT t
>>        AsmParser->hasInv2PiInlineImm());
>>    }
>>
>> -
>>    // We got int literal token.
>>    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
>>      return AMDGPU::isInlinableLiteral64(Imm.Val,
>> @@ -1132,7 +1128,7 @@ void AMDGPUOperand::addLiteralImmOperand
>>      APInt Literal(64, Val);
>>
>>      switch (OpSize) {
>> -    case 8: {
>> +    case 8:
>>        if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
>>                                         AsmParser->hasInv2PiInlineImm()))
>> {
>>          Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
>> @@ -1156,7 +1152,7 @@ void AMDGPUOperand::addLiteralImmOperand
>>        // unclear how we should encode them. This case should be checked
>> earlier
>>        // in predicate methods (isLiteralImm())
>>        llvm_unreachable("fp literal in 64-bit integer instruction.");
>> -    }
>> +
>>      case 4:
>>      case 2: {
>>        bool lost;
>> @@ -1180,7 +1176,7 @@ void AMDGPUOperand::addLiteralImmOperand
>>    // Only sign extend inline immediates.
>>    // FIXME: No errors on truncation
>>    switch (OpSize) {
>> -  case 4: {
>> +  case 4:
>>      if (isInt<32>(Val) &&
>>          AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
>>                                       AsmParser->hasInv2PiInlineImm())) {
>> @@ -1190,8 +1186,8 @@ void AMDGPUOperand::addLiteralImmOperand
>>
>>      Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
>>      return;
>> -  }
>> -  case 8: {
>> +
>> +  case 8:
>>      if (AMDGPU::isInlinableLiteral64(Val,
>>                                       AsmParser->hasInv2PiInlineImm())) {
>>        Inst.addOperand(MCOperand::createImm(Val));
>> @@ -1200,8 +1196,8 @@ void AMDGPUOperand::addLiteralImmOperand
>>
>>      Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
>>      return;
>> -  }
>> -  case 2: {
>> +
>> +  case 2:
>>      if (isInt<16>(Val) &&
>>          AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
>>                                       AsmParser->hasInv2PiInlineImm())) {
>> @@ -1211,7 +1207,7 @@ void AMDGPUOperand::addLiteralImmOperand
>>
>>      Inst.addOperand(MCOperand::createImm(Val & 0xffff));
>>      return;
>> -  }
>> +
>>    default:
>>      llvm_unreachable("invalid operand size");
>>    }
>> @@ -1295,7 +1291,8 @@ static unsigned getSpecialRegForName(Str
>>      .Default(0);
>>  }
>>
>> -bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
>> SMLoc &EndLoc) {
>> +bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
>> +                                    SMLoc &EndLoc) {
>>    auto R = parseRegister();
>>    if (!R) return true;
>>    assert(R->isReg());
>> @@ -1305,20 +1302,43 @@ bool AMDGPUAsmParser::ParseRegister(unsi
>>    return false;
>>  }
>>
>> -bool AMDGPUAsmParser::AddNextRegisterToList(unsigned& Reg, unsigned&
>> RegWidth, RegisterKind RegKind, unsigned Reg1, unsigned RegNum)
>> -{
>> +bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned
>> &RegWidth,
>> +                                            RegisterKind RegKind,
>> unsigned Reg1,
>> +                                            unsigned RegNum) {
>>    switch (RegKind) {
>>    case IS_SPECIAL:
>> -    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { Reg =
>> AMDGPU::EXEC; RegWidth = 2; return true; }
>> -    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { Reg
>> = AMDGPU::FLAT_SCR; RegWidth = 2; return true; }
>> -    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { Reg =
>> AMDGPU::VCC; RegWidth = 2; return true; }
>> -    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { Reg =
>> AMDGPU::TBA; RegWidth = 2; return true; }
>> -    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { Reg =
>> AMDGPU::TMA; RegWidth = 2; return true; }
>> +    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
>> +      Reg = AMDGPU::EXEC;
>> +      RegWidth = 2;
>> +      return true;
>> +    }
>> +    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
>> +      Reg = AMDGPU::FLAT_SCR;
>> +      RegWidth = 2;
>> +      return true;
>> +    }
>> +    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
>> +      Reg = AMDGPU::VCC;
>> +      RegWidth = 2;
>> +      return true;
>> +    }
>> +    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
>> +      Reg = AMDGPU::TBA;
>> +      RegWidth = 2;
>> +      return true;
>> +    }
>> +    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
>> +      Reg = AMDGPU::TMA;
>> +      RegWidth = 2;
>> +      return true;
>> +    }
>>      return false;
>>    case IS_VGPR:
>>    case IS_SGPR:
>>    case IS_TTMP:
>> -    if (Reg1 != Reg + RegWidth) { return false; }
>> +    if (Reg1 != Reg + RegWidth) {
>> +      return false;
>> +    }
>>      RegWidth++;
>>      return true;
>>    default:
>> @@ -1326,8 +1346,9 @@ bool AMDGPUAsmParser::AddNextRegisterToL
>>    }
>>  }
>>
>> -bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind& RegKind,
>> unsigned& Reg, unsigned& RegNum, unsigned& RegWidth, unsigned
>> *DwordRegIndex)
>> -{
>> +bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
>> unsigned &Reg,
>> +                                          unsigned &RegNum, unsigned
>> &RegWidth,
>> +                                          unsigned *DwordRegIndex) {
>>    if (DwordRegIndex) { *DwordRegIndex = 0; }
>>    const MCRegisterInfo *TRI = getContext().getRegisterInfo();
>>    if (getLexer().is(AsmToken::Identifier)) {
>> @@ -1528,7 +1549,8 @@ AMDGPUAsmParser::parseRegOrImm(OperandVe
>>  }
>>
>>  OperandMatchResultTy
>> -AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
>> bool AllowImm) {
>> +AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
>> +                                              bool AllowImm) {
>>    // XXX: During parsing we can't determine if minus sign means
>>    // negate-modifier or negative immediate value.
>>    // By default we suppose it is modifier.
>> @@ -1539,7 +1561,8 @@ AMDGPUAsmParser::parseRegOrImmWithFPInpu
>>      Negate = true;
>>    }
>>
>> -  if (getLexer().getKind() == AsmToken::Identifier &&
>> Parser.getTok().getString() == "abs") {
>> +  if (getLexer().getKind() == AsmToken::Identifier &&
>> +      Parser.getTok().getString() == "abs") {
>>      Parser.Lex();
>>      Abs2 = true;
>>      if (getLexer().isNot(AsmToken::LParen)) {
>> @@ -1597,10 +1620,12 @@ AMDGPUAsmParser::parseRegOrImmWithFPInpu
>>  }
>>
>>  OperandMatchResultTy
>> -AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
>> bool AllowImm) {
>> +AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
>> +                                               bool AllowImm) {
>>    bool Sext = false;
>>
>> -  if (getLexer().getKind() == AsmToken::Identifier &&
>> Parser.getTok().getString() == "sext") {
>> +  if (getLexer().getKind() == AsmToken::Identifier &&
>> +      Parser.getTok().getString() == "sext") {
>>      Parser.Lex();
>>      Sext = true;
>>      if (getLexer().isNot(AsmToken::LParen)) {
>> @@ -1667,7 +1692,6 @@ OperandMatchResultTy AMDGPUAsmParser::pa
>>  }
>>
>>  unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
>> -
>>    uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
>>
>>    if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3))
>> ||
>> @@ -1799,7 +1823,6 @@ bool AMDGPUAsmParser::ParseAsAbsoluteExp
>>    return false;
>>  }
>>
>> -
>>  bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
>>                                                 uint32_t &Minor) {
>>    if (ParseAsAbsoluteExpression(Major))
>> @@ -1816,7 +1839,6 @@ bool AMDGPUAsmParser::ParseDirectiveMajo
>>  }
>>
>>  bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
>> -
>>    uint32_t Major;
>>    uint32_t Minor;
>>
>> @@ -2086,7 +2108,6 @@ bool AMDGPUAsmParser::subtargetHasRegist
>>
>>  OperandMatchResultTy
>>  AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef
>> Mnemonic) {
>> -
>>    // Try to parse with a custom parser
>>    OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands,
>> Mnemonic);
>>
>>
>> Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/
>> AMDGPU/SIISelLowering.cpp?rev=292688&r1=292687&r2=292688&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Fri Jan 20 18:53:49
>> 2017
>> @@ -15,7 +15,6 @@
>>  #ifdef _MSC_VER
>>  // Provide M_PI.
>>  #define _USE_MATH_DEFINES
>> -#include <cmath>
>>  #endif
>>
>>  #include "AMDGPU.h"
>> @@ -26,15 +25,59 @@
>>  #include "SIInstrInfo.h"
>>  #include "SIMachineFunctionInfo.h"
>>  #include "SIRegisterInfo.h"
>> +#include "Utils/AMDGPUBaseInfo.h"
>> +#include "llvm/ADT/APFloat.h"
>> +#include "llvm/ADT/APInt.h"
>> +#include "llvm/ADT/ArrayRef.h"
>>  #include "llvm/ADT/BitVector.h"
>> +#include "llvm/ADT/SmallVector.h"
>> +#include "llvm/ADT/StringRef.h"
>>  #include "llvm/ADT/StringSwitch.h"
>> +#include "llvm/ADT/Twine.h"
>> +#include "llvm/CodeGen/Analysis.h"
>>  #include "llvm/CodeGen/CallingConvLower.h"
>> +#include "llvm/CodeGen/DAGCombine.h"
>> +#include "llvm/CodeGen/ISDOpcodes.h"
>> +#include "llvm/CodeGen/MachineBasicBlock.h"
>> +#include "llvm/CodeGen/MachineFrameInfo.h"
>> +#include "llvm/CodeGen/MachineFunction.h"
>> +#include "llvm/CodeGen/MachineInstr.h"
>>  #include "llvm/CodeGen/MachineInstrBuilder.h"
>> +#include "llvm/CodeGen/MachineMemOperand.h"
>> +#include "llvm/CodeGen/MachineOperand.h"
>>  #include "llvm/CodeGen/MachineRegisterInfo.h"
>> +#include "llvm/CodeGen/MachineValueType.h"
>>  #include "llvm/CodeGen/SelectionDAG.h"
>> -#include "llvm/CodeGen/Analysis.h"
>> +#include "llvm/CodeGen/SelectionDAGNodes.h"
>> +#include "llvm/CodeGen/ValueTypes.h"
>> +#include "llvm/IR/Constants.h"
>> +#include "llvm/IR/DataLayout.h"
>> +#include "llvm/IR/DebugLoc.h"
>> +#include "llvm/IR/DerivedTypes.h"
>>  #include "llvm/IR/DiagnosticInfo.h"
>>  #include "llvm/IR/Function.h"
>> +#include "llvm/IR/GlobalValue.h"
>> +#include "llvm/IR/InstrTypes.h"
>> +#include "llvm/IR/Instruction.h"
>> +#include "llvm/IR/Instructions.h"
>> +#include "llvm/IR/Type.h"
>> +#include "llvm/Support/Casting.h"
>> +#include "llvm/Support/CodeGen.h"
>> +#include "llvm/Support/CommandLine.h"
>> +#include "llvm/Support/Compiler.h"
>> +#include "llvm/Support/ErrorHandling.h"
>> +#include "llvm/Support/MathExtras.h"
>> +#include "llvm/Target/TargetCallingConv.h"
>> +#include "llvm/Target/TargetMachine.h"
>> +#include "llvm/Target/TargetOptions.h"
>> +#include "llvm/Target/TargetRegisterInfo.h"
>> +#include <cassert>
>> +#include <cmath>
>> +#include <cstdint>
>> +#include <iterator>
>> +#include <tuple>
>> +#include <utility>
>> +#include <vector>
>>
>>  using namespace llvm;
>>
>> @@ -43,7 +86,6 @@ static cl::opt<bool> EnableVGPRIndexMode
>>    cl::desc("Use GPR indexing mode instead of movrel for vector
>> indexing"),
>>    cl::init(false));
>>
>> -
>>  static unsigned findFirstFreeSGPR(CCState &CCInfo) {
>>    unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
>>    for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) {
>> @@ -110,7 +152,6 @@ SITargetLowering::SITargetLowering(const
>>    setTruncStoreAction(MVT::v16i32, MVT::v16i8, Expand);
>>    setTruncStoreAction(MVT::v32i32, MVT::v32i8, Expand);
>>
>> -
>>    setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
>>    setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
>>    setOperationAction(ISD::ConstantPool, MVT::v2i64, Expand);
>> @@ -441,7 +482,7 @@ bool SITargetLowering::isLegalAddressing
>>      return false;
>>
>>    switch (AS) {
>> -  case AMDGPUAS::GLOBAL_ADDRESS: {
>> +  case AMDGPUAS::GLOBAL_ADDRESS:
>>      if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
>>        // Assume the we will use FLAT for all global memory accesses
>>        // on VI.
>> @@ -456,8 +497,8 @@ bool SITargetLowering::isLegalAddressing
>>      }
>>
>>      return isLegalMUBUFAddressingMode(AM);
>> -  }
>> -  case AMDGPUAS::CONSTANT_ADDRESS: {
>> +
>> +  case AMDGPUAS::CONSTANT_ADDRESS:
>>      // If the offset isn't a multiple of 4, it probably isn't going to be
>>      // correctly aligned.
>>      // FIXME: Can we get the real alignment here?
>> @@ -494,13 +535,12 @@ bool SITargetLowering::isLegalAddressing
>>        return true;
>>
>>      return false;
>> -  }
>>
>>    case AMDGPUAS::PRIVATE_ADDRESS:
>>      return isLegalMUBUFAddressingMode(AM);
>>
>>    case AMDGPUAS::LOCAL_ADDRESS:
>> -  case AMDGPUAS::REGION_ADDRESS: {
>> +  case AMDGPUAS::REGION_ADDRESS:
>>      // Basic, single offset DS instructions allow a 16-bit unsigned
>> immediate
>>      // field.
>>      // XXX - If doing a 4-byte aligned 8-byte type access, we
>> effectively have
>> @@ -515,7 +555,7 @@ bool SITargetLowering::isLegalAddressing
>>        return true;
>>
>>      return false;
>> -  }
>> +
>>    case AMDGPUAS::FLAT_ADDRESS:
>>    case AMDGPUAS::UNKNOWN_ADDRESS_SPACE:
>>      // For an unknown address space, this usually means that this is for
>> some
>> @@ -897,7 +937,6 @@ SDValue SITargetLowering::LowerFormalArg
>>    SmallVector<SDValue, 16> Chains;
>>
>>    for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
>> -
>>      const ISD::InputArg &Arg = Ins[i];
>>      if (Skipped[i]) {
>>        InVals.push_back(DAG.getUNDEF(Arg.VT));
>> @@ -954,7 +993,6 @@ SDValue SITargetLowering::LowerFormalArg
>>      SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);
>>
>>      if (Arg.VT.isVector()) {
>> -
>>        // Build a vector from the registers
>>        Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
>>        unsigned NumElements = ParamType->getVectorNumElements();
>> @@ -1543,7 +1581,6 @@ static MachineBasicBlock *emitIndirectSr
>>      return &MBB;
>>    }
>>
>> -
>>    const DebugLoc &DL = MI.getDebugLoc();
>>    MachineBasicBlock::iterator I(&MI);
>>
>> @@ -1736,13 +1773,13 @@ MachineBasicBlock *SITargetLowering::Emi
>>    }
>>
>>    switch (MI.getOpcode()) {
>> -  case AMDGPU::SI_INIT_M0: {
>> +  case AMDGPU::SI_INIT_M0:
>>      BuildMI(*BB, MI.getIterator(), MI.getDebugLoc(),
>>              TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
>>          .add(MI.getOperand(0));
>>      MI.eraseFromParent();
>>      return BB;
>> -  }
>> +
>>    case AMDGPU::GET_GROUPSTATICSIZE: {
>>      DebugLoc DL = MI.getDebugLoc();
>>      BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_MOV_B32))
>> @@ -2001,7 +2038,6 @@ bool SITargetLowering::shouldEmitPCReloc
>>  /// last parameter, also switches branch target with BR if the need arise
>>  SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
>>                                        SelectionDAG &DAG) const {
>> -
>>    SDLoc DL(BRCOND);
>>
>>    SDNode *Intr = BRCOND.getOperand(1).getNode();
>> @@ -2399,17 +2435,15 @@ SDValue SITargetLowering::LowerINTRINSIC
>>    case Intrinsic::amdgcn_rsq:
>>    case AMDGPUIntrinsic::AMDGPU_rsq: // Legacy name
>>      return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
>> -  case Intrinsic::amdgcn_rsq_legacy: {
>> +  case Intrinsic::amdgcn_rsq_legacy:
>>      if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
>>        return emitRemovedIntrinsicError(DAG, DL, VT);
>>
>>      return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
>> -  }
>> -  case Intrinsic::amdgcn_rcp_legacy: {
>> +  case Intrinsic::amdgcn_rcp_legacy:
>>      if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
>>        return emitRemovedIntrinsicError(DAG, DL, VT);
>>      return DAG.getNode(AMDGPUISD::RCP_LEGACY, DL, VT, Op.getOperand(1));
>> -  }
>>    case Intrinsic::amdgcn_rsq_clamp: {
>>      if (Subtarget->getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
>>        return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT,
>> Op.getOperand(1));
>> @@ -2516,9 +2550,8 @@ SDValue SITargetLowering::LowerINTRINSIC
>>      return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL,
>>                                     Op->getVTList(), Ops, VT, MMO);
>>    }
>> -  case AMDGPUIntrinsic::amdgcn_fdiv_fast: {
>> +  case AMDGPUIntrinsic::amdgcn_fdiv_fast:
>>      return lowerFDIV_FAST(Op, DAG);
>> -  }
>>    case AMDGPUIntrinsic::SI_vs_load_input:
>>      return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT,
>>                         Op.getOperand(1),
>> @@ -2912,7 +2945,7 @@ SDValue SITargetLowering::LowerLOAD(SDVa
>>      // loads.
>>      //
>>      LLVM_FALLTHROUGH;
>> -  case AMDGPUAS::GLOBAL_ADDRESS: {
>> +  case AMDGPUAS::GLOBAL_ADDRESS:
>>      if (Subtarget->getScalarizeGlobalBehavior() && isMemOpUniform(Load)
>> &&
>>                    isMemOpHasNoClobberedMemOperand(Load))
>>        return SDValue();
>> @@ -2920,14 +2953,13 @@ SDValue SITargetLowering::LowerLOAD(SDVa
>>      // have the same legalization requirements as global and private
>>      // loads.
>>      //
>> -  }
>>      LLVM_FALLTHROUGH;
>>    case AMDGPUAS::FLAT_ADDRESS:
>>      if (NumElements > 4)
>>        return SplitVectorLoad(Op, DAG);
>>      // v4 loads are supported for private and global memory.
>>      return SDValue();
>> -  case AMDGPUAS::PRIVATE_ADDRESS: {
>> +  case AMDGPUAS::PRIVATE_ADDRESS:
>>      // Depending on the setting of the private_element_size field in the
>>      // resource descriptor, we can only make private accesses up to a
>> certain
>>      // size.
>> @@ -2946,8 +2978,7 @@ SDValue SITargetLowering::LowerLOAD(SDVa
>>      default:
>>        llvm_unreachable("unsupported private_element_size");
>>      }
>> -  }
>> -  case AMDGPUAS::LOCAL_ADDRESS: {
>> +  case AMDGPUAS::LOCAL_ADDRESS:
>>      if (NumElements > 2)
>>        return SplitVectorLoad(Op, DAG);
>>
>> @@ -2956,7 +2987,6 @@ SDValue SITargetLowering::LowerLOAD(SDVa
>>
>>      // If properly aligned, if we split we might be able to use
>> ds_read_b64.
>>      return SplitVectorLoad(Op, DAG);
>> -  }
>>    default:
>>      return SDValue();
>>    }
>> @@ -3454,27 +3484,24 @@ SDValue SITargetLowering::performUCharTo
>>  static bool canFoldOffset(unsigned OffsetSize, unsigned AS,
>>                            const SISubtarget &STI) {
>>    switch (AS) {
>> -  case AMDGPUAS::GLOBAL_ADDRESS: {
>> +  case AMDGPUAS::GLOBAL_ADDRESS:
>>      // MUBUF instructions have a 12-bit offset in bytes.
>>      return isUInt<12>(OffsetSize);
>> -  }
>> -  case AMDGPUAS::CONSTANT_ADDRESS: {
>> +  case AMDGPUAS::CONSTANT_ADDRESS:
>>      // SMRD instructions have an 8-bit offset in dwords on SI and
>>      // a 20-bit offset in bytes on VI.
>>      if (STI.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
>>        return isUInt<20>(OffsetSize);
>>      else
>>        return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4);
>> -  }
>>    case AMDGPUAS::LOCAL_ADDRESS:
>> -  case AMDGPUAS::REGION_ADDRESS: {
>> +  case AMDGPUAS::REGION_ADDRESS:
>>      // The single offset versions have a 16-bit offset in bytes.
>>      return isUInt<16>(OffsetSize);
>> -  }
>>    case AMDGPUAS::PRIVATE_ADDRESS:
>>    // Indirect register addressing does not use any offsets.
>>    default:
>> -    return 0;
>> +    return false;
>>    }
>>  }
>>
>> @@ -4176,11 +4203,10 @@ SDValue SITargetLowering::PerformDAGComb
>>    case ISD::ATOMIC_LOAD_UMIN:
>>    case ISD::ATOMIC_LOAD_UMAX:
>>    case AMDGPUISD::ATOMIC_INC:
>> -  case AMDGPUISD::ATOMIC_DEC: { // TODO: Target mem intrinsics.
>> +  case AMDGPUISD::ATOMIC_DEC: // TODO: Target mem intrinsics.
>>      if (DCI.isBeforeLegalize())
>>        break;
>>      return performMemSDNodeCombine(cast<MemSDNode>(N), DCI);
>> -  }
>>    case ISD::AND:
>>      return performAndCombine(N, DCI);
>>    case ISD::OR:
>> @@ -4291,7 +4317,6 @@ void SITargetLowering::adjustWritemask(M
>>
>>    // Update the users of the node with the new indices
>>    for (unsigned i = 0, Idx = AMDGPU::sub0; i < 4; ++i) {
>> -
>>      SDNode *User = Users[i];
>>      if (!User)
>>        continue;
>>
>> Modified: llvm/trunk/lib/Target/AMDGPU/SIInsertSkips.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/
>> AMDGPU/SIInsertSkips.cpp?rev=292688&r1=292687&r2=292688&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/lib/Target/AMDGPU/SIInsertSkips.cpp (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/SIInsertSkips.cpp Fri Jan 20 18:53:49
>> 2017
>> @@ -1,4 +1,4 @@
>> -//===-- SIInsertSkips.cpp - Use predicates for control flow
>> ----------===//
>> +//===-- SIInsertSkips.cpp - Use predicates for control flow
>> ---------------===//
>>  //
>>  //                     The LLVM Compiler Infrastructure
>>  //
>> @@ -12,33 +12,46 @@
>>  /// branches when it's expected that jumping over the untaken control
>> flow will
>>  /// be cheaper than having every workitem no-op through it.
>>  //
>> +//===------------------------------------------------------
>> ----------------===//
>>
>>  #include "AMDGPU.h"
>>  #include "AMDGPUSubtarget.h"
>>  #include "SIInstrInfo.h"
>>  #include "SIMachineFunctionInfo.h"
>> -#include "llvm/CodeGen/MachineFrameInfo.h"
>> +#include "llvm/ADT/SmallVector.h"
>> +#include "llvm/ADT/StringRef.h"
>> +#include "llvm/CodeGen/MachineBasicBlock.h"
>>  #include "llvm/CodeGen/MachineFunction.h"
>>  #include "llvm/CodeGen/MachineFunctionPass.h"
>> +#include "llvm/CodeGen/MachineInstr.h"
>>  #include "llvm/CodeGen/MachineInstrBuilder.h"
>> +#include "llvm/CodeGen/MachineOperand.h"
>> +#include "llvm/IR/CallingConv.h"
>> +#include "llvm/IR/DebugLoc.h"
>>  #include "llvm/MC/MCAsmInfo.h"
>> +#include "llvm/Pass.h"
>> +#include "llvm/Support/CommandLine.h"
>> +#include "llvm/Target/TargetMachine.h"
>> +#include <cassert>
>> +#include <cstdint>
>> +#include <iterator>
>>
>>  using namespace llvm;
>>
>>  #define DEBUG_TYPE "si-insert-skips"
>>
>> -namespace {
>> -
>>  static cl::opt<unsigned> SkipThresholdFlag(
>>    "amdgpu-skip-threshold",
>>    cl::desc("Number of instructions before jumping over divergent control
>> flow"),
>>    cl::init(12), cl::Hidden);
>>
>> +namespace {
>> +
>>  class SIInsertSkips : public MachineFunctionPass {
>>  private:
>> -  const SIRegisterInfo *TRI;
>> -  const SIInstrInfo *TII;
>> -  unsigned SkipThreshold;
>> +  const SIRegisterInfo *TRI = nullptr;
>> +  const SIInstrInfo *TII = nullptr;
>> +  unsigned SkipThreshold = 0;
>>
>>    bool shouldSkip(const MachineBasicBlock &From,
>>                    const MachineBasicBlock &To) const;
>> @@ -55,8 +68,7 @@ private:
>>  public:
>>    static char ID;
>>
>> -  SIInsertSkips() :
>> -    MachineFunctionPass(ID), TRI(nullptr), TII(nullptr),
>> SkipThreshold(0) { }
>> +  SIInsertSkips() : MachineFunctionPass(ID) {}
>>
>>    bool runOnMachineFunction(MachineFunction &MF) override;
>>
>> @@ -69,7 +81,7 @@ public:
>>    }
>>  };
>>
>> -} // End anonymous namespace
>> +} // end anonymous namespace
>>
>>  char SIInsertSkips::ID = 0;
>>
>> @@ -270,19 +282,19 @@ bool SIInsertSkips::runOnMachineFunction
>>        MachineInstr &MI = *I;
>>
>>        switch (MI.getOpcode()) {
>> -      case AMDGPU::SI_MASK_BRANCH: {
>> +      case AMDGPU::SI_MASK_BRANCH:
>>          ExecBranchStack.push_back(MI.getOperand(0).getMBB());
>>          MadeChange |= skipMaskBranch(MI, MBB);
>>          break;
>> -      }
>> -      case AMDGPU::S_BRANCH: {
>> +
>> +      case AMDGPU::S_BRANCH:
>>          // Optimize out branches to the next block.
>>          // FIXME: Shouldn't this be handled by BranchFolding?
>>          if (MBB.isLayoutSuccessor(MI.getOperand(0).getMBB()))
>>            MI.eraseFromParent();
>>          break;
>> -      }
>> -      case AMDGPU::SI_KILL_TERMINATOR: {
>> +
>> +      case AMDGPU::SI_KILL_TERMINATOR:
>>          MadeChange = true;
>>          kill(MI);
>>
>> @@ -298,8 +310,8 @@ bool SIInsertSkips::runOnMachineFunction
>>
>>          MI.eraseFromParent();
>>          break;
>> -      }
>> -      case AMDGPU::SI_RETURN: {
>> +
>> +      case AMDGPU::SI_RETURN:
>>          // FIXME: Should move somewhere else
>>          assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());
>>
>> @@ -318,7 +330,8 @@ bool SIInsertSkips::runOnMachineFunction
>>              .addMBB(EmptyMBBAtEnd);
>>            I->eraseFromParent();
>>          }
>> -      }
>> +        break;
>> +
>>        default:
>>          break;
>>        }
>>
>> Modified: llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/
>> AMDGPU/SIInsertWaits.cpp?rev=292688&r1=292687&r2=292688&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp Fri Jan 20 18:53:49
>> 2017
>> @@ -21,11 +21,28 @@
>>  #include "SIDefines.h"
>>  #include "SIInstrInfo.h"
>>  #include "SIMachineFunctionInfo.h"
>> +#include "SIRegisterInfo.h"
>>  #include "Utils/AMDGPUBaseInfo.h"
>> +#include "llvm/ADT/SmallVector.h"
>> +#include "llvm/ADT/StringRef.h"
>> +#include "llvm/CodeGen/MachineBasicBlock.h"
>>  #include "llvm/CodeGen/MachineFunction.h"
>>  #include "llvm/CodeGen/MachineFunctionPass.h"
>> +#include "llvm/CodeGen/MachineInstr.h"
>>  #include "llvm/CodeGen/MachineInstrBuilder.h"
>> +#include "llvm/CodeGen/MachineOperand.h"
>>  #include "llvm/CodeGen/MachineRegisterInfo.h"
>> +#include "llvm/IR/DebugLoc.h"
>> +#include "llvm/Pass.h"
>> +#include "llvm/Support/Debug.h"
>> +#include "llvm/Support/raw_ostream.h"
>> +#include "llvm/Target/TargetRegisterInfo.h"
>> +#include <algorithm>
>> +#include <cassert>
>> +#include <cstdint>
>> +#include <cstring>
>> +#include <new>
>> +#include <utility>
>>
>>  #define DEBUG_TYPE "si-insert-waits"
>>
>> @@ -42,7 +59,6 @@ typedef union {
>>      unsigned LGKM;
>>    } Named;
>>    unsigned Array[3];
>> -
>>  } Counters;
>>
>>  typedef enum {
>> @@ -55,11 +71,10 @@ typedef Counters RegCounters[512];
>>  typedef std::pair<unsigned, unsigned> RegInterval;
>>
>>  class SIInsertWaits : public MachineFunctionPass {
>> -
>>  private:
>> -  const SISubtarget *ST;
>> -  const SIInstrInfo *TII;
>> -  const SIRegisterInfo *TRI;
>> +  const SISubtarget *ST = nullptr;
>> +  const SIInstrInfo *TII = nullptr;
>> +  const SIRegisterInfo *TRI = nullptr;
>>    const MachineRegisterInfo *MRI;
>>    IsaVersion IV;
>>
>> @@ -86,7 +101,7 @@ private:
>>    RegCounters DefinedRegs;
>>
>>    /// \brief Different export instruction types seen since last wait.
>> -  unsigned ExpInstrTypesSeen;
>> +  unsigned ExpInstrTypesSeen = 0;
>>
>>    /// \brief Type of the last opcode.
>>    InstType LastOpcodeType;
>> @@ -100,7 +115,7 @@ private:
>>    bool ReturnsVoid;
>>
>>    /// Whether the VCCZ bit is possibly corrupt
>> -  bool VCCZCorrupt;
>> +  bool VCCZCorrupt = false;
>>
>>    /// \brief Get increment/decrement amount for this instruction.
>>    Counters getHwCounts(MachineInstr &MI);
>> @@ -141,13 +156,7 @@ private:
>>  public:
>>    static char ID;
>>
>> -  SIInsertWaits() :
>> -    MachineFunctionPass(ID),
>> -    ST(nullptr),
>> -    TII(nullptr),
>> -    TRI(nullptr),
>> -    ExpInstrTypesSeen(0),
>> -    VCCZCorrupt(false) { }
>> +  SIInsertWaits() : MachineFunctionPass(ID) {}
>>
>>    bool runOnMachineFunction(MachineFunction &MF) override;
>>
>> @@ -161,7 +170,7 @@ public:
>>    }
>>  };
>>
>> -} // End anonymous namespace
>> +} // end anonymous namespace
>>
>>  INITIALIZE_PASS_BEGIN(SIInsertWaits, DEBUG_TYPE,
>>                        "SI Insert Waits", false, false)
>> @@ -294,7 +303,6 @@ RegInterval SIInsertWaits::getRegInterva
>>  void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
>>                                      MachineBasicBlock::iterator I,
>>                                      const Counters &Increment) {
>> -
>>    // Get the hardware counter increments and sum them up
>>    Counters Limit = ZeroCounts;
>>    unsigned Sum = 0;
>> @@ -366,7 +374,6 @@ void SIInsertWaits::pushInstruction(Mach
>>  bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
>>                                 MachineBasicBlock::iterator I,
>>                                 const Counters &Required) {
>> -
>>    // End of program? No need to wait on anything
>>    // A function not returning void needs to wait, because other bytecode
>> will
>>    // be appended after it and we don't know what it will be.
>> @@ -393,7 +400,6 @@ bool SIInsertWaits::insertWait(MachineBa
>>    bool NeedWait = false;
>>
>>    for (unsigned i = 0; i < 3; ++i) {
>> -
>>      if (Required.Array[i] <= WaitedOn.Array[i])
>>        continue;
>>
>> @@ -434,7 +440,6 @@ bool SIInsertWaits::insertWait(MachineBa
>>
>>  /// \brief helper function for handleOperands
>>  static void increaseCounters(Counters &Dst, const Counters &Src) {
>> -
>>    for (unsigned i = 0; i < 3; ++i)
>>      Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]);
>>  }
>> @@ -468,7 +473,6 @@ void SIInsertWaits::handleExistingWait(M
>>  }
>>
>>  Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
>> -
>>    Counters Result = ZeroCounts;
>>
>>    // For each register affected by this instruction increase the result
>> @@ -484,7 +488,6 @@ Counters SIInsertWaits::handleOperands(M
>>      const TargetRegisterClass *RC = TII->getOpRegClass(MI, i);
>>      RegInterval Interval = getRegInterval(RC, Op);
>>      for (unsigned j = Interval.first; j < Interval.second; ++j) {
>> -
>>        if (Op.isDef()) {
>>          increaseCounters(Result, UsedRegs[j]);
>>          increaseCounters(Result, DefinedRegs[j]);
>>
>> Modified: llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/
>> AMDGPU/SILoadStoreOptimizer.cpp?rev=292688&r1=292687&r2=292688&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp Fri Jan 20
>> 18:53:49 2017
>> @@ -39,15 +39,27 @@
>>  #include "AMDGPUSubtarget.h"
>>  #include "SIInstrInfo.h"
>>  #include "SIRegisterInfo.h"
>> -#include "llvm/CodeGen/LiveIntervalAnalysis.h"
>> -#include "llvm/CodeGen/LiveVariables.h"
>> +#include "Utils/AMDGPUBaseInfo.h"
>> +#include "llvm/ADT/ArrayRef.h"
>> +#include "llvm/ADT/SmallVector.h"
>> +#include "llvm/ADT/StringRef.h"
>> +#include "llvm/Analysis/AliasAnalysis.h"
>> +#include "llvm/CodeGen/MachineBasicBlock.h"
>>  #include "llvm/CodeGen/MachineFunction.h"
>>  #include "llvm/CodeGen/MachineFunctionPass.h"
>> +#include "llvm/CodeGen/MachineInstr.h"
>>  #include "llvm/CodeGen/MachineInstrBuilder.h"
>> +#include "llvm/CodeGen/MachineOperand.h"
>>  #include "llvm/CodeGen/MachineRegisterInfo.h"
>> +#include "llvm/IR/DebugLoc.h"
>> +#include "llvm/Pass.h"
>>  #include "llvm/Support/Debug.h"
>> +#include "llvm/Support/MathExtras.h"
>>  #include "llvm/Support/raw_ostream.h"
>>  #include "llvm/Target/TargetMachine.h"
>> +#include <cassert>
>> +#include <iterator>
>> +#include <utility>
>>
>>  using namespace llvm;
>>
>> @@ -57,10 +69,10 @@ namespace {
>>
>>  class SILoadStoreOptimizer : public MachineFunctionPass {
>>  private:
>> -  const SIInstrInfo *TII;
>> -  const SIRegisterInfo *TRI;
>> -  MachineRegisterInfo *MRI;
>> -  AliasAnalysis *AA;
>> +  const SIInstrInfo *TII = nullptr;
>> +  const SIRegisterInfo *TRI = nullptr;
>> +  MachineRegisterInfo *MRI = nullptr;
>> +  AliasAnalysis *AA = nullptr;
>>
>>    static bool offsetsCanBeCombined(unsigned Offset0,
>>                                     unsigned Offset1,
>> @@ -86,9 +98,7 @@ private:
>>  public:
>>    static char ID;
>>
>> -  SILoadStoreOptimizer()
>> -      : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr),
>> MRI(nullptr),
>> -        AA(nullptr) {}
>> +  SILoadStoreOptimizer() : MachineFunctionPass(ID) {}
>>
>>    SILoadStoreOptimizer(const TargetMachine &TM_) :
>> MachineFunctionPass(ID) {
>>      initializeSILoadStoreOptimizerPass(*PassRegistry::
>> getPassRegistry());
>> @@ -108,7 +118,7 @@ public:
>>    }
>>  };
>>
>> -} // End anonymous namespace.
>> +} // end anonymous namespace.
>>
>>  INITIALIZE_PASS_BEGIN(SILoadStoreOptimizer, DEBUG_TYPE,
>>                        "SI Load / Store Optimizer", false, false)
>> @@ -141,11 +151,10 @@ static void addDefsToList(const MachineI
>>    }
>>  }
>>
>> -static bool memAccessesCanBeReordered(
>> -  MachineBasicBlock::iterator A,
>> -  MachineBasicBlock::iterator B,
>> -  const SIInstrInfo *TII,
>> -  llvm::AliasAnalysis * AA) {
>> +static bool memAccessesCanBeReordered(MachineBasicBlock::iterator A,
>> +                                      MachineBasicBlock::iterator B,
>> +                                      const SIInstrInfo *TII,
>> +                                      AliasAnalysis * AA) {
>>    return (TII->areMemAccessesTriviallyDisjoint(*A, *B, AA) ||
>>      // RAW or WAR - cannot reorder
>>      // WAW - cannot reorder
>> @@ -179,7 +188,6 @@ canMoveInstsAcrossMemOp(MachineInstr &Me
>>                          ArrayRef<MachineInstr*> InstsToMove,
>>                          const SIInstrInfo *TII,
>>                          AliasAnalysis *AA) {
>> -
>>    assert(MemOp.mayLoadOrStore());
>>
>>    for (MachineInstr *InstToMove : InstsToMove) {
>> @@ -230,7 +238,6 @@ SILoadStoreOptimizer::findMatchingDSInst
>>    addDefsToList(*I, DefsToMove);
>>
>>    for ( ; MBBI != E; ++MBBI) {
>> -
>>      if (MBBI->getOpcode() != I->getOpcode()) {
>>
>>        // This is not a matching DS instruction, but we can keep looking
>> as
>>
>> Modified: llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/
>> AMDGPU/SIMachineFunctionInfo.h?rev=292688&r1=292687&r2=292688&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h Fri Jan 20
>> 18:53:49 2017
>> @@ -16,13 +16,16 @@
>>
>>  #include "AMDGPUMachineFunction.h"
>>  #include "SIRegisterInfo.h"
>> +#include "llvm/CodeGen/PseudoSourceValue.h"
>> +#include "llvm/MC/MCRegisterInfo.h"
>> +#include "llvm/Support/ErrorHandling.h"
>>  #include <array>
>> +#include <cassert>
>>  #include <map>
>> +#include <utility>
>>
>>  namespace llvm {
>>
>> -class MachineRegisterInfo;
>> -
>>  class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
>>  public:
>>    explicit AMDGPUImagePseudoSourceValue() :
>> @@ -174,10 +177,12 @@ private:
>>
>>  public:
>>    struct SpilledReg {
>> -    unsigned VGPR;
>> -    int Lane;
>> +    unsigned VGPR = AMDGPU::NoRegister;
>> +    int Lane = -1;
>> +
>> +    SpilledReg() = default;
>>      SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { }
>> -    SpilledReg() : VGPR(AMDGPU::NoRegister), Lane(-1) { }
>> +
>>      bool hasLane() { return Lane != -1;}
>>      bool hasReg() { return VGPR != AMDGPU::NoRegister;}
>>    };
>> @@ -185,6 +190,7 @@ public:
>>    // SIMachineFunctionInfo definition
>>
>>    SIMachineFunctionInfo(const MachineFunction &MF);
>> +
>>    SpilledReg getSpilledReg(MachineFunction *MF, unsigned FrameIndex,
>>                             unsigned SubIdx);
>>    bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
>> @@ -495,6 +501,6 @@ public:
>>    }
>>  };
>>
>> -} // End namespace llvm
>> +} // end namespace llvm
>>
>> -#endif
>> +#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
>>
>> Modified: llvm/trunk/lib/Target/AMDGPU/SIMachineScheduler.h
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/
>> AMDGPU/SIMachineScheduler.h?rev=292688&r1=292687&r2=292688&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/lib/Target/AMDGPU/SIMachineScheduler.h (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/SIMachineScheduler.h Fri Jan 20
>> 18:53:49 2017
>> @@ -40,13 +40,12 @@ enum SIScheduleCandReason {
>>
>>  struct SISchedulerCandidate {
>>    // The reason for this candidate.
>> -  SIScheduleCandReason Reason;
>> +  SIScheduleCandReason Reason = NoCand;
>>
>>    // Set of reasons that apply to multiple candidates.
>> -  uint32_t RepeatReasonSet;
>> +  uint32_t RepeatReasonSet = 0;
>>
>> -  SISchedulerCandidate()
>> -    :  Reason(NoCand), RepeatReasonSet(0) {}
>> +  SISchedulerCandidate() = default;
>>
>>    bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 <<
>> R); }
>>    void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); }
>> @@ -84,8 +83,8 @@ class SIScheduleBlock {
>>    std::set<unsigned> LiveInRegs;
>>    std::set<unsigned> LiveOutRegs;
>>
>> -  bool Scheduled;
>> -  bool HighLatencyBlock;
>> +  bool Scheduled = false;
>> +  bool HighLatencyBlock = false;
>>
>>    std::vector<unsigned> HasLowLatencyNonWaitedParent;
>>
>> @@ -94,13 +93,12 @@ class SIScheduleBlock {
>>
>>    std::vector<SIScheduleBlock*> Preds;  // All blocks predecessors.
>>    std::vector<SIScheduleBlock*> Succs;  // All blocks successors.
>> -  unsigned NumHighLatencySuccessors;
>> +  unsigned NumHighLatencySuccessors = 0;
>>
>>  public:
>>    SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC,
>>                    unsigned ID):
>> -    DAG(DAG), BC(BC), TopRPTracker(TopPressure), Scheduled(false),
>> -    HighLatencyBlock(false), ID(ID), NumHighLatencySuccessors(0) {}
>> +    DAG(DAG), BC(BC), TopRPTracker(TopPressure), ID(ID) {}
>>
>>    ~SIScheduleBlock() = default;
>>
>> @@ -213,9 +211,9 @@ struct SIScheduleBlocks {
>>  };
>>
>>  enum SISchedulerBlockCreatorVariant {
>> -    LatenciesAlone,
>> -    LatenciesGrouped,
>> -    LatenciesAlonePlusConsecutive
>> +  LatenciesAlone,
>> +  LatenciesGrouped,
>> +  LatenciesAlonePlusConsecutive
>>  };
>>
>>  class SIScheduleBlockCreator {
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>>
> --
> Mike
> Sent from phone
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170120/bf52dd88/attachment-0001.html>


More information about the llvm-commits mailing list