[llvm] r347864 - Revert r347596 "Support for inserting profile-directed cache prefetches"

Hans Wennborg via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 29 05:58:02 PST 2018


Author: hans
Date: Thu Nov 29 05:58:02 2018
New Revision: 347864

URL: http://llvm.org/viewvc/llvm-project?rev=347864&view=rev
Log:
Revert r347596 "Support for inserting profile-directed cache prefetches"

It causes assertion failures when building BoringSSL. See
https://crbug.com/91009#c3 for a reproducer.

This also reverts the follow-ups:
Revert r347724 "Do not insert prefetches with unsupported memory operands."
Revert r347606 "[X86] Add dependency from X86 to ProfileData after rL347596"
Revert r347607 "Add new passes to X86 pipeline tests"

Removed:
    llvm/trunk/lib/Target/X86/X86DiscriminateMemOps.cpp
    llvm/trunk/lib/Target/X86/X86InsertPrefetch.cpp
    llvm/trunk/test/CodeGen/X86/discriminate-mem-ops.ll
    llvm/trunk/test/CodeGen/X86/insert-prefetch-inline.afdo
    llvm/trunk/test/CodeGen/X86/insert-prefetch-inline.ll
    llvm/trunk/test/CodeGen/X86/insert-prefetch-invalid-instr.afdo
    llvm/trunk/test/CodeGen/X86/insert-prefetch-invalid-instr.ll
    llvm/trunk/test/CodeGen/X86/insert-prefetch-other.afdo
    llvm/trunk/test/CodeGen/X86/insert-prefetch.afdo
    llvm/trunk/test/CodeGen/X86/insert-prefetch.ll
Modified:
    llvm/trunk/lib/Target/X86/CMakeLists.txt
    llvm/trunk/lib/Target/X86/LLVMBuild.txt
    llvm/trunk/lib/Target/X86/X86.h
    llvm/trunk/lib/Target/X86/X86TargetMachine.cpp
    llvm/trunk/test/CodeGen/X86/O0-pipeline.ll
    llvm/trunk/test/CodeGen/X86/O3-pipeline.ll

Modified: llvm/trunk/lib/Target/X86/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/CMakeLists.txt?rev=347864&r1=347863&r2=347864&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/CMakeLists.txt (original)
+++ llvm/trunk/lib/Target/X86/CMakeLists.txt Thu Nov 29 05:58:02 2018
@@ -30,7 +30,6 @@ set(sources
   X86CmovConversion.cpp
   X86CondBrFolding.cpp
   X86DomainReassignment.cpp
-  X86DiscriminateMemOps.cpp
   X86ExpandPseudo.cpp
   X86FastISel.cpp
   X86FixupBWInsts.cpp
@@ -45,7 +44,6 @@ set(sources
   X86ISelLowering.cpp
   X86IndirectBranchTracking.cpp
   X86InterleavedAccess.cpp
-  X86InsertPrefetch.cpp
   X86InstrFMA3Info.cpp
   X86InstrFoldTables.cpp
   X86InstrInfo.cpp

Modified: llvm/trunk/lib/Target/X86/LLVMBuild.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/LLVMBuild.txt?rev=347864&r1=347863&r2=347864&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/LLVMBuild.txt (original)
+++ llvm/trunk/lib/Target/X86/LLVMBuild.txt Thu Nov 29 05:58:02 2018
@@ -31,5 +31,5 @@ has_jit = 1
 type = Library
 name = X86CodeGen
 parent = X86
-required_libraries = Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target X86AsmPrinter X86Desc X86Info X86Utils GlobalISel ProfileData
+required_libraries = Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target X86AsmPrinter X86Desc X86Info X86Utils GlobalISel
 add_to_library_groups = X86

Modified: llvm/trunk/lib/Target/X86/X86.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.h?rev=347864&r1=347863&r2=347864&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86.h (original)
+++ llvm/trunk/lib/Target/X86/X86.h Thu Nov 29 05:58:02 2018
@@ -122,13 +122,6 @@ FunctionPass *createX86EvexToVexInsts();
 /// This pass creates the thunks for the retpoline feature.
 FunctionPass *createX86RetpolineThunksPass();
 
-/// This pass ensures instructions featuring a memory operand
-/// have distinctive <LineNumber, Discriminator> (with respect to eachother)
-FunctionPass *createX86DiscriminateMemOpsPass();
-
-/// This pass applies profiling information to insert cache prefetches.
-FunctionPass *createX86InsertPrefetchPass();
-
 InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM,
                                                   X86Subtarget &,
                                                   X86RegisterBankInfo &);

Removed: llvm/trunk/lib/Target/X86/X86DiscriminateMemOps.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86DiscriminateMemOps.cpp?rev=347863&view=auto
==============================================================================
--- llvm/trunk/lib/Target/X86/X86DiscriminateMemOps.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86DiscriminateMemOps.cpp (removed)
@@ -1,130 +0,0 @@
-//===- X86DiscriminateMemOps.cpp - Unique IDs for Mem Ops -----------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// This pass aids profile-driven cache prefetch insertion by ensuring all
-/// instructions that have a memory operand are distinguishible from each other.
-///
-//===----------------------------------------------------------------------===//
-
-#include "X86.h"
-#include "X86InstrBuilder.h"
-#include "X86InstrInfo.h"
-#include "X86MachineFunctionInfo.h"
-#include "X86Subtarget.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/ProfileData/SampleProf.h"
-#include "llvm/ProfileData/SampleProfReader.h"
-#include "llvm/Transforms/IPO/SampleProfile.h"
-using namespace llvm;
-
-namespace {
-
-using Location = std::pair<StringRef, unsigned>;
-
-Location diToLocation(const DILocation *Loc) {
-  return std::make_pair(Loc->getFilename(), Loc->getLine());
-}
-
-/// Ensure each instruction having a memory operand has a distinct <LineNumber,
-/// Discriminator> pair.
-void updateDebugInfo(MachineInstr *MI, const DILocation *Loc) {
-  DebugLoc DL(Loc);
-  MI->setDebugLoc(DL);
-}
-
-class X86DiscriminateMemOps : public MachineFunctionPass {
-  bool runOnMachineFunction(MachineFunction &MF) override;
-  StringRef getPassName() const override {
-    return "X86 Discriminate Memory Operands";
-  }
-
-public:
-  static char ID;
-
-  /// Default construct and initialize the pass.
-  X86DiscriminateMemOps();
-};
-
-} // end anonymous namespace
-
-//===----------------------------------------------------------------------===//
-//            Implementation
-//===----------------------------------------------------------------------===//
-
-char X86DiscriminateMemOps::ID = 0;
-
-/// Default construct and initialize the pass.
-X86DiscriminateMemOps::X86DiscriminateMemOps() : MachineFunctionPass(ID) {}
-
-bool X86DiscriminateMemOps::runOnMachineFunction(MachineFunction &MF) {
-  DISubprogram *FDI = MF.getFunction().getSubprogram();
-  if (!FDI || !FDI->getUnit()->getDebugInfoForProfiling())
-    return false;
-
-  // Have a default DILocation, if we find instructions with memops that don't
-  // have any debug info.
-  const DILocation *ReferenceDI =
-      DILocation::get(FDI->getContext(), FDI->getLine(), 0, FDI);
-
-  DenseMap<Location, unsigned> MemOpDiscriminators;
-  MemOpDiscriminators[diToLocation(ReferenceDI)] = 0;
-
-  // Figure out the largest discriminator issued for each Location. When we
-  // issue new discriminators, we can thus avoid issuing discriminators
-  // belonging to instructions that don't have memops. This isn't a requirement
-  // for the goals of this pass, however, it avoids unnecessary ambiguity.
-  for (auto &MBB : MF) {
-    for (auto &MI : MBB) {
-      const auto &DI = MI.getDebugLoc();
-      if (!DI)
-        continue;
-      Location Loc = diToLocation(DI);
-      MemOpDiscriminators[Loc] =
-          std::max(MemOpDiscriminators[Loc], DI->getBaseDiscriminator());
-    }
-  }
-
-  // Keep track of the discriminators seen at each Location. If an instruction's
-  // DebugInfo has a Location and discriminator we've already seen, replace its
-  // discriminator with a new one, to guarantee uniqueness.
-  DenseMap<Location, DenseSet<unsigned>> Seen;
-
-  bool Changed = false;
-  for (auto &MBB : MF) {
-    for (auto &MI : MBB) {
-      if (X86II::getMemoryOperandNo(MI.getDesc().TSFlags) < 0)
-        continue;
-      const DILocation *DI = MI.getDebugLoc();
-      if (!DI) {
-        DI = ReferenceDI;
-      }
-      DenseSet<unsigned> &Set = Seen[diToLocation(DI)];
-      std::pair<DenseSet<unsigned>::iterator, bool> P =
-          Set.insert(DI->getBaseDiscriminator());
-      if (!P.second) {
-        DI = DI->setBaseDiscriminator(++MemOpDiscriminators[diToLocation(DI)]);
-        updateDebugInfo(&MI, DI);
-        Changed = true;
-        *P.first = DI->getBaseDiscriminator();
-      }
-
-      // Bump the reference DI to avoid cramming discriminators on line 0.
-      // FIXME(mtrofin): pin ReferenceDI on blocks or first instruction with DI
-      // in a block. It's more consistent than just relying on the last memop
-      // instruction we happened to see.
-      ReferenceDI = DI;
-    }
-  }
-  return Changed;
-}
-
-FunctionPass *llvm::createX86DiscriminateMemOpsPass() {
-  return new X86DiscriminateMemOps();
-}

Removed: llvm/trunk/lib/Target/X86/X86InsertPrefetch.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InsertPrefetch.cpp?rev=347863&view=auto
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InsertPrefetch.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InsertPrefetch.cpp (removed)
@@ -1,253 +0,0 @@
-//===------- X86InsertPrefetch.cpp - Insert cache prefetch hints ----------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass applies cache prefetch instructions based on a profile. The pass
-// assumes DiscriminateMemOps ran immediately before, to ensure debug info
-// matches the one used at profile generation time. The profile is encoded in
-// afdo format (text or binary). It contains prefetch hints recommendations.
-// Each recommendation is made in terms of debug info locations, a type (i.e.
-// nta, t{0|1|2}) and a delta. The debug info identifies an instruction with a
-// memory operand (see X86DiscriminateMemOps). The prefetch will be made for
-// a location at that memory operand + the delta specified in the
-// recommendation.
-//
-//===----------------------------------------------------------------------===//
-
-#include "X86.h"
-#include "X86InstrBuilder.h"
-#include "X86InstrInfo.h"
-#include "X86MachineFunctionInfo.h"
-#include "X86Subtarget.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/ProfileData/SampleProf.h"
-#include "llvm/ProfileData/SampleProfReader.h"
-#include "llvm/Transforms/IPO/SampleProfile.h"
-using namespace llvm;
-using namespace sampleprof;
-
-static cl::opt<std::string>
-    PrefetchHintsFile("prefetch-hints-file",
-                      cl::desc("Path to the prefetch hints profile."),
-                      cl::Hidden);
-namespace {
-
-class X86InsertPrefetch : public MachineFunctionPass {
-  void getAnalysisUsage(AnalysisUsage &AU) const override;
-  bool doInitialization(Module &) override;
-
-  bool runOnMachineFunction(MachineFunction &MF) override;
-  struct PrefetchInfo {
-    unsigned InstructionID;
-    int64_t Delta;
-  };
-  typedef SmallVectorImpl<PrefetchInfo> Prefetches;
-  bool findPrefetchInfo(const FunctionSamples *Samples, const MachineInstr &MI,
-                        Prefetches &prefetches) const;
-
-public:
-  static char ID;
-  X86InsertPrefetch(const std::string &PrefetchHintsFilename);
-  StringRef getPassName() const override {
-    return "X86 Insert Cache Prefetches";
-  }
-
-private:
-  std::string Filename;
-  std::unique_ptr<SampleProfileReader> Reader;
-};
-
-using PrefetchHints = SampleRecord::CallTargetMap;
-
-// Return any prefetching hints for the specified MachineInstruction. The hints
-// are returned as pairs (name, delta).
-ErrorOr<PrefetchHints> getPrefetchHints(const FunctionSamples *TopSamples,
-                                        const MachineInstr &MI) {
-  if (const auto &Loc = MI.getDebugLoc())
-    if (const auto *Samples = TopSamples->findFunctionSamples(Loc))
-      return Samples->findCallTargetMapAt(FunctionSamples::getOffset(Loc),
-                                          Loc->getBaseDiscriminator());
-  return std::error_code();
-}
-
-// The prefetch instruction can't take memory operands involving vector
-// registers.
-bool IsMemOpCompatibleWithPrefetch(const MachineInstr &MI, int Op) {
-  unsigned BaseReg = MI.getOperand(Op + X86::AddrBaseReg).getReg();
-  unsigned IndexReg = MI.getOperand(Op + X86::AddrIndexReg).getReg();
-  return (BaseReg == 0 ||
-          X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) ||
-          X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg)) &&
-         (IndexReg == 0 ||
-          X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
-          X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg));
-}
-
-} // end anonymous namespace
-
-//===----------------------------------------------------------------------===//
-//            Implementation
-//===----------------------------------------------------------------------===//
-
-char X86InsertPrefetch::ID = 0;
-
-X86InsertPrefetch::X86InsertPrefetch(const std::string &PrefetchHintsFilename)
-    : MachineFunctionPass(ID), Filename(PrefetchHintsFilename) {}
-
-/// Return true if the provided MachineInstruction has cache prefetch hints. In
-/// that case, the prefetch hints are stored, in order, in the Prefetches
-/// vector.
-bool X86InsertPrefetch::findPrefetchInfo(const FunctionSamples *TopSamples,
-                                         const MachineInstr &MI,
-                                         Prefetches &Prefetches) const {
-  assert(Prefetches.empty() &&
-         "Expected caller passed empty PrefetchInfo vector.");
-  static const std::pair<const StringRef, unsigned> HintTypes[] = {
-      {"_nta_", X86::PREFETCHNTA},
-      {"_t0_", X86::PREFETCHT0},
-      {"_t1_", X86::PREFETCHT1},
-      {"_t2_", X86::PREFETCHT2},
-  };
-  static const char *SerializedPrefetchPrefix = "__prefetch";
-
-  const ErrorOr<PrefetchHints> T = getPrefetchHints(TopSamples, MI);
-  if (!T)
-    return false;
-  int16_t max_index = -1;
-  // Convert serialized prefetch hints into PrefetchInfo objects, and populate
-  // the Prefetches vector.
-  for (const auto &S_V : *T) {
-    StringRef Name = S_V.getKey();
-    if (Name.consume_front(SerializedPrefetchPrefix)) {
-      int64_t D = static_cast<int64_t>(S_V.second);
-      unsigned IID = 0;
-      for (const auto &HintType : HintTypes) {
-        if (Name.startswith(HintType.first)) {
-          Name = Name.drop_front(HintType.first.size());
-          IID = HintType.second;
-          break;
-        }
-      }
-      if (IID == 0)
-        return false;
-      uint8_t index = 0;
-      Name.consumeInteger(10, index);
-
-      if (index >= Prefetches.size())
-        Prefetches.resize(index + 1);
-      Prefetches[index] = {IID, D};
-      max_index = std::max(max_index, static_cast<int16_t>(index));
-    }
-  }
-  assert(max_index + 1 >= 0 &&
-         "Possible overflow: max_index + 1 should be positive.");
-  assert(static_cast<size_t>(max_index + 1) == Prefetches.size() &&
-         "The number of prefetch hints received should match the number of "
-         "PrefetchInfo objects returned");
-  return !Prefetches.empty();
-}
-
-bool X86InsertPrefetch::doInitialization(Module &M) {
-  if (Filename.empty())
-    return false;
-
-  LLVMContext &Ctx = M.getContext();
-  ErrorOr<std::unique_ptr<SampleProfileReader>> ReaderOrErr =
-      SampleProfileReader::create(Filename, Ctx);
-  if (std::error_code EC = ReaderOrErr.getError()) {
-    std::string Msg = "Could not open profile: " + EC.message();
-    Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg,
-                                             DiagnosticSeverity::DS_Warning));
-    return false;
-  }
-  Reader = std::move(ReaderOrErr.get());
-  Reader->read();
-  return true;
-}
-
-void X86InsertPrefetch::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.setPreservesAll();
-  AU.addRequired<MachineModuleInfo>();
-}
-
-bool X86InsertPrefetch::runOnMachineFunction(MachineFunction &MF) {
-  if (!Reader)
-    return false;
-  const FunctionSamples *Samples = Reader->getSamplesFor(MF.getFunction());
-  if (!Samples)
-    return false;
-
-  bool Changed = false;
-
-  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
-  SmallVector<PrefetchInfo, 4> Prefetches;
-  for (auto &MBB : MF) {
-    for (auto MI = MBB.instr_begin(); MI != MBB.instr_end();) {
-      auto Current = MI;
-      ++MI;
-
-      int Offset = X86II::getMemoryOperandNo(Current->getDesc().TSFlags);
-      if (Offset < 0)
-        continue;
-      unsigned Bias = X86II::getOperandBias(Current->getDesc());
-      int MemOpOffset = Offset + Bias;
-      // FIXME(mtrofin): ORE message when the recommendation cannot be taken.
-      if (!IsMemOpCompatibleWithPrefetch(*Current, MemOpOffset))
-        continue;
-      Prefetches.clear();
-      if (!findPrefetchInfo(Samples, *Current, Prefetches))
-        continue;
-      assert(!Prefetches.empty() &&
-             "The Prefetches vector should contain at least a value if "
-             "findPrefetchInfo returned true.");
-      for (auto &PrefInfo : Prefetches) {
-        unsigned PFetchInstrID = PrefInfo.InstructionID;
-        int64_t Delta = PrefInfo.Delta;
-        const MCInstrDesc &Desc = TII->get(PFetchInstrID);
-        MachineInstr *PFetch =
-            MF.CreateMachineInstr(Desc, Current->getDebugLoc(), true);
-        MachineInstrBuilder MIB(MF, PFetch);
-
-        assert(X86::AddrBaseReg == 0 && X86::AddrScaleAmt == 1 &&
-               X86::AddrIndexReg == 2 && X86::AddrDisp == 3 &&
-               X86::AddrSegmentReg == 4 &&
-               "Unexpected change in X86 operand offset order.");
-
-        // This assumes X86::AddBaseReg = 0, {...}ScaleAmt = 1, etc.
-        // FIXME(mtrofin): consider adding a:
-        //     MachineInstrBuilder::set(unsigned offset, op).
-        MIB.addReg(Current->getOperand(MemOpOffset + X86::AddrBaseReg).getReg())
-            .addImm(
-                Current->getOperand(MemOpOffset + X86::AddrScaleAmt).getImm())
-            .addReg(
-                Current->getOperand(MemOpOffset + X86::AddrIndexReg).getReg())
-            .addImm(Current->getOperand(MemOpOffset + X86::AddrDisp).getImm() +
-                    Delta)
-            .addReg(Current->getOperand(MemOpOffset + X86::AddrSegmentReg)
-                        .getReg());
-
-        if (!Current->memoperands_empty()) {
-          MachineMemOperand *CurrentOp = *(Current->memoperands_begin());
-          MIB.addMemOperand(MF.getMachineMemOperand(
-              CurrentOp, CurrentOp->getOffset() + Delta, CurrentOp->getSize()));
-        }
-
-        // Insert before Current. This is because Current may clobber some of
-        // the registers used to describe the input memory operand.
-        MBB.insert(Current, PFetch);
-        Changed = true;
-      }
-    }
-  }
-  return Changed;
-}
-
-FunctionPass *llvm::createX86InsertPrefetchPass() {
-  return new X86InsertPrefetch(PrefetchHintsFile);
-}

Modified: llvm/trunk/lib/Target/X86/X86TargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetMachine.cpp?rev=347864&r1=347863&r2=347864&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86TargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86TargetMachine.cpp Thu Nov 29 05:58:02 2018
@@ -497,8 +497,6 @@ void X86PassConfig::addPreEmitPass() {
     addPass(createX86FixupLEAs());
     addPass(createX86EvexToVexInsts());
   }
-  addPass(createX86DiscriminateMemOpsPass());
-  addPass(createX86InsertPrefetchPass());
 }
 
 void X86PassConfig::addPreEmitPass2() {

Modified: llvm/trunk/test/CodeGen/X86/O0-pipeline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/O0-pipeline.ll?rev=347864&r1=347863&r2=347864&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/O0-pipeline.ll (original)
+++ llvm/trunk/test/CodeGen/X86/O0-pipeline.ll Thu Nov 29 05:58:02 2018
@@ -58,8 +58,6 @@
 ; CHECK-NEXT:       Shadow Call Stack
 ; CHECK-NEXT:       X86 Indirect Branch Tracking
 ; CHECK-NEXT:       X86 vzeroupper inserter
-; CHECK-NEXT:       X86 Discriminate Memory Operands
-; CHECK-NEXT:       X86 Insert Cache Prefetches
 ; CHECK-NEXT:       Contiguously Lay Out Funclets
 ; CHECK-NEXT:       StackMap Liveness Analysis
 ; CHECK-NEXT:       Live DEBUG_VALUE analysis

Modified: llvm/trunk/test/CodeGen/X86/O3-pipeline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/O3-pipeline.ll?rev=347864&r1=347863&r2=347864&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/O3-pipeline.ll (original)
+++ llvm/trunk/test/CodeGen/X86/O3-pipeline.ll Thu Nov 29 05:58:02 2018
@@ -159,8 +159,6 @@
 ; CHECK-NEXT:       X86 Atom pad short functions
 ; CHECK-NEXT:       X86 LEA Fixup
 ; CHECK-NEXT:       Compressing EVEX instrs to VEX encoding when possible
-; CHECK-NEXT:       X86 Discriminate Memory Operands
-; CHECK-NEXT:       X86 Insert Cache Prefetches
 ; CHECK-NEXT:       Contiguously Lay Out Funclets
 ; CHECK-NEXT:       StackMap Liveness Analysis
 ; CHECK-NEXT:       Live DEBUG_VALUE analysis

Removed: llvm/trunk/test/CodeGen/X86/discriminate-mem-ops.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/discriminate-mem-ops.ll?rev=347863&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/discriminate-mem-ops.ll (original)
+++ llvm/trunk/test/CodeGen/X86/discriminate-mem-ops.ll (removed)
@@ -1,55 +0,0 @@
-; RUN: llc < %s | FileCheck %s
-;
-; original source, compiled with -O3 -gmlt -fdebug-info-for-profiling:
-; int sum(int* arr, int pos1, int pos2) {
-;   return arr[pos1] + arr[pos2];
-; }
-;
-; ModuleID = 'test.cc'
-source_filename = "test.cc"
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-; Function Attrs: norecurse nounwind readonly uwtable
-define i32 @sum(i32* %arr, i32 %pos1, i32 %pos2) !dbg !7 {
-entry:
-  %idxprom = sext i32 %pos1 to i64, !dbg !9
-  %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom, !dbg !9
-  %0 = load i32, i32* %arrayidx, align 4, !dbg !9, !tbaa !10
-  %idxprom1 = sext i32 %pos2 to i64, !dbg !14
-  %arrayidx2 = getelementptr inbounds i32, i32* %arr, i64 %idxprom1, !dbg !14
-  %1 = load i32, i32* %arrayidx2, align 4, !dbg !14, !tbaa !10
-  %add = add nsw i32 %1, %0, !dbg !15
-  ret i32 %add, !dbg !16
-}
-
-attributes #0 = { "target-cpu"="x86-64" }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!3, !4, !5}
-!llvm.ident = !{!6}
-
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, debugInfoForProfiling: true)
-!1 = !DIFile(filename: "test.cc", directory: "/tmp")
-!2 = !{}
-!3 = !{i32 2, !"Dwarf Version", i32 4}
-!4 = !{i32 2, !"Debug Info Version", i32 3}
-!5 = !{i32 1, !"wchar_size", i32 4}
-!6 = !{!"clang version 7.0.0 (trunk 322155) (llvm/trunk 322159)"}
-!7 = distinct !DISubprogram(name: "sum", linkageName: "sum", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0)
-!8 = !DISubroutineType(types: !2)
-!9 = !DILocation(line: 2, column: 10, scope: !7)
-!10 = !{!11, !11, i64 0}
-!11 = !{!"int", !12, i64 0}
-!12 = !{!"omnipotent char", !13, i64 0}
-!13 = !{!"Simple C++ TBAA"}
-!14 = !DILocation(line: 2, column: 22, scope: !7)
-!15 = !DILocation(line: 2, column: 20, scope: !7)
-!16 = !DILocation(line: 2, column: 3, scope: !7)
-
-;CHECK-LABEL: sum:
-;CHECK:       # %bb.0:
-;CHECK:       movl (%rdi,%rax,4), %eax
-;CHECK-NEXT:  .loc 1 2 20 discriminator 2  # test.cc:2:20
-;CHECK-NEXT:  addl (%rdi,%rcx,4), %eax
-;CHECK-NEXT:  .loc 1 2 3                   # test.cc:2:3

Removed: llvm/trunk/test/CodeGen/X86/insert-prefetch-inline.afdo
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/insert-prefetch-inline.afdo?rev=347863&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/insert-prefetch-inline.afdo (original)
+++ llvm/trunk/test/CodeGen/X86/insert-prefetch-inline.afdo (removed)
@@ -1,4 +0,0 @@
-caller:0:0
- 2:sum:0
-  3: 0 __prefetch_nta_0:23456
-  3.1: 0 __prefetch_nta_0:8764 __prefetch_nta_1:64
\ No newline at end of file

Removed: llvm/trunk/test/CodeGen/X86/insert-prefetch-inline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/insert-prefetch-inline.ll?rev=347863&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/insert-prefetch-inline.ll (original)
+++ llvm/trunk/test/CodeGen/X86/insert-prefetch-inline.ll (removed)
@@ -1,76 +0,0 @@
-; RUN: llc < %s -prefetch-hints-file=%S/insert-prefetch-inline.afdo | FileCheck %s
-;
-; Verify we can insert prefetch instructions in code belonging to inlined
-; functions.
-;
-; ModuleID = 'test.cc'
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-; Function Attrs: norecurse nounwind readonly uwtable
-define dso_local i32 @sum(i32* nocapture readonly %arr, i32 %pos1, i32 %pos2) local_unnamed_addr #0 !dbg !7 {
-entry:
-  %idxprom = sext i32 %pos1 to i64, !dbg !10
-  %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom, !dbg !10
-  %0 = load i32, i32* %arrayidx, align 4, !dbg !10, !tbaa !11
-  %idxprom1 = sext i32 %pos2 to i64, !dbg !15
-  %arrayidx2 = getelementptr inbounds i32, i32* %arr, i64 %idxprom1, !dbg !15
-  %1 = load i32, i32* %arrayidx2, align 4, !dbg !15, !tbaa !11
-  %add = add nsw i32 %1, %0, !dbg !16
-  ret i32 %add, !dbg !17
-}
-
-; "caller" inlines "sum". The associated .afdo file references instructions
-; in "caller" that came from "sum"'s inlining.
-;
-; Function Attrs: norecurse nounwind readonly uwtable
-define dso_local i32 @caller(i32* nocapture readonly %arr) local_unnamed_addr #0 !dbg !18 {
-entry:
-  %0 = load i32, i32* %arr, align 4, !dbg !19, !tbaa !11
-  %arrayidx2.i = getelementptr inbounds i32, i32* %arr, i64 2, !dbg !21
-  %1 = load i32, i32* %arrayidx2.i, align 4, !dbg !21, !tbaa !11
-  %add.i = add nsw i32 %1, %0, !dbg !22
-  ret i32 %add.i, !dbg !23
-}
-
-attributes #0 = { "target-cpu"="x86-64" }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!3, !4, !5}
-!llvm.ident = !{!6}
-
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 7.0.0 (trunk 324940) (llvm/trunk 324941)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, debugInfoForProfiling: true)
-!1 = !DIFile(filename: "test.cc", directory: "/tmp")
-!2 = !{}
-!3 = !{i32 2, !"Dwarf Version", i32 4}
-!4 = !{i32 2, !"Debug Info Version", i32 3}
-!5 = !{i32 1, !"wchar_size", i32 4}
-!6 = !{!"clang version 7.0.0 (trunk 324940) (llvm/trunk 324941)"}
-!7 = distinct !DISubprogram(name: "sum", linkageName: "sum", scope: !8, file: !8, line: 3, type: !9, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0)
-!8 = !DIFile(filename: "./test.h", directory: "/tmp")
-!9 = !DISubroutineType(types: !2)
-!10 = !DILocation(line: 6, column: 10, scope: !7)
-!11 = !{!12, !12, i64 0}
-!12 = !{!"int", !13, i64 0}
-!13 = !{!"omnipotent char", !14, i64 0}
-!14 = !{!"Simple C++ TBAA"}
-!15 = !DILocation(line: 6, column: 22, scope: !7)
-!16 = !DILocation(line: 6, column: 20, scope: !7)
-!17 = !DILocation(line: 6, column: 3, scope: !7)
-!18 = distinct !DISubprogram(name: "caller", linkageName: "caller", scope: !1, file: !1, line: 4, type: !9, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: true, unit: !0)
-!19 = !DILocation(line: 6, column: 10, scope: !7, inlinedAt: !20)
-!20 = distinct !DILocation(line: 6, column: 10, scope: !18)
-!21 = !DILocation(line: 6, column: 22, scope: !7, inlinedAt: !20)
-!22 = !DILocation(line: 6, column: 20, scope: !7, inlinedAt: !20)
-!23 = !DILocation(line: 6, column: 3, scope: !18)
-
-; CHECK-LABEL: caller:
-; CHECK-LABEL: # %bb.0:
-; CHECK-NEXT: .loc 1 6 22 prologue_end
-; CHECK-NEXT: prefetchnta 23464(%rdi)
-; CHECK-NEXT: movl 8(%rdi), %eax
-; CHECK-NEXT: .loc 1 6 20 is_stmt 0 discriminator 2
-; CHECK-NEXT: prefetchnta 8764(%rdi)
-; CHECK-NEXT: prefetchnta 64(%rdi)
-; CHECK-NEXT: addl (%rdi), %eax

Removed: llvm/trunk/test/CodeGen/X86/insert-prefetch-invalid-instr.afdo
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/insert-prefetch-invalid-instr.afdo?rev=347863&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/insert-prefetch-invalid-instr.afdo (original)
+++ llvm/trunk/test/CodeGen/X86/insert-prefetch-invalid-instr.afdo (removed)
@@ -1,2 +0,0 @@
-main:0:0
- 6: 0 __prefetch_nta_0:42
\ No newline at end of file

Removed: llvm/trunk/test/CodeGen/X86/insert-prefetch-invalid-instr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/insert-prefetch-invalid-instr.ll?rev=347863&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/insert-prefetch-invalid-instr.ll (original)
+++ llvm/trunk/test/CodeGen/X86/insert-prefetch-invalid-instr.ll (removed)
@@ -1,46 +0,0 @@
-; RUN: llc < %s -prefetch-hints-file=%S/insert-prefetch-invalid-instr.afdo | FileCheck %s
-; ModuleID = 'prefetch.cc'
-source_filename = "prefetch.cc"
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-; Function Attrs: norecurse nounwind uwtable
-define dso_local i32 @main() local_unnamed_addr #0 !dbg !7 {
-entry:
-  tail call void @llvm.prefetch(i8* inttoptr (i64 291 to i8*), i32 0, i32 0, i32 1), !dbg !9
-  tail call void @llvm.x86.avx512.gatherpf.dpd.512(i8 97, <8 x i32> undef, i8* null, i32 1, i32 2), !dbg !10
-  ret i32 291, !dbg !11
-}
-
-; Function Attrs: inaccessiblemem_or_argmemonly nounwind
-declare void @llvm.prefetch(i8* nocapture readonly, i32, i32, i32) #1
-
-; Function Attrs: argmemonly nounwind
-declare void @llvm.x86.avx512.gatherpf.dpd.512(i8, <8 x i32>, i8*, i32, i32) #2
-
-attributes #0 = {"target-cpu"="x86-64" "target-features"="+avx512pf,+sse4.2,+ssse3"}
-attributes #1 = { inaccessiblemem_or_argmemonly nounwind }
-attributes #2 = { argmemonly nounwind }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!3, !4, !5}
-!llvm.ident = !{!6}
-
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, debugInfoForProfiling: true)
-!1 = !DIFile(filename: "prefetch.cc", directory: "/tmp")
-!2 = !{}
-!3 = !{i32 2, !"Dwarf Version", i32 4}
-!4 = !{i32 2, !"Debug Info Version", i32 3}
-!5 = !{i32 1, !"wchar_size", i32 4}
-!6 = !{!"clang version 7.0.0 (trunk 327078) (llvm/trunk 327086)"}
-!7 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 8, type: !8, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: true, unit: !0)
-!8 = !DISubroutineType(types: !2)
-!9 = !DILocation(line: 12, column: 3, scope: !7)
-!10 = !DILocation(line: 14, column: 3, scope: !7)
-!11 = !DILocation(line: 15, column: 3, scope: !7)
-
-;CHECK-LABEL: main:
-;CHECK:       # %bb.0:
-;CHECK:       prefetchnta 291
-;CHECK-NOT:   prefetchnta 42(%rax,%ymm0)
-;CHECK:       vgatherpf1dpd (%rax,%ymm0) {%k1}

Removed: llvm/trunk/test/CodeGen/X86/insert-prefetch-other.afdo
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/insert-prefetch-other.afdo?rev=347863&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/insert-prefetch-other.afdo (original)
+++ llvm/trunk/test/CodeGen/X86/insert-prefetch-other.afdo (removed)
@@ -1,3 +0,0 @@
-sum:0:0
- 1: 0 __prefetch_t0_1:0 __prefetch_t2_0:42
- 1.1: 0 __prefetch_t1_0:18446744073709551615

Removed: llvm/trunk/test/CodeGen/X86/insert-prefetch.afdo
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/insert-prefetch.afdo?rev=347863&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/insert-prefetch.afdo (original)
+++ llvm/trunk/test/CodeGen/X86/insert-prefetch.afdo (removed)
@@ -1,3 +0,0 @@
-sum:0:0
- 1: 0 __prefetch_nta_1:0 __prefetch_nta_0:42
- 1.1: 0 __prefetch_nta_0:18446744073709551615

Removed: llvm/trunk/test/CodeGen/X86/insert-prefetch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/insert-prefetch.ll?rev=347863&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/insert-prefetch.ll (original)
+++ llvm/trunk/test/CodeGen/X86/insert-prefetch.ll (removed)
@@ -1,101 +0,0 @@
-; RUN: llc < %s -prefetch-hints-file=%S/insert-prefetch.afdo | FileCheck %s
-; RUN: llc < %s -prefetch-hints-file=%S/insert-prefetch-other.afdo | FileCheck %s -check-prefix=OTHERS
-;
-; original source, compiled with -O3 -gmlt -fdebug-info-for-profiling:
-; int sum(int* arr, int pos1, int pos2) {
-;   return arr[pos1] + arr[pos2];
-; }
-;
-; NOTE: debug line numbers were adjusted such that the function would start
-; at line 15 (an arbitrary number). The sample profile file format uses
-; offsets from the start of the symbol instead of file-relative line numbers.
-; The .afdo file reflects that - the instructions are offset '1'.
-;
-; ModuleID = 'test.cc'
-source_filename = "test.cc"
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define i32 @sum(i32* %arr, i32 %pos1, i32 %pos2) !dbg !35 !prof !37 {
-entry:
-  %idxprom = sext i32 %pos1 to i64, !dbg !38
-  %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom, !dbg !38
-  %0 = load i32, i32* %arrayidx, align 4, !dbg !38, !tbaa !39
-  %idxprom1 = sext i32 %pos2 to i64, !dbg !43
-  %arrayidx2 = getelementptr inbounds i32, i32* %arr, i64 %idxprom1, !dbg !43
-  %1 = load i32, i32* %arrayidx2, align 4, !dbg !43, !tbaa !39
-  %add = add nsw i32 %1, %0, !dbg !44
-  ret i32 %add, !dbg !45
-}
-
-attributes #0 = { "target-cpu"="x86-64" }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!3, !4, !5, !6}
-!llvm.ident = !{!33}
-
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, debugInfoForProfiling: true)
-!1 = !DIFile(filename: "test.cc", directory: "/tmp")
-!2 = !{}
-!3 = !{i32 2, !"Dwarf Version", i32 4}
-!4 = !{i32 2, !"Debug Info Version", i32 3}
-!5 = !{i32 1, !"wchar_size", i32 4}
-!6 = !{i32 1, !"ProfileSummary", !7}
-!7 = !{!8, !9, !10, !11, !12, !13, !14, !15}
-!8 = !{!"ProfileFormat", !"SampleProfile"}
-!9 = !{!"TotalCount", i64 0}
-!10 = !{!"MaxCount", i64 0}
-!11 = !{!"MaxInternalCount", i64 0}
-!12 = !{!"MaxFunctionCount", i64 0}
-!13 = !{!"NumCounts", i64 2}
-!14 = !{!"NumFunctions", i64 1}
-!15 = !{!"DetailedSummary", !16}
-!16 = !{!17, !18, !19, !20, !21, !22, !22, !23, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32}
-!17 = !{i32 10000, i64 0, i32 0}
-!18 = !{i32 100000, i64 0, i32 0}
-!19 = !{i32 200000, i64 0, i32 0}
-!20 = !{i32 300000, i64 0, i32 0}
-!21 = !{i32 400000, i64 0, i32 0}
-!22 = !{i32 500000, i64 0, i32 0}
-!23 = !{i32 600000, i64 0, i32 0}
-!24 = !{i32 700000, i64 0, i32 0}
-!25 = !{i32 800000, i64 0, i32 0}
-!26 = !{i32 900000, i64 0, i32 0}
-!27 = !{i32 950000, i64 0, i32 0}
-!28 = !{i32 990000, i64 0, i32 0}
-!29 = !{i32 999000, i64 0, i32 0}
-!30 = !{i32 999900, i64 0, i32 0}
-!31 = !{i32 999990, i64 0, i32 0}
-!32 = !{i32 999999, i64 0, i32 0}
-!33 = !{!"clang version 7.0.0 (trunk 322593) (llvm/trunk 322526)"}
-!35 = distinct !DISubprogram(name: "sum", linkageName: "sum", scope: !1, file: !1, line: 15, type: !36, isLocal: false, isDefinition: true, scopeLine: 15, flags: DIFlagPrototyped, isOptimized: true, unit: !0)
-!36 = !DISubroutineType(types: !2)
-!37 = !{!"function_entry_count", i64 -1}
-!38 = !DILocation(line: 16, column: 10, scope: !35)
-!39 = !{!40, !40, i64 0}
-!40 = !{!"int", !41, i64 0}
-!41 = !{!"omnipotent char", !42, i64 0}
-!42 = !{!"Simple C++ TBAA"}
-!43 = !DILocation(line: 16, column: 22, scope: !35)
-!44 = !DILocation(line: 16, column: 20, scope: !35)
-!45 = !DILocation(line: 16, column: 3, scope: !35)
-
-;CHECK-LABEL: sum:
-;CHECK:       # %bb.0:
-;CHECK:       prefetchnta 42(%rdi,%rax,4)
-;CHECK-NEXT:  prefetchnta (%rdi,%rax,4)
-;CHECK-NEXT:  movl (%rdi,%rax,4), %eax
-;CHECK-NEXT:  .loc 1 16 20 discriminator 2  # test.cc:16:20
-;CHECK-NEXT:  prefetchnta -1(%rdi,%rcx,4)
-;CHECK-NEXT:  addl (%rdi,%rcx,4), %eax
-;CHECK-NEXT:  .loc 1 16 3                   # test.cc:16:3
-
-;OTHERS-LABEL: sum:
-;OTHERS:       # %bb.0:
-;OTHERS:       prefetcht2 42(%rdi,%rax,4)
-;OTHERS-NEXT:  prefetcht0 (%rdi,%rax,4)
-;OTHERS-NEXT:  movl (%rdi,%rax,4), %eax
-;OTHERS-NEXT:  .loc 1 16 20 discriminator 2  # test.cc:16:20
-;OTHERS-NEXT:  prefetcht1 -1(%rdi,%rcx,4)
-;OTHERS-NEXT:  addl (%rdi,%rcx,4), %eax
-;OTHERS-NEXT:  .loc 1 16 3                   # test.cc:16:3




More information about the llvm-commits mailing list