[llvm] r330219 - [XRay] Typed event logging intrinsic

Keith Wyss via llvm-commits <llvm-commits@lists.llvm.org>
Tue Apr 17 14:30:29 PDT 2018


Author: kpw
Date: Tue Apr 17 14:30:29 2018
New Revision: 330219

URL: http://llvm.org/viewvc/llvm-project?rev=330219&view=rev
Log:
[XRay] Typed event logging intrinsic

Summary:
Add an LLVM intrinsic for type discriminated event logging with XRay.
Similar to the existing intrinsic for custom events, but also accepts
a type tag argument to allow plugins to be aware of different types
and semantically interpret logged events they know about without
choking on those they don't.
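
For illustration, here is a minimal sketch of how a front end or pass
might emit a call to the new intrinsic through IRBuilder. The helper
function and the type tag value are hypothetical; only the
(i16, i8*, i32) signature comes from this patch:

    #include "llvm/IR/Function.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Emits: call void @llvm.xray.typedevent(i16 42, i8* %Payload, i32 %Len)
    // Payload must be an i8* and Len an i32 to match the intrinsic signature.
    void emitTypedEvent(IRBuilder<> &Builder, Module &M, Value *Payload,
                        Value *Len) {
      Function *TypedEvent =
          Intrinsic::getDeclaration(&M, Intrinsic::xray_typedevent);
      Value *TypeTag = Builder.getInt16(42); // hypothetical plugin type id
      Builder.CreateCall(TypedEvent, {TypeTag, Payload, Len});
    }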

Relies on a symbol defined in the compiler-rt patch D43668. I may wait
to submit this until I can demo everything working together, including
a still-to-come clang patch.
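
The runtime half lives in compiler-rt, so only a rough sketch is possible
here. Assuming D43668 exposes a registration function analogous to the
existing custom event handler (the name, signature, and type id below are
assumptions to be checked against that patch), a tool-side handler might
look like:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // Assumed to be provided by the XRay runtime once D43668 lands; take the
    // real name and signature from that patch, not from this sketch.
    extern "C" int __xray_set_typedevent_handler(
        void (*Handler)(uint16_t, const void *, std::size_t));

    // A plugin dispatches on the type tag and skips records it doesn't know,
    // which is the point of type-discriminated events.
    static void handleTypedEvent(uint16_t Type, const void *Data,
                                 std::size_t Size) {
      if (Type != 42) // hypothetical type id
        return;
      std::fprintf(stderr, "typed event: %zu bytes\n", Size);
    }

    int main() {
      __xray_set_typedevent_handler(handleTypedEvent);
      // ... run instrumented code ...
    }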

Reviewers: dberris, pelikan, eizan, rSerge, timshen

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D45633

Added:
    llvm/trunk/test/CodeGen/X86/xray-typed-event-log.ll
      - copied, changed from r330208, llvm/trunk/test/CodeGen/X86/xray-custom-log.ll
Modified:
    llvm/trunk/include/llvm/CodeGen/AsmPrinter.h
    llvm/trunk/include/llvm/CodeGen/FastISel.h
    llvm/trunk/include/llvm/CodeGen/TargetLowering.h
    llvm/trunk/include/llvm/IR/Intrinsics.td
    llvm/trunk/include/llvm/Support/TargetOpcodes.def
    llvm/trunk/include/llvm/Target/Target.td
    llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp
    llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
    llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp
    llvm/trunk/lib/Target/X86/X86AsmPrinter.h
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86MCInstLower.cpp
    llvm/trunk/test/CodeGen/X86/xray-custom-log.ll

Modified: llvm/trunk/include/llvm/CodeGen/AsmPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/AsmPrinter.h?rev=330219&r1=330218&r2=330219&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/AsmPrinter.h (original)
+++ llvm/trunk/include/llvm/CodeGen/AsmPrinter.h Tue Apr 17 14:30:29 2018
@@ -238,6 +238,7 @@ public:
     TAIL_CALL = 2,
     LOG_ARGS_ENTER = 3,
     CUSTOM_EVENT = 4,
+    TYPED_EVENT = 5,
   };
 
   // The table will contain these structs that point to the sled, the function
@@ -644,8 +645,7 @@ private:
 
   GCMetadataPrinter *GetOrCreateGCPrinter(GCStrategy &C);
   /// Emit GlobalAlias or GlobalIFunc.
-  void emitGlobalIndirectSymbol(Module &M,
-                                const GlobalIndirectSymbol& GIS);
+  void emitGlobalIndirectSymbol(Module &M, const GlobalIndirectSymbol &GIS);
   void setupCodePaddingContext(const MachineBasicBlock &MBB,
                                MCCodePaddingContext &Context) const;
 };

Modified: llvm/trunk/include/llvm/CodeGen/FastISel.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/FastISel.h?rev=330219&r1=330218&r2=330219&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/FastISel.h (original)
+++ llvm/trunk/include/llvm/CodeGen/FastISel.h Tue Apr 17 14:30:29 2018
@@ -535,6 +535,7 @@ protected:
   bool selectExtractValue(const User *I);
   bool selectInsertValue(const User *I);
   bool selectXRayCustomEvent(const CallInst *II);
+  bool selectXRayTypedEvent(const CallInst *II);
 
 private:
   /// \brief Handle PHI nodes in successor blocks.

Modified: llvm/trunk/include/llvm/CodeGen/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/TargetLowering.h?rev=330219&r1=330218&r2=330219&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/CodeGen/TargetLowering.h Tue Apr 17 14:30:29 2018
@@ -2547,6 +2547,11 @@ protected:
   /// details.
   MachineBasicBlock *emitXRayCustomEvent(MachineInstr &MI,
                                          MachineBasicBlock *MBB) const;
+
+  /// Replace/modify the XRay typed event operands with target-dependent
+  /// details.
+  MachineBasicBlock *emitXRayTypedEvent(MachineInstr &MI,
+                                        MachineBasicBlock *MBB) const;
 };
 
 /// This class defines information used to lower LLVM code to legal SelectionDAG

Modified: llvm/trunk/include/llvm/IR/Intrinsics.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/Intrinsics.td?rev=330219&r1=330218&r2=330219&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/Intrinsics.td (original)
+++ llvm/trunk/include/llvm/IR/Intrinsics.td Tue Apr 17 14:30:29 2018
@@ -894,6 +894,10 @@ def int_load_relative: Intrinsic<[llvm_p
 // Takes a pointer to a string and the length of the string.
 def int_xray_customevent : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty],
                                      [NoCapture<0>, ReadOnly<0>, IntrWriteMem]>;
+// Typed event logging for x-ray.
+// Takes a numeric type tag, a pointer to a string and the length of the string.
+def int_xray_typedevent : Intrinsic<[], [llvm_i16_ty, llvm_ptr_ty, llvm_i32_ty],
+                                        [NoCapture<1>, ReadOnly<1>, IntrWriteMem]>;
 //===----------------------------------------------------------------------===//
 
 //===------ Memory intrinsics with element-wise atomicity guarantees ------===//
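
As a quick sanity check on the definition above: the TableGen entry should
expand to a void intrinsic taking (i16, i8*, i32), with the nocapture and
readonly attributes on the pointer. A purely illustrative C++ check of that
shape:

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"
    #include <cassert>

    using namespace llvm;

    void checkTypedEventShape(Module &M) {
      Function *F = Intrinsic::getDeclaration(&M, Intrinsic::xray_typedevent);
      FunctionType *FT = F->getFunctionType();
      assert(FT->getReturnType()->isVoidTy());
      assert(FT->getNumParams() == 3);
      assert(FT->getParamType(0)->isIntegerTy(16)); // type tag
      assert(FT->getParamType(1)->isPointerTy());   // event data
      assert(FT->getParamType(2)->isIntegerTy(32)); // size
    }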

Modified: llvm/trunk/include/llvm/Support/TargetOpcodes.def
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/TargetOpcodes.def?rev=330219&r1=330218&r2=330219&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Support/TargetOpcodes.def (original)
+++ llvm/trunk/include/llvm/Support/TargetOpcodes.def Tue Apr 17 14:30:29 2018
@@ -183,10 +183,14 @@ HANDLE_TARGET_OPCODE(PATCHABLE_FUNCTION_
 /// PATCHABLE_RET which specifically only works for return instructions.
 HANDLE_TARGET_OPCODE(PATCHABLE_TAIL_CALL)
 
-/// Wraps a logging call and its arguments with nop sleds. At runtime, this can be
-/// patched to insert instrumentation instructions.
+/// Wraps a logging call and its arguments with nop sleds. At runtime, this can
+/// be patched to insert instrumentation instructions.
 HANDLE_TARGET_OPCODE(PATCHABLE_EVENT_CALL)
 
+/// Wraps a typed logging call and its arguments with nop sleds. At runtime,
+/// this can be patched to insert instrumentation instructions.
+HANDLE_TARGET_OPCODE(PATCHABLE_TYPED_EVENT_CALL)
+
 HANDLE_TARGET_OPCODE(ICALL_BRANCH_FUNNEL)
 
 /// The following generic opcodes are not supposed to appear after ISel.

Modified: llvm/trunk/include/llvm/Target/Target.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/Target.td?rev=330219&r1=330218&r2=330219&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/Target.td (original)
+++ llvm/trunk/include/llvm/Target/Target.td Tue Apr 17 14:30:29 2018
@@ -1133,6 +1133,16 @@ def PATCHABLE_EVENT_CALL : StandardPseud
   let mayStore = 1;
   let hasSideEffects = 1;
 }
+def PATCHABLE_TYPED_EVENT_CALL : StandardPseudoInstruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins i16imm:$type, ptr_rc:$event, i32imm:$size);
+  let AsmString = "# XRay Typed Event Log.";
+  let usesCustomInserter = 1;
+  let isCall = 1;
+  let mayLoad = 1;
+  let mayStore = 1;
+  let hasSideEffects = 1;
+}
 def FENTRY_CALL : StandardPseudoInstruction {
   let OutOperandList = (outs unknown:$dst);
   let InOperandList = (ins variable_ops);

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp?rev=330219&r1=330218&r2=330219&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp Tue Apr 17 14:30:29 2018
@@ -1039,6 +1039,26 @@ bool FastISel::selectXRayCustomEvent(con
   return true;
 }
 
+bool FastISel::selectXRayTypedEvent(const CallInst *I) {
+  const auto &Triple = TM.getTargetTriple();
+  if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+    return true; // don't do anything to this instruction.
+  SmallVector<MachineOperand, 8> Ops;
+  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
+                                          /*IsDef=*/false));
+  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)),
+                                          /*IsDef=*/false));
+  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(2)),
+                                          /*IsDef=*/false));
+  MachineInstrBuilder MIB =
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(TargetOpcode::PATCHABLE_TYPED_EVENT_CALL));
+  for (auto &MO : Ops)
+    MIB.add(MO);
+
+  // Insert the Patchable Typed Event Call instruction; it is lowered later.
+  return true;
+}
 
 /// Returns an AttributeList representing the attributes applied to the return
 /// value of the given call.
@@ -1433,6 +1453,8 @@ bool FastISel::selectIntrinsicCall(const
 
   case Intrinsic::xray_customevent:
     return selectXRayCustomEvent(II);
+  case Intrinsic::xray_typedevent:
+    return selectXRayTypedEvent(II);
   }
 
   return fastLowerIntrinsicCall(II);

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=330219&r1=330218&r2=330219&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Tue Apr 17 14:30:29 2018
@@ -6067,6 +6067,41 @@ SelectionDAGBuilder::visitIntrinsicCall(
     setValue(&I, patchableNode);
     return nullptr;
   }
+  case Intrinsic::xray_typedevent: {
+    // Here we want to make sure that the intrinsic behaves as if it has a
+    // specific calling convention, and only for x86_64.
+    // FIXME: Support other platforms later.
+    const auto &Triple = DAG.getTarget().getTargetTriple();
+    if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+      return nullptr;
+
+    SDLoc DL = getCurSDLoc();
+    SmallVector<SDValue, 8> Ops;
+
+    // We want to say that we always want the arguments in registers.
+    // It's unclear to me how manipulating the selection DAG here forces callers
+    // to provide arguments in registers instead of on the stack.
+    SDValue LogTypeId = getValue(I.getArgOperand(0));
+    SDValue LogEntryVal = getValue(I.getArgOperand(1));
+    SDValue StrSizeVal = getValue(I.getArgOperand(2));
+    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+    SDValue Chain = getRoot();
+    Ops.push_back(LogTypeId);
+    Ops.push_back(LogEntryVal);
+    Ops.push_back(StrSizeVal);
+    Ops.push_back(Chain);
+
+    // We need to enforce the calling convention for the callsite, so that
+    // argument ordering is enforced correctly, and that register allocation can
+    // see that some registers may be assumed clobbered and have to preserve
+    // them across calls to the intrinsic.
+    MachineSDNode *MN = DAG.getMachineNode(
+        TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, DL, NodeTys, Ops);
+    SDValue patchableNode = SDValue(MN, 0);
+    DAG.setRoot(patchableNode);
+    setValue(&I, patchableNode);
+    return nullptr;
+  }
   case Intrinsic::experimental_deoptimize:
     LowerDeoptimizeCall(&I);
     return nullptr;

Modified: llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp?rev=330219&r1=330218&r2=330219&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp (original)
+++ llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp Tue Apr 17 14:30:29 2018
@@ -1001,6 +1001,21 @@ TargetLoweringBase::emitXRayCustomEvent(
   return MBB;
 }
 
+MachineBasicBlock *
+TargetLoweringBase::emitXRayTypedEvent(MachineInstr &MI,
+                                       MachineBasicBlock *MBB) const {
+  assert(MI.getOpcode() == TargetOpcode::PATCHABLE_TYPED_EVENT_CALL &&
+         "Called emitXRayTypedEvent on the wrong MI!");
+  auto &MF = *MI.getMF();
+  auto MIB = BuildMI(MF, MI.getDebugLoc(), MI.getDesc());
+  for (unsigned OpIdx = 0; OpIdx != MI.getNumOperands(); ++OpIdx)
+    MIB.add(MI.getOperand(OpIdx));
+
+  MBB->insert(MachineBasicBlock::iterator(MI), MIB);
+  MI.eraseFromParent();
+  return MBB;
+}
+
 /// findRepresentativeClass - Return the largest legal super-reg register class
 /// of the register class for the specified type and its associated "cost".
 // This function is in TargetLowering because it uses RegClassForVT which would

Modified: llvm/trunk/lib/Target/X86/X86AsmPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86AsmPrinter.h?rev=330219&r1=330218&r2=330219&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86AsmPrinter.h (original)
+++ llvm/trunk/lib/Target/X86/X86AsmPrinter.h Tue Apr 17 14:30:29 2018
@@ -95,6 +95,8 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrin
   void LowerPATCHABLE_RET(const MachineInstr &MI, X86MCInstLower &MCIL);
   void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, X86MCInstLower &MCIL);
   void LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, X86MCInstLower &MCIL);
+  void LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
+                                       X86MCInstLower &MCIL);
 
   void LowerFENTRY_CALL(const MachineInstr &MI, X86MCInstLower &MCIL);
 

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=330219&r1=330218&r2=330219&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Apr 17 14:30:29 2018
@@ -28219,6 +28219,9 @@ X86TargetLowering::EmitInstrWithCustomIn
   case TargetOpcode::PATCHABLE_EVENT_CALL:
     return emitXRayCustomEvent(MI, BB);
 
+  case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
+    return emitXRayTypedEvent(MI, BB);
+
   case X86::LCMPXCHG8B: {
     const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
     // In addition to 4 E[ABCD] registers implied by encoding, CMPXCHG8B

Modified: llvm/trunk/lib/Target/X86/X86MCInstLower.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86MCInstLower.cpp?rev=330219&r1=330218&r2=330219&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86MCInstLower.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86MCInstLower.cpp Tue Apr 17 14:30:29 2018
@@ -56,6 +56,7 @@ class X86MCInstLower {
   const TargetMachine &TM;
   const MCAsmInfo &MAI;
   X86AsmPrinter &AsmPrinter;
+
 public:
   X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter);
 
@@ -115,13 +116,12 @@ MachineModuleInfoMachO &X86MCInstLower::
   return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>();
 }
 
-
 /// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
 /// operand to an MCSymbol.
-MCSymbol *X86MCInstLower::
-GetSymbolFromOperand(const MachineOperand &MO) const {
+MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const {
   const DataLayout &DL = MF.getDataLayout();
-  assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) && "Isn't a symbol reference");
+  assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) &&
+         "Isn't a symbol reference");
 
   MCSymbol *Sym = nullptr;
   SmallString<128> Name;
@@ -158,17 +158,17 @@ GetSymbolFromOperand(const MachineOperan
   // If the target flags on the operand changes the name of the symbol, do that
   // before we return the symbol.
   switch (MO.getTargetFlags()) {
-  default: break;
+  default:
+    break;
   case X86II::MO_DARWIN_NONLAZY:
   case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
     MachineModuleInfoImpl::StubValueTy &StubSym =
-      getMachOMMI().getGVStubEntry(Sym);
+        getMachOMMI().getGVStubEntry(Sym);
     if (!StubSym.getPointer()) {
       assert(MO.isGlobal() && "Extern symbol not handled yet");
-      StubSym =
-        MachineModuleInfoImpl::
-        StubValueTy(AsmPrinter.getSymbol(MO.getGlobal()),
-                    !MO.getGlobal()->hasInternalLinkage());
+      StubSym = MachineModuleInfoImpl::StubValueTy(
+          AsmPrinter.getSymbol(MO.getGlobal()),
+          !MO.getGlobal()->hasInternalLinkage());
     }
     break;
   }
@@ -185,44 +185,74 @@ MCOperand X86MCInstLower::LowerSymbolOpe
   MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
 
   switch (MO.getTargetFlags()) {
-  default: llvm_unreachable("Unknown target flag on GV operand");
-  case X86II::MO_NO_FLAG:    // No flag.
+  default:
+    llvm_unreachable("Unknown target flag on GV operand");
+  case X86II::MO_NO_FLAG: // No flag.
   // These affect the name of the symbol, not any suffix.
   case X86II::MO_DARWIN_NONLAZY:
   case X86II::MO_DLLIMPORT:
     break;
 
-  case X86II::MO_TLVP:      RefKind = MCSymbolRefExpr::VK_TLVP; break;
+  case X86II::MO_TLVP:
+    RefKind = MCSymbolRefExpr::VK_TLVP;
+    break;
   case X86II::MO_TLVP_PIC_BASE:
     Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
     // Subtract the pic base.
-    Expr = MCBinaryExpr::createSub(Expr,
-                                  MCSymbolRefExpr::create(MF.getPICBaseSymbol(),
-                                                           Ctx),
-                                   Ctx);
-    break;
-  case X86II::MO_SECREL:    RefKind = MCSymbolRefExpr::VK_SECREL; break;
-  case X86II::MO_TLSGD:     RefKind = MCSymbolRefExpr::VK_TLSGD; break;
-  case X86II::MO_TLSLD:     RefKind = MCSymbolRefExpr::VK_TLSLD; break;
-  case X86II::MO_TLSLDM:    RefKind = MCSymbolRefExpr::VK_TLSLDM; break;
-  case X86II::MO_GOTTPOFF:  RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break;
-  case X86II::MO_INDNTPOFF: RefKind = MCSymbolRefExpr::VK_INDNTPOFF; break;
-  case X86II::MO_TPOFF:     RefKind = MCSymbolRefExpr::VK_TPOFF; break;
-  case X86II::MO_DTPOFF:    RefKind = MCSymbolRefExpr::VK_DTPOFF; break;
-  case X86II::MO_NTPOFF:    RefKind = MCSymbolRefExpr::VK_NTPOFF; break;
-  case X86II::MO_GOTNTPOFF: RefKind = MCSymbolRefExpr::VK_GOTNTPOFF; break;
-  case X86II::MO_GOTPCREL:  RefKind = MCSymbolRefExpr::VK_GOTPCREL; break;
-  case X86II::MO_GOT:       RefKind = MCSymbolRefExpr::VK_GOT; break;
-  case X86II::MO_GOTOFF:    RefKind = MCSymbolRefExpr::VK_GOTOFF; break;
-  case X86II::MO_PLT:       RefKind = MCSymbolRefExpr::VK_PLT; break;
-  case X86II::MO_ABS8:      RefKind = MCSymbolRefExpr::VK_X86_ABS8; break;
+    Expr = MCBinaryExpr::createSub(
+        Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
+    break;
+  case X86II::MO_SECREL:
+    RefKind = MCSymbolRefExpr::VK_SECREL;
+    break;
+  case X86II::MO_TLSGD:
+    RefKind = MCSymbolRefExpr::VK_TLSGD;
+    break;
+  case X86II::MO_TLSLD:
+    RefKind = MCSymbolRefExpr::VK_TLSLD;
+    break;
+  case X86II::MO_TLSLDM:
+    RefKind = MCSymbolRefExpr::VK_TLSLDM;
+    break;
+  case X86II::MO_GOTTPOFF:
+    RefKind = MCSymbolRefExpr::VK_GOTTPOFF;
+    break;
+  case X86II::MO_INDNTPOFF:
+    RefKind = MCSymbolRefExpr::VK_INDNTPOFF;
+    break;
+  case X86II::MO_TPOFF:
+    RefKind = MCSymbolRefExpr::VK_TPOFF;
+    break;
+  case X86II::MO_DTPOFF:
+    RefKind = MCSymbolRefExpr::VK_DTPOFF;
+    break;
+  case X86II::MO_NTPOFF:
+    RefKind = MCSymbolRefExpr::VK_NTPOFF;
+    break;
+  case X86II::MO_GOTNTPOFF:
+    RefKind = MCSymbolRefExpr::VK_GOTNTPOFF;
+    break;
+  case X86II::MO_GOTPCREL:
+    RefKind = MCSymbolRefExpr::VK_GOTPCREL;
+    break;
+  case X86II::MO_GOT:
+    RefKind = MCSymbolRefExpr::VK_GOT;
+    break;
+  case X86II::MO_GOTOFF:
+    RefKind = MCSymbolRefExpr::VK_GOTOFF;
+    break;
+  case X86II::MO_PLT:
+    RefKind = MCSymbolRefExpr::VK_PLT;
+    break;
+  case X86II::MO_ABS8:
+    RefKind = MCSymbolRefExpr::VK_X86_ABS8;
+    break;
   case X86II::MO_PIC_BASE_OFFSET:
   case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
     Expr = MCSymbolRefExpr::create(Sym, Ctx);
     // Subtract the pic base.
-    Expr = MCBinaryExpr::createSub(Expr,
-                            MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx),
-                                   Ctx);
+    Expr = MCBinaryExpr::createSub(
+        Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
     if (MO.isJTI()) {
       assert(MAI.doesSetDirectiveSuppressReloc());
       // If .set directive is supported, use it to reduce the number of
@@ -240,13 +270,11 @@ MCOperand X86MCInstLower::LowerSymbolOpe
     Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx);
 
   if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
-    Expr = MCBinaryExpr::createAdd(Expr,
-                                   MCConstantExpr::create(MO.getOffset(), Ctx),
-                                   Ctx);
+    Expr = MCBinaryExpr::createAdd(
+        Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
   return MCOperand::createExpr(Expr);
 }
 
-
 /// \brief Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instruction with
 /// a short fixed-register form.
 static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) {
@@ -255,7 +283,8 @@ static void SimplifyShortImmForm(MCInst
          (Inst.getOperand(ImmOp).isImm() || Inst.getOperand(ImmOp).isExpr()) &&
          ((Inst.getNumOperands() == 3 && Inst.getOperand(1).isReg() &&
            Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) ||
-          Inst.getNumOperands() == 2) && "Unexpected instruction!");
+          Inst.getNumOperands() == 2) &&
+         "Unexpected instruction!");
 
   // Check whether the destination register can be fixed.
   unsigned Reg = Inst.getOperand(0).getReg();
@@ -277,7 +306,7 @@ static void SimplifyMOVSX(MCInst &Inst)
   switch (Inst.getOpcode()) {
   default:
     llvm_unreachable("Unexpected instruction!");
-  case X86::MOVSX16rr8:  // movsbw %al, %ax   --> cbtw
+  case X86::MOVSX16rr8: // movsbw %al, %ax   --> cbtw
     if (Op0 == X86::AX && Op1 == X86::AL)
       NewOpcode = X86::CBW;
     break;
@@ -309,14 +338,14 @@ static void SimplifyShortMoveForm(X86Asm
   unsigned AddrBase = IsStore;
   unsigned RegOp = IsStore ? 0 : 5;
   unsigned AddrOp = AddrBase + 3;
-  assert(Inst.getNumOperands() == 6 && Inst.getOperand(RegOp).isReg() &&
-         Inst.getOperand(AddrBase + X86::AddrBaseReg).isReg() &&
-         Inst.getOperand(AddrBase + X86::AddrScaleAmt).isImm() &&
-         Inst.getOperand(AddrBase + X86::AddrIndexReg).isReg() &&
-         Inst.getOperand(AddrBase + X86::AddrSegmentReg).isReg() &&
-         (Inst.getOperand(AddrOp).isExpr() ||
-          Inst.getOperand(AddrOp).isImm()) &&
-         "Unexpected instruction!");
+  assert(
+      Inst.getNumOperands() == 6 && Inst.getOperand(RegOp).isReg() &&
+      Inst.getOperand(AddrBase + X86::AddrBaseReg).isReg() &&
+      Inst.getOperand(AddrBase + X86::AddrScaleAmt).isImm() &&
+      Inst.getOperand(AddrBase + X86::AddrIndexReg).isReg() &&
+      Inst.getOperand(AddrBase + X86::AddrSegmentReg).isReg() &&
+      (Inst.getOperand(AddrOp).isExpr() || Inst.getOperand(AddrOp).isImm()) &&
+      "Unexpected instruction!");
 
   // Check whether the destination register can be fixed.
   unsigned Reg = Inst.getOperand(RegOp).getReg();
@@ -401,9 +430,9 @@ ReSimplify:
   case X86::LEA16r:
   case X86::LEA32r:
     // LEA should have a segment register, but it must be empty.
-    assert(OutMI.getNumOperands() == 1+X86::AddrNumOperands &&
+    assert(OutMI.getNumOperands() == 1 + X86::AddrNumOperands &&
            "Unexpected # of LEA operands");
-    assert(OutMI.getOperand(1+X86::AddrSegmentReg).getReg() == 0 &&
+    assert(OutMI.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 &&
            "LEA has segment specified!");
     break;
 
@@ -426,20 +455,47 @@ ReSimplify:
         X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())) {
       unsigned NewOpc;
       switch (OutMI.getOpcode()) {
-      default: llvm_unreachable("Invalid opcode");
-      case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr;   break;
-      case X86::VMOVAPDrr:        NewOpc = X86::VMOVAPDrr_REV;  break;
-      case X86::VMOVAPDYrr:       NewOpc = X86::VMOVAPDYrr_REV; break;
-      case X86::VMOVAPSrr:        NewOpc = X86::VMOVAPSrr_REV;  break;
-      case X86::VMOVAPSYrr:       NewOpc = X86::VMOVAPSYrr_REV; break;
-      case X86::VMOVDQArr:        NewOpc = X86::VMOVDQArr_REV;  break;
-      case X86::VMOVDQAYrr:       NewOpc = X86::VMOVDQAYrr_REV; break;
-      case X86::VMOVDQUrr:        NewOpc = X86::VMOVDQUrr_REV;  break;
-      case X86::VMOVDQUYrr:       NewOpc = X86::VMOVDQUYrr_REV; break;
-      case X86::VMOVUPDrr:        NewOpc = X86::VMOVUPDrr_REV;  break;
-      case X86::VMOVUPDYrr:       NewOpc = X86::VMOVUPDYrr_REV; break;
-      case X86::VMOVUPSrr:        NewOpc = X86::VMOVUPSrr_REV;  break;
-      case X86::VMOVUPSYrr:       NewOpc = X86::VMOVUPSYrr_REV; break;
+      default:
+        llvm_unreachable("Invalid opcode");
+      case X86::VMOVZPQILo2PQIrr:
+        NewOpc = X86::VMOVPQI2QIrr;
+        break;
+      case X86::VMOVAPDrr:
+        NewOpc = X86::VMOVAPDrr_REV;
+        break;
+      case X86::VMOVAPDYrr:
+        NewOpc = X86::VMOVAPDYrr_REV;
+        break;
+      case X86::VMOVAPSrr:
+        NewOpc = X86::VMOVAPSrr_REV;
+        break;
+      case X86::VMOVAPSYrr:
+        NewOpc = X86::VMOVAPSYrr_REV;
+        break;
+      case X86::VMOVDQArr:
+        NewOpc = X86::VMOVDQArr_REV;
+        break;
+      case X86::VMOVDQAYrr:
+        NewOpc = X86::VMOVDQAYrr_REV;
+        break;
+      case X86::VMOVDQUrr:
+        NewOpc = X86::VMOVDQUrr_REV;
+        break;
+      case X86::VMOVDQUYrr:
+        NewOpc = X86::VMOVDQUYrr_REV;
+        break;
+      case X86::VMOVUPDrr:
+        NewOpc = X86::VMOVUPDrr_REV;
+        break;
+      case X86::VMOVUPDYrr:
+        NewOpc = X86::VMOVUPDYrr_REV;
+        break;
+      case X86::VMOVUPSrr:
+        NewOpc = X86::VMOVUPSrr_REV;
+        break;
+      case X86::VMOVUPSYrr:
+        NewOpc = X86::VMOVUPSYrr_REV;
+        break;
       }
       OutMI.setOpcode(NewOpc);
     }
@@ -451,9 +507,14 @@ ReSimplify:
         X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
       unsigned NewOpc;
       switch (OutMI.getOpcode()) {
-      default: llvm_unreachable("Invalid opcode");
-      case X86::VMOVSDrr:   NewOpc = X86::VMOVSDrr_REV;   break;
-      case X86::VMOVSSrr:   NewOpc = X86::VMOVSSrr_REV;   break;
+      default:
+        llvm_unreachable("Invalid opcode");
+      case X86::VMOVSDrr:
+        NewOpc = X86::VMOVSDrr_REV;
+        break;
+      case X86::VMOVSSrr:
+        NewOpc = X86::VMOVSSrr_REV;
+        break;
       }
       OutMI.setOpcode(NewOpc);
     }
@@ -499,24 +560,30 @@ ReSimplify:
     break;
   }
 
-  // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump instruction.
-  { unsigned Opcode;
-  case X86::TAILJMPr:   Opcode = X86::JMP32r; goto SetTailJmpOpcode;
-  case X86::TAILJMPd:
-  case X86::TAILJMPd64: Opcode = X86::JMP_1;  goto SetTailJmpOpcode;
-  case X86::TAILJMPd_CC:
-  case X86::TAILJMPd64_CC:
-    Opcode = X86::GetCondBranchFromCond(
-        static_cast<X86::CondCode>(MI->getOperand(1).getImm()));
-    goto SetTailJmpOpcode;
-
-  SetTailJmpOpcode:
-    MCOperand Saved = OutMI.getOperand(0);
-    OutMI = MCInst();
-    OutMI.setOpcode(Opcode);
-    OutMI.addOperand(Saved);
-    break;
-  }
+    // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump
+    // instruction.
+    {
+      unsigned Opcode;
+    case X86::TAILJMPr:
+      Opcode = X86::JMP32r;
+      goto SetTailJmpOpcode;
+    case X86::TAILJMPd:
+    case X86::TAILJMPd64:
+      Opcode = X86::JMP_1;
+      goto SetTailJmpOpcode;
+    case X86::TAILJMPd_CC:
+    case X86::TAILJMPd64_CC:
+      Opcode = X86::GetCondBranchFromCond(
+          static_cast<X86::CondCode>(MI->getOperand(1).getImm()));
+      goto SetTailJmpOpcode;
+
+    SetTailJmpOpcode:
+      MCOperand Saved = OutMI.getOperand(0);
+      OutMI = MCInst();
+      OutMI.setOpcode(Opcode);
+      OutMI.addOperand(Saved);
+      break;
+    }
 
   case X86::DEC16r:
   case X86::DEC32r:
@@ -526,11 +593,20 @@ ReSimplify:
     if (!AsmPrinter.getSubtarget().is64Bit()) {
       unsigned Opcode;
       switch (OutMI.getOpcode()) {
-      default: llvm_unreachable("Invalid opcode");
-      case X86::DEC16r: Opcode = X86::DEC16r_alt; break;
-      case X86::DEC32r: Opcode = X86::DEC32r_alt; break;
-      case X86::INC16r: Opcode = X86::INC16r_alt; break;
-      case X86::INC32r: Opcode = X86::INC32r_alt; break;
+      default:
+        llvm_unreachable("Invalid opcode");
+      case X86::DEC16r:
+        Opcode = X86::DEC16r_alt;
+        break;
+      case X86::DEC32r:
+        Opcode = X86::DEC32r_alt;
+        break;
+      case X86::INC16r:
+        Opcode = X86::INC16r_alt;
+        break;
+      case X86::INC32r:
+        Opcode = X86::INC32r_alt;
+        break;
       }
       OutMI.setOpcode(Opcode);
     }
@@ -539,63 +615,169 @@ ReSimplify:
   // These are pseudo-ops for OR to help with the OR->ADD transformation.  We do
   // this with an ugly goto in case the resultant OR uses EAX and needs the
   // short form.
-  case X86::ADD16rr_DB:   OutMI.setOpcode(X86::OR16rr); goto ReSimplify;
-  case X86::ADD32rr_DB:   OutMI.setOpcode(X86::OR32rr); goto ReSimplify;
-  case X86::ADD64rr_DB:   OutMI.setOpcode(X86::OR64rr); goto ReSimplify;
-  case X86::ADD16ri_DB:   OutMI.setOpcode(X86::OR16ri); goto ReSimplify;
-  case X86::ADD32ri_DB:   OutMI.setOpcode(X86::OR32ri); goto ReSimplify;
-  case X86::ADD64ri32_DB: OutMI.setOpcode(X86::OR64ri32); goto ReSimplify;
-  case X86::ADD16ri8_DB:  OutMI.setOpcode(X86::OR16ri8); goto ReSimplify;
-  case X86::ADD32ri8_DB:  OutMI.setOpcode(X86::OR32ri8); goto ReSimplify;
-  case X86::ADD64ri8_DB:  OutMI.setOpcode(X86::OR64ri8); goto ReSimplify;
+  case X86::ADD16rr_DB:
+    OutMI.setOpcode(X86::OR16rr);
+    goto ReSimplify;
+  case X86::ADD32rr_DB:
+    OutMI.setOpcode(X86::OR32rr);
+    goto ReSimplify;
+  case X86::ADD64rr_DB:
+    OutMI.setOpcode(X86::OR64rr);
+    goto ReSimplify;
+  case X86::ADD16ri_DB:
+    OutMI.setOpcode(X86::OR16ri);
+    goto ReSimplify;
+  case X86::ADD32ri_DB:
+    OutMI.setOpcode(X86::OR32ri);
+    goto ReSimplify;
+  case X86::ADD64ri32_DB:
+    OutMI.setOpcode(X86::OR64ri32);
+    goto ReSimplify;
+  case X86::ADD16ri8_DB:
+    OutMI.setOpcode(X86::OR16ri8);
+    goto ReSimplify;
+  case X86::ADD32ri8_DB:
+    OutMI.setOpcode(X86::OR32ri8);
+    goto ReSimplify;
+  case X86::ADD64ri8_DB:
+    OutMI.setOpcode(X86::OR64ri8);
+    goto ReSimplify;
 
   // Atomic load and store require a separate pseudo-inst because Acquire
   // implies mayStore and Release implies mayLoad; fix these to regular MOV
   // instructions here
-  case X86::ACQUIRE_MOV8rm:    OutMI.setOpcode(X86::MOV8rm); goto ReSimplify;
-  case X86::ACQUIRE_MOV16rm:   OutMI.setOpcode(X86::MOV16rm); goto ReSimplify;
-  case X86::ACQUIRE_MOV32rm:   OutMI.setOpcode(X86::MOV32rm); goto ReSimplify;
-  case X86::ACQUIRE_MOV64rm:   OutMI.setOpcode(X86::MOV64rm); goto ReSimplify;
-  case X86::RELEASE_MOV8mr:    OutMI.setOpcode(X86::MOV8mr); goto ReSimplify;
-  case X86::RELEASE_MOV16mr:   OutMI.setOpcode(X86::MOV16mr); goto ReSimplify;
-  case X86::RELEASE_MOV32mr:   OutMI.setOpcode(X86::MOV32mr); goto ReSimplify;
-  case X86::RELEASE_MOV64mr:   OutMI.setOpcode(X86::MOV64mr); goto ReSimplify;
-  case X86::RELEASE_MOV8mi:    OutMI.setOpcode(X86::MOV8mi); goto ReSimplify;
-  case X86::RELEASE_MOV16mi:   OutMI.setOpcode(X86::MOV16mi); goto ReSimplify;
-  case X86::RELEASE_MOV32mi:   OutMI.setOpcode(X86::MOV32mi); goto ReSimplify;
-  case X86::RELEASE_MOV64mi32: OutMI.setOpcode(X86::MOV64mi32); goto ReSimplify;
-  case X86::RELEASE_ADD8mi:    OutMI.setOpcode(X86::ADD8mi); goto ReSimplify;
-  case X86::RELEASE_ADD8mr:    OutMI.setOpcode(X86::ADD8mr); goto ReSimplify;
-  case X86::RELEASE_ADD32mi:   OutMI.setOpcode(X86::ADD32mi); goto ReSimplify;
-  case X86::RELEASE_ADD32mr:   OutMI.setOpcode(X86::ADD32mr); goto ReSimplify;
-  case X86::RELEASE_ADD64mi32: OutMI.setOpcode(X86::ADD64mi32); goto ReSimplify;
-  case X86::RELEASE_ADD64mr:   OutMI.setOpcode(X86::ADD64mr); goto ReSimplify;
-  case X86::RELEASE_AND8mi:    OutMI.setOpcode(X86::AND8mi); goto ReSimplify;
-  case X86::RELEASE_AND8mr:    OutMI.setOpcode(X86::AND8mr); goto ReSimplify;
-  case X86::RELEASE_AND32mi:   OutMI.setOpcode(X86::AND32mi); goto ReSimplify;
-  case X86::RELEASE_AND32mr:   OutMI.setOpcode(X86::AND32mr); goto ReSimplify;
-  case X86::RELEASE_AND64mi32: OutMI.setOpcode(X86::AND64mi32); goto ReSimplify;
-  case X86::RELEASE_AND64mr:   OutMI.setOpcode(X86::AND64mr); goto ReSimplify;
-  case X86::RELEASE_OR8mi:     OutMI.setOpcode(X86::OR8mi); goto ReSimplify;
-  case X86::RELEASE_OR8mr:     OutMI.setOpcode(X86::OR8mr); goto ReSimplify;
-  case X86::RELEASE_OR32mi:    OutMI.setOpcode(X86::OR32mi); goto ReSimplify;
-  case X86::RELEASE_OR32mr:    OutMI.setOpcode(X86::OR32mr); goto ReSimplify;
-  case X86::RELEASE_OR64mi32:  OutMI.setOpcode(X86::OR64mi32); goto ReSimplify;
-  case X86::RELEASE_OR64mr:    OutMI.setOpcode(X86::OR64mr); goto ReSimplify;
-  case X86::RELEASE_XOR8mi:    OutMI.setOpcode(X86::XOR8mi); goto ReSimplify;
-  case X86::RELEASE_XOR8mr:    OutMI.setOpcode(X86::XOR8mr); goto ReSimplify;
-  case X86::RELEASE_XOR32mi:   OutMI.setOpcode(X86::XOR32mi); goto ReSimplify;
-  case X86::RELEASE_XOR32mr:   OutMI.setOpcode(X86::XOR32mr); goto ReSimplify;
-  case X86::RELEASE_XOR64mi32: OutMI.setOpcode(X86::XOR64mi32); goto ReSimplify;
-  case X86::RELEASE_XOR64mr:   OutMI.setOpcode(X86::XOR64mr); goto ReSimplify;
-  case X86::RELEASE_INC8m:     OutMI.setOpcode(X86::INC8m); goto ReSimplify;
-  case X86::RELEASE_INC16m:    OutMI.setOpcode(X86::INC16m); goto ReSimplify;
-  case X86::RELEASE_INC32m:    OutMI.setOpcode(X86::INC32m); goto ReSimplify;
-  case X86::RELEASE_INC64m:    OutMI.setOpcode(X86::INC64m); goto ReSimplify;
-  case X86::RELEASE_DEC8m:     OutMI.setOpcode(X86::DEC8m); goto ReSimplify;
-  case X86::RELEASE_DEC16m:    OutMI.setOpcode(X86::DEC16m); goto ReSimplify;
-  case X86::RELEASE_DEC32m:    OutMI.setOpcode(X86::DEC32m); goto ReSimplify;
-  case X86::RELEASE_DEC64m:    OutMI.setOpcode(X86::DEC64m); goto ReSimplify;
+  case X86::ACQUIRE_MOV8rm:
+    OutMI.setOpcode(X86::MOV8rm);
+    goto ReSimplify;
+  case X86::ACQUIRE_MOV16rm:
+    OutMI.setOpcode(X86::MOV16rm);
+    goto ReSimplify;
+  case X86::ACQUIRE_MOV32rm:
+    OutMI.setOpcode(X86::MOV32rm);
+    goto ReSimplify;
+  case X86::ACQUIRE_MOV64rm:
+    OutMI.setOpcode(X86::MOV64rm);
+    goto ReSimplify;
+  case X86::RELEASE_MOV8mr:
+    OutMI.setOpcode(X86::MOV8mr);
+    goto ReSimplify;
+  case X86::RELEASE_MOV16mr:
+    OutMI.setOpcode(X86::MOV16mr);
+    goto ReSimplify;
+  case X86::RELEASE_MOV32mr:
+    OutMI.setOpcode(X86::MOV32mr);
+    goto ReSimplify;
+  case X86::RELEASE_MOV64mr:
+    OutMI.setOpcode(X86::MOV64mr);
+    goto ReSimplify;
+  case X86::RELEASE_MOV8mi:
+    OutMI.setOpcode(X86::MOV8mi);
+    goto ReSimplify;
+  case X86::RELEASE_MOV16mi:
+    OutMI.setOpcode(X86::MOV16mi);
+    goto ReSimplify;
+  case X86::RELEASE_MOV32mi:
+    OutMI.setOpcode(X86::MOV32mi);
+    goto ReSimplify;
+  case X86::RELEASE_MOV64mi32:
+    OutMI.setOpcode(X86::MOV64mi32);
+    goto ReSimplify;
+  case X86::RELEASE_ADD8mi:
+    OutMI.setOpcode(X86::ADD8mi);
+    goto ReSimplify;
+  case X86::RELEASE_ADD8mr:
+    OutMI.setOpcode(X86::ADD8mr);
+    goto ReSimplify;
+  case X86::RELEASE_ADD32mi:
+    OutMI.setOpcode(X86::ADD32mi);
+    goto ReSimplify;
+  case X86::RELEASE_ADD32mr:
+    OutMI.setOpcode(X86::ADD32mr);
+    goto ReSimplify;
+  case X86::RELEASE_ADD64mi32:
+    OutMI.setOpcode(X86::ADD64mi32);
+    goto ReSimplify;
+  case X86::RELEASE_ADD64mr:
+    OutMI.setOpcode(X86::ADD64mr);
+    goto ReSimplify;
+  case X86::RELEASE_AND8mi:
+    OutMI.setOpcode(X86::AND8mi);
+    goto ReSimplify;
+  case X86::RELEASE_AND8mr:
+    OutMI.setOpcode(X86::AND8mr);
+    goto ReSimplify;
+  case X86::RELEASE_AND32mi:
+    OutMI.setOpcode(X86::AND32mi);
+    goto ReSimplify;
+  case X86::RELEASE_AND32mr:
+    OutMI.setOpcode(X86::AND32mr);
+    goto ReSimplify;
+  case X86::RELEASE_AND64mi32:
+    OutMI.setOpcode(X86::AND64mi32);
+    goto ReSimplify;
+  case X86::RELEASE_AND64mr:
+    OutMI.setOpcode(X86::AND64mr);
+    goto ReSimplify;
+  case X86::RELEASE_OR8mi:
+    OutMI.setOpcode(X86::OR8mi);
+    goto ReSimplify;
+  case X86::RELEASE_OR8mr:
+    OutMI.setOpcode(X86::OR8mr);
+    goto ReSimplify;
+  case X86::RELEASE_OR32mi:
+    OutMI.setOpcode(X86::OR32mi);
+    goto ReSimplify;
+  case X86::RELEASE_OR32mr:
+    OutMI.setOpcode(X86::OR32mr);
+    goto ReSimplify;
+  case X86::RELEASE_OR64mi32:
+    OutMI.setOpcode(X86::OR64mi32);
+    goto ReSimplify;
+  case X86::RELEASE_OR64mr:
+    OutMI.setOpcode(X86::OR64mr);
+    goto ReSimplify;
+  case X86::RELEASE_XOR8mi:
+    OutMI.setOpcode(X86::XOR8mi);
+    goto ReSimplify;
+  case X86::RELEASE_XOR8mr:
+    OutMI.setOpcode(X86::XOR8mr);
+    goto ReSimplify;
+  case X86::RELEASE_XOR32mi:
+    OutMI.setOpcode(X86::XOR32mi);
+    goto ReSimplify;
+  case X86::RELEASE_XOR32mr:
+    OutMI.setOpcode(X86::XOR32mr);
+    goto ReSimplify;
+  case X86::RELEASE_XOR64mi32:
+    OutMI.setOpcode(X86::XOR64mi32);
+    goto ReSimplify;
+  case X86::RELEASE_XOR64mr:
+    OutMI.setOpcode(X86::XOR64mr);
+    goto ReSimplify;
+  case X86::RELEASE_INC8m:
+    OutMI.setOpcode(X86::INC8m);
+    goto ReSimplify;
+  case X86::RELEASE_INC16m:
+    OutMI.setOpcode(X86::INC16m);
+    goto ReSimplify;
+  case X86::RELEASE_INC32m:
+    OutMI.setOpcode(X86::INC32m);
+    goto ReSimplify;
+  case X86::RELEASE_INC64m:
+    OutMI.setOpcode(X86::INC64m);
+    goto ReSimplify;
+  case X86::RELEASE_DEC8m:
+    OutMI.setOpcode(X86::DEC8m);
+    goto ReSimplify;
+  case X86::RELEASE_DEC16m:
+    OutMI.setOpcode(X86::DEC16m);
+    goto ReSimplify;
+  case X86::RELEASE_DEC32m:
+    OutMI.setOpcode(X86::DEC32m);
+    goto ReSimplify;
+  case X86::RELEASE_DEC64m:
+    OutMI.setOpcode(X86::DEC64m);
+    goto ReSimplify;
 
   // We don't currently select the correct instruction form for instructions
   // which have a short %eax, etc. form. Handle this by custom lowering, for
@@ -614,68 +796,181 @@ ReSimplify:
   case X86::MOV32rm: {
     unsigned NewOpc;
     switch (OutMI.getOpcode()) {
-    default: llvm_unreachable("Invalid opcode");
+    default:
+      llvm_unreachable("Invalid opcode");
     case X86::MOV8mr_NOREX:
-    case X86::MOV8mr:     NewOpc = X86::MOV8o32a; break;
+    case X86::MOV8mr:
+      NewOpc = X86::MOV8o32a;
+      break;
     case X86::MOV8rm_NOREX:
-    case X86::MOV8rm:     NewOpc = X86::MOV8ao32; break;
-    case X86::MOV16mr:    NewOpc = X86::MOV16o32a; break;
-    case X86::MOV16rm:    NewOpc = X86::MOV16ao32; break;
-    case X86::MOV32mr:    NewOpc = X86::MOV32o32a; break;
-    case X86::MOV32rm:    NewOpc = X86::MOV32ao32; break;
+    case X86::MOV8rm:
+      NewOpc = X86::MOV8ao32;
+      break;
+    case X86::MOV16mr:
+      NewOpc = X86::MOV16o32a;
+      break;
+    case X86::MOV16rm:
+      NewOpc = X86::MOV16ao32;
+      break;
+    case X86::MOV32mr:
+      NewOpc = X86::MOV32o32a;
+      break;
+    case X86::MOV32rm:
+      NewOpc = X86::MOV32ao32;
+      break;
     }
     SimplifyShortMoveForm(AsmPrinter, OutMI, NewOpc);
     break;
   }
 
-  case X86::ADC8ri: case X86::ADC16ri: case X86::ADC32ri: case X86::ADC64ri32:
-  case X86::ADD8ri: case X86::ADD16ri: case X86::ADD32ri: case X86::ADD64ri32:
-  case X86::AND8ri: case X86::AND16ri: case X86::AND32ri: case X86::AND64ri32:
-  case X86::CMP8ri: case X86::CMP16ri: case X86::CMP32ri: case X86::CMP64ri32:
-  case X86::OR8ri:  case X86::OR16ri:  case X86::OR32ri:  case X86::OR64ri32:
-  case X86::SBB8ri: case X86::SBB16ri: case X86::SBB32ri: case X86::SBB64ri32:
-  case X86::SUB8ri: case X86::SUB16ri: case X86::SUB32ri: case X86::SUB64ri32:
-  case X86::TEST8ri:case X86::TEST16ri:case X86::TEST32ri:case X86::TEST64ri32:
-  case X86::XOR8ri: case X86::XOR16ri: case X86::XOR32ri: case X86::XOR64ri32: {
+  case X86::ADC8ri:
+  case X86::ADC16ri:
+  case X86::ADC32ri:
+  case X86::ADC64ri32:
+  case X86::ADD8ri:
+  case X86::ADD16ri:
+  case X86::ADD32ri:
+  case X86::ADD64ri32:
+  case X86::AND8ri:
+  case X86::AND16ri:
+  case X86::AND32ri:
+  case X86::AND64ri32:
+  case X86::CMP8ri:
+  case X86::CMP16ri:
+  case X86::CMP32ri:
+  case X86::CMP64ri32:
+  case X86::OR8ri:
+  case X86::OR16ri:
+  case X86::OR32ri:
+  case X86::OR64ri32:
+  case X86::SBB8ri:
+  case X86::SBB16ri:
+  case X86::SBB32ri:
+  case X86::SBB64ri32:
+  case X86::SUB8ri:
+  case X86::SUB16ri:
+  case X86::SUB32ri:
+  case X86::SUB64ri32:
+  case X86::TEST8ri:
+  case X86::TEST16ri:
+  case X86::TEST32ri:
+  case X86::TEST64ri32:
+  case X86::XOR8ri:
+  case X86::XOR16ri:
+  case X86::XOR32ri:
+  case X86::XOR64ri32: {
     unsigned NewOpc;
     switch (OutMI.getOpcode()) {
-    default: llvm_unreachable("Invalid opcode");
-    case X86::ADC8ri:     NewOpc = X86::ADC8i8;    break;
-    case X86::ADC16ri:    NewOpc = X86::ADC16i16;  break;
-    case X86::ADC32ri:    NewOpc = X86::ADC32i32;  break;
-    case X86::ADC64ri32:  NewOpc = X86::ADC64i32;  break;
-    case X86::ADD8ri:     NewOpc = X86::ADD8i8;    break;
-    case X86::ADD16ri:    NewOpc = X86::ADD16i16;  break;
-    case X86::ADD32ri:    NewOpc = X86::ADD32i32;  break;
-    case X86::ADD64ri32:  NewOpc = X86::ADD64i32;  break;
-    case X86::AND8ri:     NewOpc = X86::AND8i8;    break;
-    case X86::AND16ri:    NewOpc = X86::AND16i16;  break;
-    case X86::AND32ri:    NewOpc = X86::AND32i32;  break;
-    case X86::AND64ri32:  NewOpc = X86::AND64i32;  break;
-    case X86::CMP8ri:     NewOpc = X86::CMP8i8;    break;
-    case X86::CMP16ri:    NewOpc = X86::CMP16i16;  break;
-    case X86::CMP32ri:    NewOpc = X86::CMP32i32;  break;
-    case X86::CMP64ri32:  NewOpc = X86::CMP64i32;  break;
-    case X86::OR8ri:      NewOpc = X86::OR8i8;     break;
-    case X86::OR16ri:     NewOpc = X86::OR16i16;   break;
-    case X86::OR32ri:     NewOpc = X86::OR32i32;   break;
-    case X86::OR64ri32:   NewOpc = X86::OR64i32;   break;
-    case X86::SBB8ri:     NewOpc = X86::SBB8i8;    break;
-    case X86::SBB16ri:    NewOpc = X86::SBB16i16;  break;
-    case X86::SBB32ri:    NewOpc = X86::SBB32i32;  break;
-    case X86::SBB64ri32:  NewOpc = X86::SBB64i32;  break;
-    case X86::SUB8ri:     NewOpc = X86::SUB8i8;    break;
-    case X86::SUB16ri:    NewOpc = X86::SUB16i16;  break;
-    case X86::SUB32ri:    NewOpc = X86::SUB32i32;  break;
-    case X86::SUB64ri32:  NewOpc = X86::SUB64i32;  break;
-    case X86::TEST8ri:    NewOpc = X86::TEST8i8;   break;
-    case X86::TEST16ri:   NewOpc = X86::TEST16i16; break;
-    case X86::TEST32ri:   NewOpc = X86::TEST32i32; break;
-    case X86::TEST64ri32: NewOpc = X86::TEST64i32; break;
-    case X86::XOR8ri:     NewOpc = X86::XOR8i8;    break;
-    case X86::XOR16ri:    NewOpc = X86::XOR16i16;  break;
-    case X86::XOR32ri:    NewOpc = X86::XOR32i32;  break;
-    case X86::XOR64ri32:  NewOpc = X86::XOR64i32;  break;
+    default:
+      llvm_unreachable("Invalid opcode");
+    case X86::ADC8ri:
+      NewOpc = X86::ADC8i8;
+      break;
+    case X86::ADC16ri:
+      NewOpc = X86::ADC16i16;
+      break;
+    case X86::ADC32ri:
+      NewOpc = X86::ADC32i32;
+      break;
+    case X86::ADC64ri32:
+      NewOpc = X86::ADC64i32;
+      break;
+    case X86::ADD8ri:
+      NewOpc = X86::ADD8i8;
+      break;
+    case X86::ADD16ri:
+      NewOpc = X86::ADD16i16;
+      break;
+    case X86::ADD32ri:
+      NewOpc = X86::ADD32i32;
+      break;
+    case X86::ADD64ri32:
+      NewOpc = X86::ADD64i32;
+      break;
+    case X86::AND8ri:
+      NewOpc = X86::AND8i8;
+      break;
+    case X86::AND16ri:
+      NewOpc = X86::AND16i16;
+      break;
+    case X86::AND32ri:
+      NewOpc = X86::AND32i32;
+      break;
+    case X86::AND64ri32:
+      NewOpc = X86::AND64i32;
+      break;
+    case X86::CMP8ri:
+      NewOpc = X86::CMP8i8;
+      break;
+    case X86::CMP16ri:
+      NewOpc = X86::CMP16i16;
+      break;
+    case X86::CMP32ri:
+      NewOpc = X86::CMP32i32;
+      break;
+    case X86::CMP64ri32:
+      NewOpc = X86::CMP64i32;
+      break;
+    case X86::OR8ri:
+      NewOpc = X86::OR8i8;
+      break;
+    case X86::OR16ri:
+      NewOpc = X86::OR16i16;
+      break;
+    case X86::OR32ri:
+      NewOpc = X86::OR32i32;
+      break;
+    case X86::OR64ri32:
+      NewOpc = X86::OR64i32;
+      break;
+    case X86::SBB8ri:
+      NewOpc = X86::SBB8i8;
+      break;
+    case X86::SBB16ri:
+      NewOpc = X86::SBB16i16;
+      break;
+    case X86::SBB32ri:
+      NewOpc = X86::SBB32i32;
+      break;
+    case X86::SBB64ri32:
+      NewOpc = X86::SBB64i32;
+      break;
+    case X86::SUB8ri:
+      NewOpc = X86::SUB8i8;
+      break;
+    case X86::SUB16ri:
+      NewOpc = X86::SUB16i16;
+      break;
+    case X86::SUB32ri:
+      NewOpc = X86::SUB32i32;
+      break;
+    case X86::SUB64ri32:
+      NewOpc = X86::SUB64i32;
+      break;
+    case X86::TEST8ri:
+      NewOpc = X86::TEST8i8;
+      break;
+    case X86::TEST16ri:
+      NewOpc = X86::TEST16i16;
+      break;
+    case X86::TEST32ri:
+      NewOpc = X86::TEST32i32;
+      break;
+    case X86::TEST64ri32:
+      NewOpc = X86::TEST64i32;
+      break;
+    case X86::XOR8ri:
+      NewOpc = X86::XOR8i8;
+      break;
+    case X86::XOR16ri:
+      NewOpc = X86::XOR16i16;
+      break;
+    case X86::XOR32ri:
+      NewOpc = X86::XOR32i32;
+      break;
+    case X86::XOR64ri32:
+      NewOpc = X86::XOR64i32;
+      break;
     }
     SimplifyShortImmForm(OutMI, NewOpc);
     break;
@@ -705,18 +1000,18 @@ void X86AsmPrinter::LowerTlsAddr(X86MCIn
 
   MCSymbolRefExpr::VariantKind SRVK;
   switch (MI.getOpcode()) {
-    case X86::TLS_addr32:
-    case X86::TLS_addr64:
-      SRVK = MCSymbolRefExpr::VK_TLSGD;
-      break;
-    case X86::TLS_base_addr32:
-      SRVK = MCSymbolRefExpr::VK_TLSLDM;
-      break;
-    case X86::TLS_base_addr64:
-      SRVK = MCSymbolRefExpr::VK_TLSLD;
-      break;
-    default:
-      llvm_unreachable("unexpected opcode");
+  case X86::TLS_addr32:
+  case X86::TLS_addr64:
+    SRVK = MCSymbolRefExpr::VK_TLSGD;
+    break;
+  case X86::TLS_base_addr32:
+    SRVK = MCSymbolRefExpr::VK_TLSLDM;
+    break;
+  case X86::TLS_base_addr64:
+    SRVK = MCSymbolRefExpr::VK_TLSLD;
+    break;
+  default:
+    llvm_unreachable("unexpected opcode");
   }
 
   MCSymbol *sym = MCInstLowering.GetSymbolFromOperand(MI.getOperand(3));
@@ -759,13 +1054,11 @@ void X86AsmPrinter::LowerTlsAddr(X86MCIn
   StringRef name = is64Bits ? "__tls_get_addr" : "___tls_get_addr";
   MCSymbol *tlsGetAddr = context.getOrCreateSymbol(name);
   const MCSymbolRefExpr *tlsRef =
-    MCSymbolRefExpr::create(tlsGetAddr,
-                            MCSymbolRefExpr::VK_PLT,
-                            context);
-
-  EmitAndCountInstruction(MCInstBuilder(is64Bits ? X86::CALL64pcrel32
-                                                 : X86::CALLpcrel32)
-                            .addExpr(tlsRef));
+      MCSymbolRefExpr::create(tlsGetAddr, MCSymbolRefExpr::VK_PLT, context);
+
+  EmitAndCountInstruction(
+      MCInstBuilder(is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32)
+          .addExpr(tlsRef));
 }
 
 /// \brief Emit the largest nop instruction smaller than or equal to \p NumBytes
@@ -782,22 +1075,62 @@ static unsigned EmitNop(MCStreamer &OS,
   BaseReg = X86::RAX;
   ScaleVal = 1;
   switch (NumBytes) {
-  case  0: llvm_unreachable("Zero nops?"); break;
-  case  1: NopSize = 1; Opc = X86::NOOP; break;
-  case  2: NopSize = 2; Opc = X86::XCHG16ar; break;
-  case  3: NopSize = 3; Opc = X86::NOOPL; break;
-  case  4: NopSize = 4; Opc = X86::NOOPL; Displacement = 8; break;
-  case  5: NopSize = 5; Opc = X86::NOOPL; Displacement = 8;
-           IndexReg = X86::RAX; break;
-  case  6: NopSize = 6; Opc = X86::NOOPW; Displacement = 8;
-           IndexReg = X86::RAX; break;
-  case  7: NopSize = 7; Opc = X86::NOOPL; Displacement = 512; break;
-  case  8: NopSize = 8; Opc = X86::NOOPL; Displacement = 512;
-           IndexReg = X86::RAX; break;
-  case  9: NopSize = 9; Opc = X86::NOOPW; Displacement = 512;
-           IndexReg = X86::RAX; break;
-  default: NopSize = 10; Opc = X86::NOOPW; Displacement = 512;
-           IndexReg = X86::RAX; SegmentReg = X86::CS; break;
+  case 0:
+    llvm_unreachable("Zero nops?");
+    break;
+  case 1:
+    NopSize = 1;
+    Opc = X86::NOOP;
+    break;
+  case 2:
+    NopSize = 2;
+    Opc = X86::XCHG16ar;
+    break;
+  case 3:
+    NopSize = 3;
+    Opc = X86::NOOPL;
+    break;
+  case 4:
+    NopSize = 4;
+    Opc = X86::NOOPL;
+    Displacement = 8;
+    break;
+  case 5:
+    NopSize = 5;
+    Opc = X86::NOOPL;
+    Displacement = 8;
+    IndexReg = X86::RAX;
+    break;
+  case 6:
+    NopSize = 6;
+    Opc = X86::NOOPW;
+    Displacement = 8;
+    IndexReg = X86::RAX;
+    break;
+  case 7:
+    NopSize = 7;
+    Opc = X86::NOOPL;
+    Displacement = 512;
+    break;
+  case 8:
+    NopSize = 8;
+    Opc = X86::NOOPL;
+    Displacement = 512;
+    IndexReg = X86::RAX;
+    break;
+  case 9:
+    NopSize = 9;
+    Opc = X86::NOOPW;
+    Displacement = 512;
+    IndexReg = X86::RAX;
+    break;
+  default:
+    NopSize = 10;
+    Opc = X86::NOOPW;
+    Displacement = 512;
+    IndexReg = X86::RAX;
+    SegmentReg = X86::CS;
+    break;
   }
 
   unsigned NumPrefixes = std::min(NumBytes - NopSize, 5U);
@@ -971,7 +1304,7 @@ void X86AsmPrinter::LowerPATCHABLE_OP(co
       unsigned NopSize = EmitNop(*OutStreamer, MinSize, Subtarget->is64Bit(),
                                  getSubtargetInfo());
       assert(NopSize == MinSize && "Could not implement MinSize!");
-      (void) NopSize;
+      (void)NopSize;
     }
   }
 
@@ -1016,9 +1349,8 @@ void X86AsmPrinter::LowerPATCHPOINT(cons
       break;
     case MachineOperand::MO_ExternalSymbol:
     case MachineOperand::MO_GlobalAddress:
-      CalleeMCOp =
-        MCIL.LowerSymbolOperand(CalleeMO,
-                                MCIL.GetSymbolFromOperand(CalleeMO));
+      CalleeMCOp = MCIL.LowerSymbolOperand(CalleeMO,
+                                           MCIL.GetSymbolFromOperand(CalleeMO));
       break;
     }
 
@@ -1084,8 +1416,10 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT
 
   // The default C calling convention will place two arguments into %rcx and
   // %rdx -- so we only work with those.
-  unsigned UsedRegs[] = {X86::RDI, X86::RSI};
+  unsigned DestRegs[] = {X86::RDI, X86::RSI};
   bool UsedMask[] = {false, false};
+  // Filled out in loop.
+  unsigned SrcRegs[] = {0, 0};
 
   // Then we put the operands in the %rdi and %rsi registers. We spill the
   // values in the register before we clobber them, and mark them as used in
@@ -1095,18 +1429,22 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT
   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
     if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
       assert(Op->isReg() && "Only support arguments in registers");
-      if (Op->getReg() != UsedRegs[I]) {
+      SrcRegs[I] = Op->getReg();
+      if (SrcRegs[I] != DestRegs[I]) {
         UsedMask[I] = true;
         EmitAndCountInstruction(
-            MCInstBuilder(X86::PUSH64r).addReg(UsedRegs[I]));
-        EmitAndCountInstruction(MCInstBuilder(X86::MOV64rr)
-                                    .addReg(UsedRegs[I])
-                                    .addReg(Op->getReg()));
+            MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
       } else {
         EmitNops(*OutStreamer, 4, Subtarget->is64Bit(), getSubtargetInfo());
       }
     }
 
+  // Now that the register values are stashed, mov arguments into place.
+  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
+    if (SrcRegs[I] != DestRegs[I])
+      EmitAndCountInstruction(
+          MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));
+
   // We emit a hard dependency on the __xray_CustomEvent symbol, which is the
   // name of the trampoline to be implemented by the XRay runtime.
   auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent");
@@ -1121,7 +1459,7 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT
   // Restore caller-saved and used registers.
   for (unsigned I = sizeof UsedMask; I-- > 0;)
     if (UsedMask[I])
-      EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(UsedRegs[I]));
+      EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
     else
       EmitNops(*OutStreamer, 1, Subtarget->is64Bit(), getSubtargetInfo());
 
@@ -1133,6 +1471,102 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT
   recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 1);
 }
 
+void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
+                                                    X86MCInstLower &MCIL) {
+  assert(Subtarget->is64Bit() && "XRay typed events only support X86-64");
+
+  // We want to emit the following pattern, which follows the x86 calling
+  // convention to prepare for the trampoline call to be patched in.
+  //
+  //   .p2align 1, ...
+  // .Lxray_typed_event_sled_N:
+  //   jmp +N                        // jump across the instrumentation sled
+  //   ...                           // set up arguments in register
+  //   callq __xray_TypedEvent@plt  // force dependency on the symbol
+  //   ...
+  //   <jump here>
+  //
+  // After patching, it would look something like:
+  //
+  //   nopw (2-byte nop)
+  //   ...
+  //   callq __xray_TypedEvent  // already lowered
+  //   ...
+  //
+  // ---
+  // First we emit the label and the jump.
+  auto CurSled = OutContext.createTempSymbol("xray_typed_event_sled_", true);
+  OutStreamer->AddComment("# XRay Typed Event Log");
+  OutStreamer->EmitCodeAlignment(2);
+  OutStreamer->EmitLabel(CurSled);
+
+  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
+  // an operand (computed as an offset from the jmp instruction).
+  // FIXME: Find another less hacky way to force the relative jump.
+  OutStreamer->EmitBinaryData("\xeb\x14");
+
+  // An x86-64 convention may place the three arguments in %rcx, %rdx, and
+  // %r8, so we handle those. Or we may be called via the System V ABI, in
+  // which case we don't have to do any translation.
+  unsigned DestRegs[] = {X86::RDI, X86::RSI, X86::RDX};
+  bool UsedMask[] = {false, false, false};
+
+  // Will fill out src regs in the loop.
+  unsigned SrcRegs[] = {0, 0, 0};
+
+  // Then we put the operands in the System V registers. We spill the values in
+  // the registers before we clobber them, and mark them as used in UsedMask.
+  // In case the arguments are already in the correct register, we emit nops
+  // appropriately sized to keep the sled the same size in every situation.
+  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
+    if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
+      // TODO: Is register-only support adequate?
+      assert(Op->isReg() && "Only supports arguments in registers");
+      SrcRegs[I] = Op->getReg();
+      if (SrcRegs[I] != DestRegs[I]) {
+        UsedMask[I] = true;
+        EmitAndCountInstruction(
+            MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
+      } else {
+        EmitNops(*OutStreamer, 4, Subtarget->is64Bit(), getSubtargetInfo());
+      }
+    }
+
+  // The loop above only stashes the destination registers, or emits nops if
+  // the arguments are already in the right place. The actual moves are
+  // postponed until all of the registers are stashed, so that nothing is
+  // clobbered. We've already added nops to account for the size of mov and
+  // push if the register is in the right place, so we only have to worry about
+  // emitting the movs.
+  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
+    if (UsedMask[I])
+      EmitAndCountInstruction(
+          MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));
+
+  // We emit a hard dependency on the __xray_TypedEvent symbol, which is the
+  // name of the trampoline to be implemented by the XRay runtime.
+  auto TSym = OutContext.getOrCreateSymbol("__xray_TypedEvent");
+  MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
+  if (isPositionIndependent())
+    TOp.setTargetFlags(X86II::MO_PLT);
+
+  // Emit the call instruction.
+  EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
+                              .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
+
+  // Restore the registers we spilled above, in reverse order.
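+  // Note: `sizeof UsedMask` yields the element count here because
+  // sizeof(bool) is 1 on the hosts LLVM supports.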
+  for (unsigned I = sizeof UsedMask; I-- > 0;)
+    if (UsedMask[I])
+      EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
+    else
+      EmitNops(*OutStreamer, 1, Subtarget->is64Bit(), getSubtargetInfo());
+
+  OutStreamer->AddComment("xray typed event end.");
+
+  // Record the sled version.
+  recordSled(CurSled, MI, SledKind::TYPED_EVENT, 0);
+}
+
 void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
                                                   X86MCInstLower &MCIL) {
   // We want to emit the following pattern:
@@ -1190,7 +1624,8 @@ void X86AsmPrinter::LowerPATCHABLE_RET(c
   recordSled(CurSled, MI, SledKind::FUNCTION_EXIT);
 }
 
-void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, X86MCInstLower &MCIL) {
+void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
+                                             X86MCInstLower &MCIL) {
   // Like PATCHABLE_RET, we have the actual instruction in the operands to this
   // instruction so we lower that particular instruction and its operands.
   // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how
@@ -1244,8 +1679,7 @@ static const Constant *getConstantFromPo
 
   ArrayRef<MachineConstantPoolEntry> Constants =
       MI.getParent()->getParent()->getConstantPool()->getConstants();
-  const MachineConstantPoolEntry &ConstantEntry =
-      Constants[Op.getIndex()];
+  const MachineConstantPoolEntry &ConstantEntry = Constants[Op.getIndex()];
 
   // Bail if this is a machine constant pool entry, we won't be able to dig out
   // anything useful.
@@ -1258,10 +1692,8 @@ static const Constant *getConstantFromPo
   return C;
 }
 
-static std::string getShuffleComment(const MachineInstr *MI,
-                                     unsigned SrcOp1Idx,
-                                     unsigned SrcOp2Idx,
-                                     ArrayRef<int> Mask) {
+static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx,
+                                     unsigned SrcOp2Idx, ArrayRef<int> Mask) {
   std::string Comment;
 
   // Compute the name for a register. This is really goofy because we have
@@ -1449,7 +1881,8 @@ void X86AsmPrinter::EmitSEHInstruction(c
 
 void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
   X86MCInstLower MCInstLowering(*MF, *this);
-  const X86RegisterInfo *RI = MF->getSubtarget<X86Subtarget>().getRegisterInfo();
+  const X86RegisterInfo *RI =
+      MF->getSubtarget<X86Subtarget>().getRegisterInfo();
 
   // Add a comment about EVEX-2-VEX compression for AVX-512 instrs that
   // are compressed from EVEX encoding to VEX encoding.
@@ -1467,7 +1900,6 @@ void X86AsmPrinter::EmitInstruction(cons
     OutStreamer->emitRawComment("MEMBARRIER");
     return;
 
-
   case X86::EH_RETURN:
   case X86::EH_RETURN64: {
     // Lower these as normal, but add some comments.
@@ -1519,13 +1951,14 @@ void X86AsmPrinter::EmitInstruction(cons
     MCSymbol *PICBase = MF->getPICBaseSymbol();
     // FIXME: We would like an efficient form for this, so we don't have to do a
     // lot of extra uniquing.
-    EmitAndCountInstruction(MCInstBuilder(X86::CALLpcrel32)
-      .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));
+    EmitAndCountInstruction(
+        MCInstBuilder(X86::CALLpcrel32)
+            .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));
 
-    const X86FrameLowering* FrameLowering =
+    const X86FrameLowering *FrameLowering =
         MF->getSubtarget<X86Subtarget>().getFrameLowering();
     bool hasFP = FrameLowering->hasFP(*MF);
-    
+
     // TODO: This is needed only if we require precise CFA.
     bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() &&
                                !OutStreamer->getDwarfFrameInfos().back().End;
@@ -1540,8 +1973,8 @@ void X86AsmPrinter::EmitInstruction(cons
     OutStreamer->EmitLabel(PICBase);
 
     // popl $reg
-    EmitAndCountInstruction(MCInstBuilder(X86::POP32r)
-                            .addReg(MI->getOperand(0).getReg()));
+    EmitAndCountInstruction(
+        MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg()));
 
     if (HasActiveDwarfFrame && !hasFP) {
       OutStreamer->EmitCFIAdjustCfaOffset(stackGrowth);
@@ -1569,16 +2002,16 @@ void X86AsmPrinter::EmitInstruction(cons
 
     const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
     const MCExpr *PICBase =
-      MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
+        MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
     DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext);
 
-    DotExpr = MCBinaryExpr::createAdd(MCSymbolRefExpr::create(OpSym,OutContext),
-                                      DotExpr, OutContext);
+    DotExpr = MCBinaryExpr::createAdd(
+        MCSymbolRefExpr::create(OpSym, OutContext), DotExpr, OutContext);
 
     EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri)
-      .addReg(MI->getOperand(0).getReg())
-      .addReg(MI->getOperand(1).getReg())
-      .addExpr(DotExpr));
+                                .addReg(MI->getOperand(0).getReg())
+                                .addReg(MI->getOperand(1).getReg())
+                                .addExpr(DotExpr));
     return;
   }
   case TargetOpcode::STATEPOINT:
@@ -1607,10 +2040,13 @@ void X86AsmPrinter::EmitInstruction(cons
 
   case TargetOpcode::PATCHABLE_TAIL_CALL:
     return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);
-    
+
   case TargetOpcode::PATCHABLE_EVENT_CALL:
     return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering);
 
+  case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
+    return LowerPATCHABLE_TYPED_EVENT_CALL(*MI, MCInstLowering);
+
   case X86::MORESTACK_RET:
     EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
     return;
@@ -1618,9 +2054,8 @@ void X86AsmPrinter::EmitInstruction(cons
   case X86::MORESTACK_RET_RESTORE_R10:
     // Return, then restore R10.
     EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
-    EmitAndCountInstruction(MCInstBuilder(X86::MOV64rr)
-                            .addReg(X86::R10)
-                            .addReg(X86::RAX));
+    EmitAndCountInstruction(
+        MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX));
     return;
 
   case X86::SEH_PushReg:
@@ -1670,22 +2105,29 @@ void X86AsmPrinter::EmitInstruction(cons
       break;
     unsigned SrcIdx, MaskIdx;
     switch (MI->getOpcode()) {
-    default: llvm_unreachable("Invalid opcode");
+    default:
+      llvm_unreachable("Invalid opcode");
     case X86::PSHUFBrm:
     case X86::VPSHUFBrm:
     case X86::VPSHUFBYrm:
     case X86::VPSHUFBZ128rm:
     case X86::VPSHUFBZ256rm:
     case X86::VPSHUFBZrm:
-      SrcIdx = 1; MaskIdx = 5; break;
+      SrcIdx = 1;
+      MaskIdx = 5;
+      break;
     case X86::VPSHUFBZ128rmkz:
     case X86::VPSHUFBZ256rmkz:
     case X86::VPSHUFBZrmkz:
-      SrcIdx = 2; MaskIdx = 6; break;
+      SrcIdx = 2;
+      MaskIdx = 6;
+      break;
     case X86::VPSHUFBZ128rmk:
     case X86::VPSHUFBZ256rmk:
     case X86::VPSHUFBZrmk:
-      SrcIdx = 3; MaskIdx = 7; break;
+      SrcIdx = 3;
+      MaskIdx = 7;
+      break;
     }
 
     assert(MI->getNumOperands() >= 6 &&
@@ -1729,35 +2171,54 @@ void X86AsmPrinter::EmitInstruction(cons
     unsigned SrcIdx, MaskIdx;
     unsigned ElSize;
     switch (MI->getOpcode()) {
-    default: llvm_unreachable("Invalid opcode");
+    default:
+      llvm_unreachable("Invalid opcode");
     case X86::VPERMILPSrm:
     case X86::VPERMILPSYrm:
     case X86::VPERMILPSZ128rm:
     case X86::VPERMILPSZ256rm:
     case X86::VPERMILPSZrm:
-      SrcIdx = 1; MaskIdx = 5; ElSize = 32; break;
+      SrcIdx = 1;
+      MaskIdx = 5;
+      ElSize = 32;
+      break;
     case X86::VPERMILPSZ128rmkz:
     case X86::VPERMILPSZ256rmkz:
     case X86::VPERMILPSZrmkz:
-      SrcIdx = 2; MaskIdx = 6; ElSize = 32; break;
+      SrcIdx = 2;
+      MaskIdx = 6;
+      ElSize = 32;
+      break;
     case X86::VPERMILPSZ128rmk:
     case X86::VPERMILPSZ256rmk:
     case X86::VPERMILPSZrmk:
-      SrcIdx = 3; MaskIdx = 7; ElSize = 32; break;
+      SrcIdx = 3;
+      MaskIdx = 7;
+      ElSize = 32;
+      break;
     case X86::VPERMILPDrm:
     case X86::VPERMILPDYrm:
     case X86::VPERMILPDZ128rm:
     case X86::VPERMILPDZ256rm:
     case X86::VPERMILPDZrm:
-      SrcIdx = 1; MaskIdx = 5; ElSize = 64; break;
+      SrcIdx = 1;
+      MaskIdx = 5;
+      ElSize = 64;
+      break;
     case X86::VPERMILPDZ128rmkz:
     case X86::VPERMILPDZ256rmkz:
     case X86::VPERMILPDZrmkz:
-      SrcIdx = 2; MaskIdx = 6; ElSize = 64; break;
+      SrcIdx = 2;
+      MaskIdx = 6;
+      ElSize = 64;
+      break;
     case X86::VPERMILPDZ128rmk:
     case X86::VPERMILPDZ256rmk:
     case X86::VPERMILPDZrmk:
-      SrcIdx = 3; MaskIdx = 7; ElSize = 64; break;
+      SrcIdx = 3;
+      MaskIdx = 7;
+      ElSize = 64;
+      break;
     }
 
     assert(MI->getNumOperands() >= 6 &&
@@ -1789,9 +2250,16 @@ void X86AsmPrinter::EmitInstruction(cons
 
     unsigned ElSize;
     switch (MI->getOpcode()) {
-    default: llvm_unreachable("Invalid opcode");
-    case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = 32; break;
-    case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break;
+    default:
+      llvm_unreachable("Invalid opcode");
+    case X86::VPERMIL2PSrm:
+    case X86::VPERMIL2PSYrm:
+      ElSize = 32;
+      break;
+    case X86::VPERMIL2PDrm:
+    case X86::VPERMIL2PDYrm:
+      ElSize = 64;
+      break;
     }
 
     const MachineOperand &MaskOp = MI->getOperand(6);
@@ -1840,37 +2308,37 @@ void X86AsmPrinter::EmitInstruction(cons
     break;
   }
 
-#define MOV_CASE(Prefix, Suffix)        \
-  case X86::Prefix##MOVAPD##Suffix##rm: \
-  case X86::Prefix##MOVAPS##Suffix##rm: \
-  case X86::Prefix##MOVUPD##Suffix##rm: \
-  case X86::Prefix##MOVUPS##Suffix##rm: \
-  case X86::Prefix##MOVDQA##Suffix##rm: \
+#define MOV_CASE(Prefix, Suffix)                                               \
+  case X86::Prefix##MOVAPD##Suffix##rm:                                        \
+  case X86::Prefix##MOVAPS##Suffix##rm:                                        \
+  case X86::Prefix##MOVUPD##Suffix##rm:                                        \
+  case X86::Prefix##MOVUPS##Suffix##rm:                                        \
+  case X86::Prefix##MOVDQA##Suffix##rm:                                        \
   case X86::Prefix##MOVDQU##Suffix##rm:
 
-#define MOV_AVX512_CASE(Suffix)         \
-  case X86::VMOVDQA64##Suffix##rm:      \
-  case X86::VMOVDQA32##Suffix##rm:      \
-  case X86::VMOVDQU64##Suffix##rm:      \
-  case X86::VMOVDQU32##Suffix##rm:      \
-  case X86::VMOVDQU16##Suffix##rm:      \
-  case X86::VMOVDQU8##Suffix##rm:       \
-  case X86::VMOVAPS##Suffix##rm:        \
-  case X86::VMOVAPD##Suffix##rm:        \
-  case X86::VMOVUPS##Suffix##rm:        \
+#define MOV_AVX512_CASE(Suffix)                                                \
+  case X86::VMOVDQA64##Suffix##rm:                                             \
+  case X86::VMOVDQA32##Suffix##rm:                                             \
+  case X86::VMOVDQU64##Suffix##rm:                                             \
+  case X86::VMOVDQU32##Suffix##rm:                                             \
+  case X86::VMOVDQU16##Suffix##rm:                                             \
+  case X86::VMOVDQU8##Suffix##rm:                                              \
+  case X86::VMOVAPS##Suffix##rm:                                               \
+  case X86::VMOVAPD##Suffix##rm:                                               \
+  case X86::VMOVUPS##Suffix##rm:                                               \
   case X86::VMOVUPD##Suffix##rm:
 
-#define CASE_ALL_MOV_RM()               \
-  MOV_CASE(, )   /* SSE */              \
-  MOV_CASE(V, )  /* AVX-128 */          \
-  MOV_CASE(V, Y) /* AVX-256 */          \
-  MOV_AVX512_CASE(Z)                    \
-  MOV_AVX512_CASE(Z256)                 \
+#define CASE_ALL_MOV_RM()                                                      \
+  MOV_CASE(, )   /* SSE */                                                     \
+  MOV_CASE(V, )  /* AVX-128 */                                                 \
+  MOV_CASE(V, Y) /* AVX-256 */                                                 \
+  MOV_AVX512_CASE(Z)                                                           \
+  MOV_AVX512_CASE(Z256)                                                        \
   MOV_AVX512_CASE(Z128)
 
-  // For loads from a constant pool to a vector register, print the constant
-  // loaded.
-  CASE_ALL_MOV_RM()
+    // For loads from a constant pool to a vector register, print the constant
+    // loaded.
+    CASE_ALL_MOV_RM()
   case X86::VBROADCASTF128:
   case X86::VBROADCASTI128:
   case X86::VBROADCASTF32X4Z256rm:
@@ -1893,20 +2361,48 @@ void X86AsmPrinter::EmitInstruction(cons
       int NumLanes = 1;
       // Override NumLanes for the broadcast instructions.
       switch (MI->getOpcode()) {
-      case X86::VBROADCASTF128:         NumLanes = 2;  break;
-      case X86::VBROADCASTI128:         NumLanes = 2;  break;
-      case X86::VBROADCASTF32X4Z256rm:  NumLanes = 2;  break;
-      case X86::VBROADCASTF32X4rm:      NumLanes = 4;  break;
-      case X86::VBROADCASTF32X8rm:      NumLanes = 2;  break;
-      case X86::VBROADCASTF64X2Z128rm:  NumLanes = 2;  break;
-      case X86::VBROADCASTF64X2rm:      NumLanes = 4;  break;
-      case X86::VBROADCASTF64X4rm:      NumLanes = 2;  break;
-      case X86::VBROADCASTI32X4Z256rm:  NumLanes = 2;  break;
-      case X86::VBROADCASTI32X4rm:      NumLanes = 4;  break;
-      case X86::VBROADCASTI32X8rm:      NumLanes = 2;  break;
-      case X86::VBROADCASTI64X2Z128rm:  NumLanes = 2;  break;
-      case X86::VBROADCASTI64X2rm:      NumLanes = 4;  break;
-      case X86::VBROADCASTI64X4rm:      NumLanes = 2;  break;
+      case X86::VBROADCASTF128:
+        NumLanes = 2;
+        break;
+      case X86::VBROADCASTI128:
+        NumLanes = 2;
+        break;
+      case X86::VBROADCASTF32X4Z256rm:
+        NumLanes = 2;
+        break;
+      case X86::VBROADCASTF32X4rm:
+        NumLanes = 4;
+        break;
+      case X86::VBROADCASTF32X8rm:
+        NumLanes = 2;
+        break;
+      case X86::VBROADCASTF64X2Z128rm:
+        NumLanes = 2;
+        break;
+      case X86::VBROADCASTF64X2rm:
+        NumLanes = 4;
+        break;
+      case X86::VBROADCASTF64X4rm:
+        NumLanes = 2;
+        break;
+      case X86::VBROADCASTI32X4Z256rm:
+        NumLanes = 2;
+        break;
+      case X86::VBROADCASTI32X4rm:
+        NumLanes = 4;
+        break;
+      case X86::VBROADCASTI32X8rm:
+        NumLanes = 2;
+        break;
+      case X86::VBROADCASTI64X2Z128rm:
+        NumLanes = 2;
+        break;
+      case X86::VBROADCASTI64X2rm:
+        NumLanes = 4;
+        break;
+      case X86::VBROADCASTI64X4rm:
+        NumLanes = 2;
+        break;
       }
 
       std::string Comment;
@@ -1916,7 +2412,8 @@ void X86AsmPrinter::EmitInstruction(cons
       if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
         CS << "[";
         for (int l = 0; l != NumLanes; ++l) {
-          for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements; ++i) {
+          for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements;
+               ++i) {
             if (i != 0 || l != 0)
               CS << ",";
             if (CDS->getElementType()->isIntegerTy())
@@ -1934,7 +2431,8 @@ void X86AsmPrinter::EmitInstruction(cons
       } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
         CS << "<";
         for (int l = 0; l != NumLanes; ++l) {
-          for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands; ++i) {
+          for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands;
+               ++i) {
             if (i != 0 || l != 0)
               CS << ",";
             printConstant(CV->getOperand(i), CS);
@@ -1980,35 +2478,92 @@ void X86AsmPrinter::EmitInstruction(cons
     if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) {
       int NumElts;
       switch (MI->getOpcode()) {
-      default: llvm_unreachable("Invalid opcode");
-      case X86::VBROADCASTSSrm:    NumElts = 4;  break;
-      case X86::VBROADCASTSSYrm:   NumElts = 8;  break;
-      case X86::VBROADCASTSSZ128m: NumElts = 4;  break;
-      case X86::VBROADCASTSSZ256m: NumElts = 8;  break;
-      case X86::VBROADCASTSSZm:    NumElts = 16; break;
-      case X86::VBROADCASTSDYrm:   NumElts = 4;  break;
-      case X86::VBROADCASTSDZ256m: NumElts = 4;  break;
-      case X86::VBROADCASTSDZm:    NumElts = 8;  break;
-      case X86::VPBROADCASTBrm:    NumElts = 16; break;
-      case X86::VPBROADCASTBYrm:   NumElts = 32; break;
-      case X86::VPBROADCASTBZ128m: NumElts = 16; break;
-      case X86::VPBROADCASTBZ256m: NumElts = 32; break;
-      case X86::VPBROADCASTBZm:    NumElts = 64; break;
-      case X86::VPBROADCASTDrm:    NumElts = 4;  break;
-      case X86::VPBROADCASTDYrm:   NumElts = 8;  break;
-      case X86::VPBROADCASTDZ128m: NumElts = 4;  break;
-      case X86::VPBROADCASTDZ256m: NumElts = 8;  break;
-      case X86::VPBROADCASTDZm:    NumElts = 16; break;
-      case X86::VPBROADCASTQrm:    NumElts = 2;  break;
-      case X86::VPBROADCASTQYrm:   NumElts = 4;  break;
-      case X86::VPBROADCASTQZ128m: NumElts = 2;  break;
-      case X86::VPBROADCASTQZ256m: NumElts = 4;  break;
-      case X86::VPBROADCASTQZm:    NumElts = 8;  break;
-      case X86::VPBROADCASTWrm:    NumElts = 8;  break;
-      case X86::VPBROADCASTWYrm:   NumElts = 16; break;
-      case X86::VPBROADCASTWZ128m: NumElts = 8;  break;
-      case X86::VPBROADCASTWZ256m: NumElts = 16; break;
-      case X86::VPBROADCASTWZm:    NumElts = 32; break;
+      default:
+        llvm_unreachable("Invalid opcode");
+      case X86::VBROADCASTSSrm:
+        NumElts = 4;
+        break;
+      case X86::VBROADCASTSSYrm:
+        NumElts = 8;
+        break;
+      case X86::VBROADCASTSSZ128m:
+        NumElts = 4;
+        break;
+      case X86::VBROADCASTSSZ256m:
+        NumElts = 8;
+        break;
+      case X86::VBROADCASTSSZm:
+        NumElts = 16;
+        break;
+      case X86::VBROADCASTSDYrm:
+        NumElts = 4;
+        break;
+      case X86::VBROADCASTSDZ256m:
+        NumElts = 4;
+        break;
+      case X86::VBROADCASTSDZm:
+        NumElts = 8;
+        break;
+      case X86::VPBROADCASTBrm:
+        NumElts = 16;
+        break;
+      case X86::VPBROADCASTBYrm:
+        NumElts = 32;
+        break;
+      case X86::VPBROADCASTBZ128m:
+        NumElts = 16;
+        break;
+      case X86::VPBROADCASTBZ256m:
+        NumElts = 32;
+        break;
+      case X86::VPBROADCASTBZm:
+        NumElts = 64;
+        break;
+      case X86::VPBROADCASTDrm:
+        NumElts = 4;
+        break;
+      case X86::VPBROADCASTDYrm:
+        NumElts = 8;
+        break;
+      case X86::VPBROADCASTDZ128m:
+        NumElts = 4;
+        break;
+      case X86::VPBROADCASTDZ256m:
+        NumElts = 8;
+        break;
+      case X86::VPBROADCASTDZm:
+        NumElts = 16;
+        break;
+      case X86::VPBROADCASTQrm:
+        NumElts = 2;
+        break;
+      case X86::VPBROADCASTQYrm:
+        NumElts = 4;
+        break;
+      case X86::VPBROADCASTQZ128m:
+        NumElts = 2;
+        break;
+      case X86::VPBROADCASTQZ256m:
+        NumElts = 4;
+        break;
+      case X86::VPBROADCASTQZm:
+        NumElts = 8;
+        break;
+      case X86::VPBROADCASTWrm:
+        NumElts = 8;
+        break;
+      case X86::VPBROADCASTWYrm:
+        NumElts = 16;
+        break;
+      case X86::VPBROADCASTWZ128m:
+        NumElts = 8;
+        break;
+      case X86::VPBROADCASTWZ256m:
+        NumElts = 16;
+        break;
+      case X86::VPBROADCASTWZm:
+        NumElts = 32;
+        break;
       }
 
       std::string Comment;

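For orientation, the sled above is emitted for calls to the new
@llvm.xray.typedevent intrinsic. A minimal IR sketch of a caller (the operand
types and the "function-instrument" attribute mirror the tests below; the
function name and the tag value are illustrative only):

    declare void @llvm.xray.typedevent(i16, i8*, i32)

    define void @log_payload(i8* %data, i32 %len) "function-instrument"="xray-always" {
      ; The i16 tag lets a runtime plugin that recognizes it interpret the
      ; payload; plugins that don't know the tag can skip the record.
      call void @llvm.xray.typedevent(i16 6, i8* %data, i32 %len)
      ret void
    }
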
Modified: llvm/trunk/test/CodeGen/X86/xray-custom-log.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/xray-custom-log.ll?rev=330219&r1=330218&r2=330219&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/xray-custom-log.ll (original)
+++ llvm/trunk/test/CodeGen/X86/xray-custom-log.ll Tue Apr 17 14:30:29 2018
@@ -11,8 +11,8 @@ define i32 @fn() nounwind noinline uwtab
     ; CHECK-LABEL: Lxray_event_sled_0:
     ; CHECK:       .byte 0xeb, 0x0f
     ; CHECK-NEXT:  pushq %rdi
-    ; CHECK-NEXT:  movq {{.*}}, %rdi
     ; CHECK-NEXT:  pushq %rsi
+    ; CHECK-NEXT:  movq {{.*}}, %rdi
     ; CHECK-NEXT:  movq {{.*}}, %rsi
     ; CHECK-NEXT:  callq __xray_CustomEvent
     ; CHECK-NEXT:  popq %rsi
@@ -21,8 +21,8 @@ define i32 @fn() nounwind noinline uwtab
     ; PIC-LABEL: Lxray_event_sled_0:
     ; PIC:       .byte 0xeb, 0x0f
     ; PIC-NEXT:  pushq %rdi
-    ; PIC-NEXT:  movq {{.*}}, %rdi
     ; PIC-NEXT:  pushq %rsi
+    ; PIC-NEXT:  movq {{.*}}, %rdi
     ; PIC-NEXT:  movq {{.*}}, %rsi
-    ; PIC-NEXT:  callq __xray_CustomEvent@PLT
     ; PIC-NEXT:  popq %rsi

Copied: llvm/trunk/test/CodeGen/X86/xray-typed-event-log.ll (from r330208, llvm/trunk/test/CodeGen/X86/xray-custom-log.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/xray-typed-event-log.ll?p2=llvm/trunk/test/CodeGen/X86/xray-typed-event-log.ll&p1=llvm/trunk/test/CodeGen/X86/xray-custom-log.ll&r1=330208&r2=330219&rev=330219&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/xray-custom-log.ll (original)
+++ llvm/trunk/test/CodeGen/X86/xray-typed-event-log.ll Tue Apr 17 14:30:29 2018
@@ -5,32 +5,41 @@
 define i32 @fn() nounwind noinline uwtable "function-instrument"="xray-always" {
     %eventptr = alloca i8
     %eventsize = alloca i32
+    %eventtype = alloca i16
+    store i16 6, i16* %eventtype
+    %type = load i16, i16* %eventtype
     store i32 3, i32* %eventsize
     %val = load i32, i32* %eventsize
-    call void @llvm.xray.customevent(i8* %eventptr, i32 %val)
-    ; CHECK-LABEL: Lxray_event_sled_0:
-    ; CHECK:       .byte 0xeb, 0x0f
+    call void @llvm.xray.typedevent(i16 %type, i8* %eventptr, i32 %val)
+    ; CHECK-LABEL: Lxray_typed_event_sled_0:
+    ; CHECK:       .byte 0xeb, 0x14
     ; CHECK-NEXT:  pushq %rdi
-    ; CHECK-NEXT:  movq {{.*}}, %rdi
     ; CHECK-NEXT:  pushq %rsi
+    ; CHECK-NEXT:  pushq %rdx
+    ; CHECK-NEXT:  movq {{.*}}, %rdi
     ; CHECK-NEXT:  movq {{.*}}, %rsi
-    ; CHECK-NEXT:  callq __xray_CustomEvent
+    ; CHECK-NEXT:  movq {{.*}}, %rdx
+    ; CHECK-NEXT:  callq __xray_TypedEvent
+    ; CHECK-NEXT:  popq %rdx
     ; CHECK-NEXT:  popq %rsi
     ; CHECK-NEXT:  popq %rdi
 
-    ; PIC-LABEL: Lxray_event_sled_0:
-    ; PIC:       .byte 0xeb, 0x0f
+    ; PIC-LABEL: Lxray_typed_event_sled_0:
+    ; PIC:       .byte 0xeb, 0x14
     ; PIC-NEXT:  pushq %rdi
-    ; PIC-NEXT:  movq {{.*}}, %rdi
     ; PIC-NEXT:  pushq %rsi
+    ; PIC-NEXT:  pushq %rdx
+    ; PIC-NEXT:  movq {{.*}}, %rdi
     ; PIC-NEXT:  movq {{.*}}, %rsi
-    ; PIC-NEXT:  callq __xray_CustomEvent@PLT
+    ; PIC-NEXT:  movq {{.*}}, %rdx
+    ; PIC-NEXT:  callq __xray_TypedEvent@PLT
+    ; PIC-NEXT:  popq %rdx
     ; PIC-NEXT:  popq %rsi
     ; PIC-NEXT:  popq %rdi
     ret i32 0
 }
 ; CHECK-LABEL: xray_instr_map
 ; CHECK-LABEL: Lxray_sleds_start0:
-; CHECK:       .quad {{.*}}xray_event_sled_0
+; CHECK:       .quad {{.*}}xray_typed_event_sled_0
 
-declare void @llvm.xray.customevent(i8*, i32)
+declare void @llvm.xray.typedevent(i16, i8*, i32)
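
Side by side, the typed intrinsic differs from the existing custom-event one
only in the leading i16 type tag. That third argument is what grows the sled's
jump from 0x0f to 0x14 bytes: five more bytes for the extra push, mov, and pop
of %rdx. A sketch (operand names as in the tests above):

    call void @llvm.xray.customevent(i8* %eventptr, i32 %val)            ; 2 args in %rdi, %rsi
    call void @llvm.xray.typedevent(i16 %type, i8* %eventptr, i32 %val)  ; 3 args in %rdi, %rsi, %rdx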
