[clang] [libclc] [libcxx] [lld] [llvm] [openmp] llvm encode decode (PR #87187)

Jiang zixian via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 11 00:13:40 PDT 2024


https://github.com/jiang-zixian updated https://github.com/llvm/llvm-project/pull/87187

>From ccf42e2d5e200aabe75e8d328a20c364cdb488cb Mon Sep 17 00:00:00 2001
From: x-codingman <xinhuishao1996 at gmail.com>
Date: Wed, 18 Jan 2023 20:02:45 +0800
Subject: [PATCH 1/8] update Hello.cpp

---
 llvm/lib/Transforms/Hello/Hello.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Transforms/Hello/Hello.cpp b/llvm/lib/Transforms/Hello/Hello.cpp
index b0adb5401f8912..5218d0897000d5 100644
--- a/llvm/lib/Transforms/Hello/Hello.cpp
+++ b/llvm/lib/Transforms/Hello/Hello.cpp
@@ -31,6 +31,7 @@ namespace {
       ++HelloCounter;
       errs() << "Hello: ";
       errs().write_escaped(F.getName()) << '\n';
+      F.setSectionPrefix("mySection")
       return false;
     }
   };

>From adfc72708c57e490d0829344982d5d2c5132e0a1 Mon Sep 17 00:00:00 2001
From: xcodingman <xinhuishao1996 at gmail.com>
Date: Fri, 10 Feb 2023 20:59:20 +0800
Subject: [PATCH 2/8] port shadow stack pass

---
 lld/ELF/Arch/ARM.cpp                          |  12 +
 llvm/include/llvm/CodeGen/MachineBasicBlock.h |  14 +
 llvm/lib/Target/ARM/ARMInstrThumb2.td         |  21 +
 llvm/lib/Target/ARM/ARMRandezvousCLR.cpp      | 335 +++++++
 llvm/lib/Target/ARM/ARMRandezvousCLR.h        |  54 ++
 .../Target/ARM/ARMRandezvousInstrumentor.cpp  | 824 +++++++++++++++++
 .../Target/ARM/ARMRandezvousInstrumentor.h    | 178 ++++
 llvm/lib/Target/ARM/ARMRandezvousOptions.cpp  | 210 +++++
 llvm/lib/Target/ARM/ARMRandezvousOptions.h    |  63 ++
 .../Target/ARM/ARMRandezvousShadowStack.cpp   | 861 ++++++++++++++++++
 .../lib/Target/ARM/ARMRandezvousShadowStack.h |  40 +
 llvm/lib/Target/ARM/ARMTargetMachine.cpp      |   9 +
 llvm/lib/Target/ARM/CMakeLists.txt            |   9 +
 .../Target/ARM/MCTargetDesc/ARMAsmBackend.cpp |  30 +
 .../ARM/MCTargetDesc/ARMELFObjectWriter.cpp   |  12 +
 .../Target/ARM/MCTargetDesc/ARMFixupKinds.h   |   3 +-
 .../ARM/MCTargetDesc/ARMMCCodeEmitter.cpp     |   4 +
 llvm/lib/Transforms/Hello/Hello.cpp           |   2 +-
 18 files changed, 2679 insertions(+), 2 deletions(-)
 create mode 100644 llvm/lib/Target/ARM/ARMRandezvousCLR.cpp
 create mode 100644 llvm/lib/Target/ARM/ARMRandezvousCLR.h
 create mode 100644 llvm/lib/Target/ARM/ARMRandezvousInstrumentor.cpp
 create mode 100644 llvm/lib/Target/ARM/ARMRandezvousInstrumentor.h
 create mode 100644 llvm/lib/Target/ARM/ARMRandezvousOptions.cpp
 create mode 100644 llvm/lib/Target/ARM/ARMRandezvousOptions.h
 create mode 100644 llvm/lib/Target/ARM/ARMRandezvousShadowStack.cpp
 create mode 100644 llvm/lib/Target/ARM/ARMRandezvousShadowStack.h

diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp
index 24d78ebf58203c..49ca1dab20ee8e 100644
--- a/lld/ELF/Arch/ARM.cpp
+++ b/lld/ELF/Arch/ARM.cpp
@@ -848,6 +848,18 @@ int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const {
                             ((lo & 0x7000) >> 4) |  // imm3
                             (lo & 0x00ff));         // imm8
   }
+
+  case R_ARM_PRIVATE_14:
+    // Encoding T2: A = imm4:imm12
+    write16le(loc, (read16le(loc) & ~0x000f) | ((val >> 28) & 0x000f));
+    write16le(loc + 2, (read16le(loc + 2) & ~0x0fff) | ((val >> 16) & 0x0fff));
+    break;
+  case R_ARM_PRIVATE_15:
+    // Encoding T2: A = imm4:imm12
+    write16le(loc, (read16le(loc) & ~0x000f) | ((val >> 12) & 0x000f));
+    write16le(loc + 2, (read16le(loc + 2) & ~0x0fff) | (val & 0x0fff));
+    break;
+    
   case R_ARM_ALU_PC_G0:
   case R_ARM_ALU_PC_G0_NC:
   case R_ARM_ALU_PC_G1:
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 1ab24b554f5b5e..6e4da3676929f4 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -185,6 +185,9 @@ class MachineBasicBlock
   /// Indicate that this basic block is the indirect dest of an INLINEASM_BR.
   bool IsInlineAsmBrIndirectTarget = false;
 
+  /// Indicate that this basic block is a trap block inserted by ARM Randezvous
+  /// Code Layout Randomization pass.
+  bool IsRandezvousTrapBlock = false;
   /// since getSymbol is a relatively heavy-weight operation, the symbol
   /// is only computed once and is cached.
   mutable MCSymbol *CachedMCSymbol = nullptr;
@@ -667,7 +670,18 @@ class MachineBasicBlock
   void setIsInlineAsmBrIndirectTarget(bool V = true) {
     IsInlineAsmBrIndirectTarget = V;
   }
+  
+  /// Returns true if this is a trap block inserted by ARM Randezvous Code
+  /// Layout Randomization pass.
+  bool isRandezvousTrapBlock() const {
+    return IsRandezvousTrapBlock;
+  }
 
+  /// Indicates if this is a trap block inserted by ARM Randezvous Code Layout
+  /// Randomization pass.
+  void setIsRandezvousTrapBlock(bool V = true) {
+    IsRandezvousTrapBlock = V;
+  }
   /// Returns true if it is legal to hoist instructions into this block.
   bool isLegalToHoistInto() const;
 
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index d8721cbebd2dc2..248362b108d0c3 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -1506,6 +1506,14 @@ def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)),
 //        not via pattern.
 
 // Indexed loads
+let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
+    hasSideEffects = 0, isCodeGenOnly = 1 in
+def t2LDR_PRE_RET : T2Ipreldst<0, 0b10, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+                               (ins t2addrmode_imm8_pre:$addr),
+                               AddrModeT2_i8, IndexModePre, IIC_iLoad_iu,
+                               "ldr", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
+                               []>,
+                    Sched<[WriteLd]>;
 
 let mayLoad = 1, hasSideEffects = 0 in {
 def t2LDR_PRE  : T2Ipreldst<0, 0b10, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
@@ -2846,6 +2854,19 @@ def t2UDF : T2XI<(outs), (ins imm0_65535:$imm16), IIC_Br, "udf.w\t$imm16",
   let Inst{11-0} = imm16{11-0};
 }
 
+let isCodeGenOnly = 1 in
+def t2UDF_ga : T2XI<(outs), (ins imm0_65535_expr:$imm16), IIC_Br,
+                    "udf.w\t$imm16", []>, Sched<[WriteBr]> {
+  bits<16> imm16;
+  let Inst{31-29} = 0b111;
+  let Inst{28-27} = 0b10;
+  let Inst{26-20} = 0b1111111;
+  let Inst{19-16} = imm16{15-12};
+  let Inst{15} = 0b1;
+  let Inst{14-12} = 0b010;
+  let Inst{11-0} = imm16{11-0};
+}
+
 // A8.6.18  BFI - Bitfield insert (Encoding T1)
 let Constraints = "$src = $Rd" in {
   def t2BFI : T2TwoRegBitFI<(outs rGPR:$Rd),
diff --git a/llvm/lib/Target/ARM/ARMRandezvousCLR.cpp b/llvm/lib/Target/ARM/ARMRandezvousCLR.cpp
new file mode 100644
index 00000000000000..3beda9b8f9018f
--- /dev/null
+++ b/llvm/lib/Target/ARM/ARMRandezvousCLR.cpp
@@ -0,0 +1,335 @@
+//===- ARMRandezvousCLR.cpp - ARM Randezvous Code Layout Randomization ----===//
+//
+// Copyright (c) 2021-2022, University of Rochester
+//
+// Part of the Randezvous Project, under the Apache License v2.0 with
+// LLVM Exceptions.  See LICENSE.txt in the llvm directory for license
+// information.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of a pass that randomizes the code
+// layout of ARM machine code.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-randezvous-clr"
+
+#include "ARMRandezvousCLR.h"
+#include "ARMRandezvousOptions.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/IRBuilder.h"
+
+using namespace llvm;
+
+char ARMRandezvousCLR::ID = 0;
+
+STATISTIC(NumTraps, "Number of trap instructions inserted");
+STATISTIC(NumFuncsBBLR, "Number of functions with basic blocks reordered");
+STATISTIC(NumJumps4BBLR, "Number of jump instructions inserted due to BBLR");
+STATISTIC(NumFuncsBBCLR, "Number of functions with basic block clusters reordered");
+
+// Builds the pass; LateStage records which of the two CLR stages this is.
+ARMRandezvousCLR::ARMRandezvousCLR(bool LateStage)
+    : ModulePass(ID), LateStage(LateStage) {}
+
+// Returns the fixed display name of this pass.
+StringRef ARMRandezvousCLR::getPassName() const {
+  return "ARM Randezvous Code Layout Randomization Pass";
+}
+
+// Declares the analyses this pass requires and what it preserves.
+void ARMRandezvousCLR::getAnalysisUsage(AnalysisUsage & AU) const {
+  // MachineModuleInfo is how runOnModule() reaches each MachineFunction
+  AU.addRequired<MachineModuleInfoWrapperPass>();
+
+  AU.setPreservesCFG();
+  ModulePass::getAnalysisUsage(AU);
+}
+
+//
+// Method: shuffleMachineBasicBlocks()
+//
+// Description:
+//   This method shuffles the order of MachineBasicBlocks in a MachineFunction.
+//   It shuffles all the basic blocks except the entry block, so fall-through
+//   blocks will be taken apart and branch instructions will be inserted
+//   appropriately to preserve the CFG.
+//
+// Input:
+//   MF - A reference to the MachineFunction.
+//
+// Output:
+//   MF - The transformed MachineFunction.
+//
+void ARMRandezvousCLR::shuffleMachineBasicBlocks(MachineFunction & MF) {
+  // The entry block stays in place, so at least two other blocks are needed
+  // for a shuffle to change anything
+  if (MF.size() <= 2) {
+    return;
+  }
+
+  const TargetInstrInfo * TII = MF.getSubtarget().getInstrInfo();
+
+  // Collect the blocks in their current order; every block that falls through
+  // first gets an explicit unconditional branch to its successor so that the
+  // two can be separated without changing the CFG
+  std::vector<MachineBasicBlock *> Order;
+  for (MachineBasicBlock & MBB : MF) {
+    if (MachineBasicBlock * Next = MBB.getFallThrough()) {
+      BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(ARM::t2B))
+          .addMBB(Next)
+          .add(predOps(ARMCC::AL));
+      ++NumJumps4BBLR;
+    }
+    Order.push_back(&MBB);
+  }
+
+  // Permute everything but the entry block out of place, then rebuild the
+  // function's block list in the new order (the list itself cannot be
+  // shuffled in place)
+  llvm::shuffle(Order.begin() + 1, Order.end(), *RNG);
+  auto & Blocks = (&MF)->*(MachineFunction::getSublistAccess)(nullptr);
+  for (MachineBasicBlock * MBB : Order) {
+    Blocks.remove(MBB);
+  }
+  for (MachineBasicBlock * MBB : Order) {
+    Blocks.push_back(MBB);
+  }
+  ++NumFuncsBBLR;
+}
+
+//
+// Method: shuffleMachineBasicBlockClusters()
+//
+// Description:
+//   This method shuffles the order of clusters of MachineBasicBlocks that fall
+//   through in the order as they appear.  It shuffles all the basic block
+//   clusters except the entry cluster.
+//
+// Input:
+//   MF - A reference to the MachineFunction.
+//
+// Output:
+//   MF - The transformed MachineFunction.
+//
+void
+ARMRandezvousCLR::shuffleMachineBasicBlockClusters(MachineFunction & MF) {
+  // A cluster is a maximal run of consecutive blocks in which every block
+  // but the last can fall through to the next one.  Clusters are stored by
+  // value so that their storage is released automatically on every path out
+  // of this function (no manual new/delete bookkeeping)
+  using Cluster = std::vector<MachineBasicBlock *>;
+
+  // Partition the blocks into clusters, in layout order
+  std::vector<Cluster> Clusters;
+  bool StartNewCluster = true;
+  for (MachineBasicBlock & MBB : MF) {
+    if (StartNewCluster) {
+      Clusters.emplace_back();
+    }
+    Clusters.back().push_back(&MBB);
+
+    // A block that cannot fall through terminates the current cluster
+    StartNewCluster = !MBB.canFallThrough();
+  }
+
+  // Shuffling has no effect on functions with fewer than 3 clusters (because
+  // we are not reordering the entry cluster)
+  if (Clusters.size() < 3) {
+    return;
+  }
+
+  // Now do shuffling; ilist (iplist_impl) does not support iterator
+  // increment/decrement so we have to first do out-of-place shuffling and
+  // then do in-place removal and insertion
+  llvm::shuffle(Clusters.begin() + 1, Clusters.end(), *RNG);
+
+  // Rebuild the block list cluster by cluster in the shuffled order
+  auto & MBBList = (&MF)->*(MachineFunction::getSublistAccess)(nullptr);
+  for (const Cluster & C : Clusters) {
+    for (MachineBasicBlock * MBB : C) {
+      MBBList.remove(MBB);
+    }
+  }
+  for (const Cluster & C : Clusters) {
+    for (MachineBasicBlock * MBB : C) {
+      MBBList.push_back(MBB);
+    }
+  }
+
+  ++NumFuncsBBCLR;
+}
+
+//
+// Method: insertTrapBlocks()
+//
+// Description:
+//   This method inserts a given number of trap instructions into a Function
+//   and keeps track of each inserted trap instruction as a single basic block.
+//
+// Inputs:
+//   F            - A reference to the Function.
+//   MF           - A reference to the MachineFunction to which F corresponds.
+//   NumTrapInsts - Total number of trap instructions to insert.
+//
+// Output:
+//   MF - The transformed MachineFunction.
+//
+void
+ARMRandezvousCLR::insertTrapBlocks(Function & F, MachineFunction & MF,
+                                   uint64_t NumTrapInsts) {
+  LLVMContext & Ctx = F.getContext();
+  const TargetInstrInfo * TII = MF.getSubtarget().getInstrInfo();
+
+  // Machine IR: trap blocks go after MachineBasicBlocks that do not fall
+  // through, which preserves the CFG while adding randomness to the inside of
+  // the MachineFunction.  LLVM IR: trap blocks are appended to the Function.
+
+  // Determine where to insert trap instructions
+  std::vector<MachineBasicBlock *> InsertionPts;
+  for (MachineBasicBlock & MBB : MF) {
+    if (!MBB.canFallThrough() && !MBB.isRandezvousTrapBlock()) {
+      InsertionPts.push_back(&MBB);
+    }
+  }
+
+  // Draw a random weight for each insertion point
+  uint64_t SumShares = 0;
+  std::vector<uint64_t> Shares(InsertionPts.size());
+  for (uint64_t i = 0; i < InsertionPts.size(); ++i) {
+    Shares[i] = (*RNG)() & 0xffffffff; // Prevent overflow
+    SumShares += Shares[i];
+  }
+
+  // Nothing to apportion (and nothing to divide by) when all weights are 0
+  if (SumShares == 0) {
+    return;
+  }
+  // Turn the weights into per-point numbers of trap instructions
+  for (uint64_t i = 0; i < InsertionPts.size(); ++i) {
+    Shares[i] = Shares[i] * NumTrapInsts / SumShares;
+  }
+
+  // Do insertion
+  for (uint64_t i = 0; i < InsertionPts.size(); ++i) {
+    for (uint64_t j = 0; j < Shares[i]; ++j) {
+      // Build an IR basic block
+      BasicBlock * BB = BasicBlock::Create(Ctx, "", &F);
+      IRBuilder<> IRB(BB);
+      IRB.CreateUnreachable();
+
+      // Build a machine IR basic block
+      MachineBasicBlock * MBB = MF.CreateMachineBasicBlock(BB);
+      BuildMI(MBB, DebugLoc(), TII->get(ARM::t2UDF_ga)).addImm(0);
+      MF.push_back(MBB);
+      MBB->moveAfter(InsertionPts[i]);
+      MBB->setMachineBlockAddressTaken();
+      MBB->setIsRandezvousTrapBlock();
+      ++NumTraps;
+    }
+  }
+}
+
+//
+// Method: runOnModule()
+//
+// Description:
+//   This method is called when the PassManager wants this pass to transform
+//   the specified Module.  This method shuffles the order of functions within
+//   the module and/or the order of basic blocks within each function, and
+//   inserts trap instructions to fill the text section.
+//
+// Input:
+//   M - A reference to the Module to transform.
+//
+// Output:
+//   M - The transformed Module.
+//
+// Return value:
+//   true  - The Module was transformed.
+//   false - The Module was not transformed.
+//
+bool
+ARMRandezvousCLR::runOnModule(Module & M) {
+  if (!EnableRandezvousCLR) {
+    return false;
+  }
+
+  MachineModuleInfo & MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+  // Materialize the RNG name at once: a Twine only refers to the temporaries
+  // it was built from, so it must not be stored in a local variable
+  const std::string RNGName =
+      (getPassName() + "-" + Twine(RandezvousCLRSeed)).str();
+  RNG = M.createRNG(RNGName);
+
+  // First, shuffle the basic blocks of each function (if requested and at the
+  // late stage) and add up how much space the existing functions take
+  uint64_t TotalTextSize = 0;
+  std::vector<std::pair<Function *, MachineFunction *> > Functions;
+  for (Function & F : M) {
+    MachineFunction * MF = MMI.getMachineFunction(F);
+    if (MF == nullptr) {
+      continue;
+    }
+
+    if (LateStage) {
+      if (EnableRandezvousBBLR) {
+        shuffleMachineBasicBlocks(*MF);
+      } else if (EnableRandezvousBBCLR) {
+        shuffleMachineBasicBlockClusters(*MF);
+      }
+    }
+
+    uint64_t TextSize = getFunctionCodeSize(*MF);
+    if (TextSize != 0) {
+      Functions.push_back(std::make_pair(&F, MF));
+      TotalTextSize += TextSize;
+    }
+  }
+  assert(TotalTextSize <= RandezvousMaxTextSize && "Text size exceeds the limit");
+
+  if (LateStage) {
+    // Second, shuffle the order of functions; the function list cannot be
+    // shuffled in place, so shuffle out of place, then remove and re-insert
+    SymbolTableList<Function> & FunctionList = M.getFunctionList();
+    llvm::shuffle(Functions.begin(), Functions.end(), *RNG);
+    for (auto & FMF : Functions) {
+      FunctionList.remove(FMF.first);
+    }
+    for (auto & FMF : Functions) {
+      FunctionList.push_back(FMF.first);
+    }
+  }
+
+  // Third, determine the number of trap instructions to insert; clamp to zero
+  // so the subtraction cannot wrap if the assertion above is compiled out
+  uint64_t NumTrapInsts = TotalTextSize < RandezvousMaxTextSize
+      ? (RandezvousMaxTextSize - TotalTextSize) / 4 : 0;
+  if (!LateStage) {
+    // Insert 80% of trap instructions during the early stage; this lets most
+    // trap blocks be consumed by later passes while still keeping a sizable
+    // code size budget for later passes and the late-stage CLR pass
+    NumTrapInsts = NumTrapInsts * 80 / 100;
+  }
+  uint64_t SumShares = 0;
+  std::vector<uint64_t> Shares(Functions.size());
+  for (uint64_t i = 0; i < Functions.size(); ++i) {
+    Shares[i] = (*RNG)() & 0xffffffff; // Prevent overflow
+    SumShares += Shares[i];
+  }
+
+  // Lastly, hand each function its share (skipped if all weights are zero)
+  for (uint64_t i = 0; SumShares != 0 && i < Functions.size(); ++i) {
+    uint64_t Share = Shares[i] * NumTrapInsts / SumShares;
+    insertTrapBlocks(*Functions[i].first, *Functions[i].second, Share);
+  }
+
+  return true;
+}
+
+// Factory for the CLR pass; LateStage selects the late-stage behavior.
+ModulePass * llvm::createARMRandezvousCLR(bool LateStage) {
+  return new ARMRandezvousCLR(LateStage);
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/ARM/ARMRandezvousCLR.h b/llvm/lib/Target/ARM/ARMRandezvousCLR.h
new file mode 100644
index 00000000000000..b54e6438c8ff25
--- /dev/null
+++ b/llvm/lib/Target/ARM/ARMRandezvousCLR.h
@@ -0,0 +1,54 @@
+//===- ARMRandezvousCLR.h - ARM Randezvous Code Layout Randomization ------===//
+//
+// Copyright (c) 2021-2022, University of Rochester
+//
+// Part of the Randezvous Project, under the Apache License v2.0 with
+// LLVM Exceptions.  See LICENSE.txt in the llvm directory for license
+// information.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces of a pass that randomizes the code layout
+// of ARM machine code.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_RANDEZVOUS_CLR
+#define ARM_RANDEZVOUS_CLR
+
+#include "ARMRandezvousInstrumentor.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/RandomNumberGenerator.h"
+
+namespace llvm {
+  struct ARMRandezvousCLR : public ModulePass,
+                            public ARMRandezvousInstrumentor {
+    // Pass identifier, passed to the ModulePass base class on construction
+    static char ID;
+
+    ARMRandezvousCLR(bool LateStage);
+    StringRef getPassName() const override;
+    void getAnalysisUsage(AnalysisUsage & AU) const override;
+    bool runOnModule(Module & M) override;
+
+  private:
+    // The stage this instance runs at:
+    //
+    // * Early stage (false): insert most of the trap instructions, for trap
+    //                        block consumers between early and late stages
+    // * Late stage (true):   shuffle the code layout and insert the rest of
+    //                        the trap instructions
+    bool LateStage = false;
+
+    std::unique_ptr<RandomNumberGenerator> RNG;
+
+    void shuffleMachineBasicBlocks(MachineFunction & MF);
+    void shuffleMachineBasicBlockClusters(MachineFunction & MF);
+    void insertTrapBlocks(Function & F, MachineFunction & MF,
+                          uint64_t NumTrapInsts);
+  };
+
+  ModulePass * createARMRandezvousCLR(bool LateStage);
+}
+
+#endif
\ No newline at end of file
diff --git a/llvm/lib/Target/ARM/ARMRandezvousInstrumentor.cpp b/llvm/lib/Target/ARM/ARMRandezvousInstrumentor.cpp
new file mode 100644
index 00000000000000..af62fc2de7604b
--- /dev/null
+++ b/llvm/lib/Target/ARM/ARMRandezvousInstrumentor.cpp
@@ -0,0 +1,824 @@
+//===- ARMRandezvousInstrumentor.cpp - A helper class for instrumentation -===//
+//
+// Copyright (c) 2021-2022, University of Rochester
+//
+// Part of the Randezvous Project, under the Apache License v2.0 with
+// LLVM Exceptions.  See LICENSE.txt in the llvm directory for license
+// information.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of a class that can help passes of
+// its subclass easily instrument ARM machine IR without concerns of breaking
+// IT blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMRandezvousInstrumentor.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+using namespace llvm;
+
+//
+// Method: getITBlockSize()
+//
+// Description:
+//   This method computes how many predicated instructions an IT instruction
+//   covers.
+//
+// Input:
+//   IT - A reference to an IT instruction.
+//
+// Return value:
+//   The number of predicated instructions IT covers.
+//
+unsigned
+ARMRandezvousInstrumentor::getITBlockSize(const MachineInstr & IT) {
+  assert(IT.getOpcode() == ARM::t2IT && "Not an IT instruction!");
+
+  const unsigned Mask = IT.getOperand(1).getImm() & 0xf;
+  assert(Mask != 0 && "Invalid IT mask!");
+
+  // The lowest set bit of the 4-bit mask decides the size: the lower that bit
+  // sits, the more instructions the IT covers
+  if ((Mask & 0x1) != 0) {
+    return 4;
+  }
+  if ((Mask & 0x2) != 0) {
+    return 3;
+  }
+  return (Mask & 0x4) != 0 ? 2 : 1;
+}
+
+//
+// Method: findIT()
+//
+// Description:
+//   This method finds the IT instruction that forms an IT block containing a
+//   given instruction MI.  It also computes the distance (from 0 to 4, 0 means
+//   MI itself is IT) between the IT and MI.  If there is no such IT, a null
+//   pointer is returned.
+//
+// Input:
+//   MI - A reference to an instruction from which to find IT.
+//
+// Output:
+//   distance - A reference to an unsigned to store the distance.
+//
+// Return value:
+//   A pointer to IT if found, nullptr otherwise.
+//
+MachineInstr *
+ARMRandezvousInstrumentor::findIT(MachineInstr & MI, unsigned & distance) {
+  // Walk backwards from MI, counting only non-meta instructions
+  unsigned Steps = 0;
+  for (MachineInstr * Cur = &MI; Cur != nullptr && Steps < 5;
+       Cur = Cur->getPrevNode()) {
+    if (Cur->getOpcode() == ARM::t2IT) {
+      if (getITBlockSize(*Cur) < Steps) {
+        return nullptr; // The nearest IT's block ends before MI
+      }
+      distance = Steps;
+      return Cur;
+    }
+    if (!Cur->isMetaInstruction()) {
+      ++Steps;
+    }
+  }
+  return nullptr;
+}
+
+//
+// Method: findIT()
+//
+// Description:
+//   This method finds the IT instruction that forms an IT block containing a
+//   given instruction MI.  It also computes the distance (from 0 to 4, 0 means
+//   MI itself is IT) between the IT and MI.  If there is no such IT, a null
+//   pointer is returned.
+//
+// Input:
+//   MI - A const reference to an instruction from which to find IT.
+//
+// Output:
+//   distance - A reference to an unsigned to store the distance.
+//
+// Return value:
+//   A const pointer to IT if found, nullptr otherwise.
+//
+const MachineInstr *ARMRandezvousInstrumentor::findIT(const MachineInstr & MI,
+                                                      unsigned & distance) {
+  return findIT(const_cast<MachineInstr &>(MI), distance); // Shared logic
+}
+
+//
+// Method: insertInstBefore()
+//
+// Description:
+//   This method inserts an instruction Inst before a given instruction MI.  If
+//   MI is a predicated instruction within an IT block, then Inst will have the
+//   same predicate as MI and also end up in an IT block.
+//
+// Inputs:
+//   MI   - A reference to an instruction before which to insert Inst.
+//   Inst - A pointer to an instruction to insert.
+//
+void ARMRandezvousInstrumentor::insertInstBefore(MachineInstr & MI,
+                                                 MachineInstr * Inst) {
+  MachineInstr * Single[] = { Inst }; // One-element group
+  insertInstsBefore(MI, Single);
+}
+
+//
+// Method: insertInstAfter()
+//
+// Description:
+//   This method inserts an instruction Inst after a given instruction MI.  If
+//   MI is a predicated instruction within an IT block, then Inst will have the
+//   same predicate as MI and also end up in an IT block.
+//
+// Inputs:
+//   MI   - A reference to an instruction after which to insert Inst.
+//   Inst - A pointer to an instruction to insert.
+//
+void ARMRandezvousInstrumentor::insertInstAfter(MachineInstr & MI,
+                                                MachineInstr * Inst) {
+  MachineInstr * Single[] = { Inst }; // One-element group
+  insertInstsAfter(MI, Single);
+}
+
+//
+// Method: insertInstsBefore()
+//
+// Description:
+//   This method inserts a group of instructions contained in an array before a
+//   given instruction MI.  If MI is a predicated instruction within an IT
+//   block, then the new instructions will have the same predicate as MI and
+//   also end up in one or more IT blocks.
+//
+// Inputs:
+//   MI    - A reference to an instruction before which to insert instructions.
+//   Insts - A reference to an array containing the instructions.
+//
+void
+ARMRandezvousInstrumentor::insertInstsBefore(MachineInstr & MI,
+                                             ArrayRef<MachineInstr *> Insts) {
+  assert(!MI.isMetaInstruction() && "Cannot instrument meta instruction!");
+
+  MachineFunction & MF = *MI.getMF();
+  MachineBasicBlock & MBB = *MI.getParent();
+  const TargetInstrInfo * TII = MF.getSubtarget().getInstrInfo();
+
+  unsigned distance; // Set by findIT() only when an IT is found
+  MachineInstr * IT = findIT(MI, distance);
+
+  // Insert the new instructions, in order, immediately before MI
+  for (MachineInstr * Inst : Insts) {
+    MBB.insert(MI, Inst);
+  }
+
+  // If MI is inside an IT block (distance 0 means MI is the IT itself), the
+  // new instructions must be covered by IT(s) as well
+  if (IT != nullptr && distance != 0) {
+    unsigned ITBlockSize = getITBlockSize(*IT);
+    unsigned Mask = IT->getOperand(1).getImm() & 0xf;
+    ARMCC::CondCodes firstCond = (ARMCC::CondCodes)IT->getOperand(0).getImm();
+    std::deque<bool> DQMask = decodeITMask(Mask);
+    bool sameAsFirstCond = DQMask[distance - 1]; // MI's own entry
+
+    // Find the range of instructions that are supposed to be in IT block(s)
+    MachineBasicBlock::iterator firstMI(IT->getNextNode()); // Inclusive
+    MachineBasicBlock::iterator lastMI(MI);                 // Non-inclusive
+    for (unsigned i = distance; i <= ITBlockSize; ) {
+      ++lastMI;
+      // Meta instructions are not counted; the final step always ends the range
+      if (i == ITBlockSize || !lastMI->isMetaInstruction()) {
+        ++i;
+      }
+    }
+
+    // Insert one entry with MI's condition per new non-meta instruction
+    auto it = DQMask.begin();
+    for (unsigned i = 0; i < distance - 1; ++i) {
+      it++;
+    }
+    size_t NumRealInsts = Insts.size();
+    for (MachineInstr * Inst : Insts) {
+      if (Inst->isMetaInstruction()) {
+        --NumRealInsts;
+      }
+    }
+    DQMask.insert(it, NumRealInsts, sameAsFirstCond);
+
+    // Re-cover [firstMI, lastMI) with ITs of up to 4 non-meta instructions
+    for (MachineBasicBlock::iterator i(firstMI); i != lastMI; ) {
+      std::deque<bool> NewDQMask;
+      MachineBasicBlock::iterator j(i);
+      for (unsigned k = 0; k < 4 && j != lastMI; ++j) {
+        if (j->isMetaInstruction()) {
+          continue;
+        }
+        NewDQMask.push_back(DQMask.front());
+        DQMask.pop_front();
+        ++k;
+      }
+      bool flip = false;
+      if (!NewDQMask[0]) { // First entry false: invert mask and condition
+        for (unsigned k = 0; k < NewDQMask.size(); ++k) {
+          NewDQMask[k] = !NewDQMask[k];
+        }
+        flip = true;
+      }
+      BuildMI(MBB, i, IT->getDebugLoc(), TII->get(ARM::t2IT))
+      .addImm(flip ? ARMCC::getOppositeCondition(firstCond) : firstCond)
+      .addImm(encodeITMask(NewDQMask));
+      i = j; // Continue with the next group of instructions
+    }
+
+    // Remove the original IT, which the new IT(s) above replace
+    IT->eraseFromParent();
+  }
+}
+
+//
+// Method: insertInstsAfter()
+//
+// Description:
+//   This method inserts a group of instructions contained in an array after a
+//   given instruction MI.  If MI is a predicated instruction within an IT
+//   block, then the new instructions will have the same predicate as MI and
+//   also end up in one or more IT blocks.
+//
+// Inputs:
+//   MI    - A reference to an instruction after which to insert instructions.
+//   Insts - A reference to an array containing the instructions.
+//
+void
+ARMRandezvousInstrumentor::insertInstsAfter(MachineInstr & MI,
+                                            ArrayRef<MachineInstr *> Insts) {
+  assert(!MI.isMetaInstruction() && "Cannot instrument meta instruction!");
+
+  MachineFunction & MF = *MI.getMF();
+  MachineBasicBlock & MBB = *MI.getParent();
+  const TargetInstrInfo * TII = MF.getSubtarget().getInstrInfo();
+  MachineBasicBlock::iterator NextMI(MI); ++NextMI;
+
+  unsigned distance; // Set by findIT() only when an IT is found
+  MachineInstr * IT = findIT(MI, distance);
+
+  // Insert the new instructions, in order, between MI and its old successor
+  for (MachineInstr * Inst : Insts) {
+    MBB.insert(NextMI, Inst);
+  }
+
+  // If MI is inside an IT block (distance 0 means MI is the IT itself), the
+  // new instructions must be covered by IT(s) as well
+  if (IT != nullptr && distance != 0) {
+    unsigned ITBlockSize = getITBlockSize(*IT);
+    unsigned Mask = IT->getOperand(1).getImm() & 0xf;
+    ARMCC::CondCodes firstCond = (ARMCC::CondCodes)IT->getOperand(0).getImm();
+    std::deque<bool> DQMask = decodeITMask(Mask);
+    bool sameAsFirstCond = DQMask[distance - 1]; // MI's own entry
+
+    // Find the instructions to put in IT block(s); Insts must be non-empty here
+    MachineBasicBlock::iterator firstMI(IT->getNextNode()); // Inclusive
+    MachineBasicBlock::iterator lastMI(Insts.back());       // Non-inclusive
+    for (unsigned i = distance; i <= ITBlockSize; ) {
+      ++lastMI;
+      // Meta instructions are not counted; the final step always ends the range
+      if (i == ITBlockSize || !lastMI->isMetaInstruction()) {
+        ++i;
+      }
+    }
+
+    // Insert one entry with MI's condition per new non-meta instruction,
+    auto it = DQMask.begin(); // placed right after MI's own entry
+    for (unsigned i = 0; i <= distance - 1; ++i) {
+      it++;
+    }
+    size_t NumRealInsts = Insts.size();
+    for (MachineInstr * Inst : Insts) {
+      if (Inst->isMetaInstruction()) {
+        --NumRealInsts;
+      }
+    }
+    DQMask.insert(it, NumRealInsts, sameAsFirstCond);
+
+    // Re-cover [firstMI, lastMI) with ITs of up to 4 non-meta instructions
+    for (MachineBasicBlock::iterator i(firstMI); i != lastMI; ) {
+      std::deque<bool> NewDQMask;
+      MachineBasicBlock::iterator j(i);
+      for (unsigned k = 0; k < 4 && j != lastMI; ++j) {
+        if (j->isMetaInstruction()) {
+          continue;
+        }
+        NewDQMask.push_back(DQMask.front());
+        DQMask.pop_front();
+        ++k;
+      }
+      bool flip = false;
+      if (!NewDQMask[0]) { // First entry false: invert mask and condition
+        for (unsigned k = 0; k < NewDQMask.size(); ++k) {
+          NewDQMask[k] = !NewDQMask[k];
+        }
+        flip = true;
+      }
+      BuildMI(MBB, i, IT->getDebugLoc(), TII->get(ARM::t2IT))
+      .addImm(flip ? ARMCC::getOppositeCondition(firstCond) : firstCond)
+      .addImm(encodeITMask(NewDQMask));
+      i = j; // Continue with the next group of instructions
+    }
+
+    // Remove the original IT, which the new IT(s) above replace
+    IT->eraseFromParent();
+  }
+}
+
+//
+// Method: removeInst()
+//
+// Description:
+//   This method removes a given instruction MI from machine IR.  If MI is a
+//   predicated instruction within an IT block, then its corresponding IT
+//   instruction will be updated or removed as well.  Note that MI cannot be an
+//   IT instruction itself.
+//
+// Input:
+//   MI - A reference to the instruction to remove.
+//
+void
+ARMRandezvousInstrumentor::removeInst(MachineInstr & MI) {
+  assert(!MI.isMetaInstruction() && "Cannot instrument meta instruction!");
+
+  // When MI sits inside an IT block, the IT instruction has to be updated
+  // (or removed altogether) to reflect that MI is gone; this is done first,
+  // and MI itself is erased at the very end
+  unsigned distance;
+  if (MachineInstr * IT = findIT(MI, distance)) {
+    assert(distance != 0 && "Cannot remove an IT instruction directly!");
+
+    const unsigned Mask = IT->getOperand(1).getImm() & 0xf;
+    const ARMCC::CondCodes firstCond =
+        (ARMCC::CondCodes)IT->getOperand(0).getImm();
+
+    // Decode the mask into one entry per covered instruction and drop the
+    // entry at MI's position (distance - 1)
+    std::deque<bool> Entries = decodeITMask(Mask);
+    Entries.erase(Entries.begin() + (distance - 1));
+
+    if (Entries.empty()) {
+      // MI was the only instruction in the IT block, so the IT is removed as
+      // well
+      IT->eraseFromParent();
+    } else {
+      // If MI was the first instruction in the IT block, the new first entry
+      // may be false; in that case invert every entry and switch the IT to
+      // the opposite condition
+      if (!Entries.front()) {
+        for (bool & Entry : Entries) {
+          Entry = !Entry;
+        }
+        IT->getOperand(0).setImm(ARMCC::getOppositeCondition(firstCond));
+      }
+
+      // Write the updated mask back into the IT
+      IT->getOperand(1).setImm(encodeITMask(Entries));
+    }
+  }
+
+  // Now do remove MI
+  MI.eraseFromParent();
+}
+
+//
+// Method: splitBasicBlockBefore()
+//
+// Description:
+//   This method cuts a basic block in two immediately in front of a given
+//   instruction MI: MI and everything after it move to a newly created basic
+//   block placed right after the original one.  If MI is the first
+//   instruction in the basic block, the original basic block is left empty.
+//   If MI is a predicated instruction within an IT block, the new basic block
+//   starts with an IT instruction that covers MI and the succeeding
+//   instructions of the original IT block, and the old IT instruction is
+//   narrowed to the instructions that stay behind (or simply moved over when
+//   MI is the first instruction it covers).
+//
+// Input:
+//   MI - A reference to an instruction before which to split the basic block.
+//
+// Return value:
+//   A pointer to the new basic block that contains MI.
+//
+MachineBasicBlock *
+ARMRandezvousInstrumentor::splitBasicBlockBefore(MachineInstr & MI) {
+  assert(!MI.isMetaInstruction() && "Cannot instrument meta instruction!");
+
+  // The IT lookup happens while MI is still in its original basic block
+  unsigned distance;
+  MachineInstr * IT = findIT(MI, distance);
+
+  MachineFunction & MF = *MI.getMF();
+  MachineBasicBlock & MBB = *MI.getParent();
+  const TargetInstrInfo * TII = MF.getSubtarget().getInstrInfo();
+
+  // Create the new basic block and place it right after the old one
+  MachineBasicBlock * NewMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+  MF.push_back(NewMBB);
+  NewMBB->moveAfter(&MBB);
+
+  // MI and all instructions following it go to the new basic block
+  NewMBB->splice(NewMBB->end(), &MBB, &MI, MBB.end());
+
+  // The new basic block inherits the successors; the old one falls into it
+  NewMBB->transferSuccessors(&MBB);
+  MBB.addSuccessor(NewMBB);
+
+  // No IT block involved, or MI is the IT instruction itself
+  if (IT == nullptr || distance == 0) {
+    return NewMBB;
+  }
+
+  // MI is the first instruction covered by the IT: the whole IT block moved,
+  // so the IT only needs to follow it
+  if (distance == 1) {
+    NewMBB->splice(&MI, &MBB, IT);
+    return NewMBB;
+  }
+
+  ARMCC::CondCodes firstCond = (ARMCC::CondCodes)IT->getOperand(0).getImm();
+  std::deque<bool> DQMask = decodeITMask(IT->getOperand(1).getImm() & 0xf);
+
+  // Entries from MI onward form the mask of the new IT; the rest stays with
+  // the old IT
+  const auto Cut = DQMask.begin() + (distance - 1);
+  std::deque<bool> NewDQMask(Cut, DQMask.end());
+  DQMask.erase(Cut, DQMask.end());
+
+  // Narrow the old IT
+  IT->getOperand(1).setImm(encodeITMask(DQMask));
+
+  // The first entry of a mask must be true; if MI had the opposite predicate
+  // of the old first instruction, invert the new mask and its condition
+  const bool flip = !NewDQMask.front();
+  if (flip) {
+    for (bool & sameAsFirst : NewDQMask) {
+      sameAsFirst = !sameAsFirst;
+    }
+  }
+  BuildMI(*NewMBB, MI, IT->getDebugLoc(), TII->get(ARM::t2IT))
+  .addImm(flip ? ARMCC::getOppositeCondition(firstCond) : firstCond)
+  .addImm(encodeITMask(NewDQMask));
+
+  return NewMBB;
+}
+
+//
+// Method: splitBasicBlockAfter()
+//
+// Description:
+//   This method splits a basic block into two basic blocks at the point right
+//   after a given instruction MI.  If MI is the last instruction in the basic
+//   block, an empty basic block will be created.  If MI is a predicated
+//   instruction within an IT block and is not the last one, then a new IT
+//   instruction will be inserted at the beginning of the new basic block to
+//   cover MI's succeeding instructions that were in the original IT block.  In
+//   that case, the old IT instruction will also be updated accordingly.  If MI
+//   is an IT instruction itself, the IT is moved to the beginning of the new
+//   basic block instead, so that it stays with the instructions it covers.
+//
+// Input:
+//   MI - A reference to an instruction after which to split the basic block.
+//
+// Return value:
+//   A pointer to the new basic block that contains MI's next instruction.  A
+//   new basic block is always created, so the result is never nullptr.
+//
+MachineBasicBlock *
+ARMRandezvousInstrumentor::splitBasicBlockAfter(MachineInstr & MI) {
+  assert(!MI.isMetaInstruction() && "Cannot instrument meta instruction!");
+
+  unsigned distance;
+  MachineInstr * IT = findIT(MI, distance);
+
+  MachineFunction & MF = *MI.getMF();
+  MachineBasicBlock & MBB = *MI.getParent();
+  const TargetInstrInfo * TII = MF.getSubtarget().getInstrInfo();
+
+  // Create a new basic block
+  MachineBasicBlock & NewMBB = *MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+  MF.push_back(&NewMBB);
+  NewMBB.moveAfter(&MBB);
+
+  // Move instructions from the old basic block to the new basic block
+  MachineBasicBlock::iterator next = ++MI.getIterator();
+  NewMBB.splice(NewMBB.end(), &MBB, next, MBB.end());
+
+  // Move successors from the old basic block to the new basic block
+  NewMBB.transferSuccessors(&MBB);
+  MBB.addSuccessor(&NewMBB);
+
+  // If MI was inside an IT block and is not the last one, we should make sure
+  // to update/remove the IT instruction and insert a new IT in the new basic
+  // block
+  if (IT != nullptr) {
+    if (distance == 0) {
+      // Shortcut for just moving the IT over (MI is the IT itself)
+      NewMBB.splice(NewMBB.begin(), &MBB, IT);
+    } else {
+      unsigned Mask = IT->getOperand(1).getImm() & 0xf;
+      ARMCC::CondCodes firstCond = (ARMCC::CondCodes)IT->getOperand(0).getImm();
+      std::deque<bool> DQMask = decodeITMask(Mask);
+
+      // DQMask holds one entry per instruction in the IT block, so MI is the
+      // last one once distance reaches DQMask.size().  Comparing against the
+      // maximum IT block size of 4 is not enough: for a shorter IT block the
+      // remainder would be empty, and indexing it or encoding it is invalid.
+      if (distance < DQMask.size()) {
+        // Create a new DQMask and adjust both
+        auto it = DQMask.begin() + distance;
+        std::deque<bool> NewDQMask(it, DQMask.end());
+        DQMask.erase(it, DQMask.end());
+
+        // Update the old IT mask
+        IT->getOperand(1).setImm(encodeITMask(DQMask));
+
+        // Create a new IT; its first entry must be true, so invert the mask
+        // and the condition if the first moved instruction had the opposite
+        // predicate
+        bool flip = false;
+        if (!NewDQMask[0]) {
+          for (unsigned i = 0; i < NewDQMask.size(); ++i) {
+            NewDQMask[i] = !NewDQMask[i];
+          }
+          flip = true;
+        }
+        BuildMI(NewMBB, NewMBB.begin(), IT->getDebugLoc(), TII->get(ARM::t2IT))
+        .addImm(flip ? ARMCC::getOppositeCondition(firstCond) : firstCond)
+        .addImm(encodeITMask(NewDQMask));
+      }
+    }
+  }
+
+  return &NewMBB;
+}
+
+//
+// Method: decodeITMask()
+//
+// Description:
+//   This method expands an IT mask in LLVM's representation into a deque of
+//   boolean values, one per instruction in the IT block.  Each value tells
+//   whether the corresponding instruction has the same predicate as the first
+//   instruction in the IT block; the value for the first instruction is
+//   therefore always true.
+//
+// Input:
+//   Mask - The IT mask in LLVM's representation (immediate value of the second
+//          operand of a t2IT instruction).
+//
+// Return value:
+//   A deque of boolean values (see the above description).
+//
+std::deque<bool>
+ARMRandezvousInstrumentor::decodeITMask(unsigned Mask) {
+  Mask &= 0xf;
+  assert(Mask != 0 && "Invalid IT mask!");
+
+  // The lowest set bit terminates the mask; find its position
+  unsigned stop = 0;
+  while (stop < 4 && (Mask & (1u << stop)) == 0) {
+    ++stop;
+  }
+
+  // Bits above the terminating bit, from bit 3 downward, describe the second
+  // and later instructions; a clear bit means "same predicate as the first"
+  std::deque<bool> DQMask(1, true);
+  for (unsigned bit = 4; bit-- > stop + 1; ) {
+    DQMask.push_back(((Mask >> bit) & 1u) == 0);
+  }
+
+  return DQMask;
+}
+
+//
+// Method: encodeITMask()
+//
+// Description:
+//   This method packs an IT mask given as a list of boolean values (one per
+//   instruction in an IT block) into LLVM's representation.  Each boolean
+//   value tells whether the corresponding instruction has the same predicate
+//   as the first instruction in the IT block, so the list must be non-empty,
+//   hold at most four values, and start with true.
+//
+// Input:
+//   DQMask - An IT mask in the form of a list of boolean values.
+//
+// Return value:
+//   The IT mask in LLVM's representation (immediate value of the second
+//   operand of a t2IT instruction).
+//
+unsigned
+ARMRandezvousInstrumentor::encodeITMask(std::deque<bool> DQMask) {
+  assert(!DQMask.empty() && "Invalid deque representation of an IT mask!");
+  assert(DQMask.size() <= 4 && "Invalid deque representation of an IT mask!");
+  assert(DQMask[0] && "Invalid deque representation of an IT mask!");
+
+  const size_t NumInsts = DQMask.size();
+
+  // One bit per instruction after the first: set means "different predicate"
+  unsigned Mask = 0;
+  size_t idx = 1;
+  while (idx < NumInsts) {
+    Mask = (Mask << 1) | (DQMask[idx] ? 0u : 1u);
+    ++idx;
+  }
+
+  // Append the terminating bit, then left-align everything in 4 bits
+  Mask = (Mask << 1) | 1u;
+
+  return Mask << (4 - NumInsts);
+}
+
+//
+// Method: findFreeRegistersBefore()
+//
+// Description:
+//   This method determines which ARM core registers are live right before a
+//   given instruction MI, by walking backward from the end of MI's basic
+//   block, and returns the core registers that are neither live nor reserved
+//   and can therefore be used for instrumentation purposes.
+//
+// Inputs:
+//   MI    - A reference to the instruction before which to find free
+//           registers.
+//   Thumb - Whether we are looking for Thumb registers (low registers, i.e.,
+//           R0 -- R7) or ARM registers (both low and high registers, i.e.,
+//           R0 -- R12 and LR).
+//
+// Return value:
+//   A vector of free registers (might be empty, if none is found).
+//
+std::vector<Register>
+ARMRandezvousInstrumentor::findFreeRegistersBefore(const MachineInstr & MI,
+                                                   bool Thumb) {
+  assert(!MI.isMetaInstruction() && "Cannot instrument meta instruction!");
+
+  unsigned distance;
+  const MachineInstr * IT = findIT(MI, distance);
+
+  Register PredReg;
+  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+
+  const MachineFunction & MF = *MI.getMF();
+  const MachineBasicBlock & MBB = *MI.getParent();
+  const MachineRegisterInfo & MRI = MF.getRegInfo();
+  const TargetRegisterInfo * TRI = MF.getSubtarget().getRegisterInfo();
+
+  // Start from the registers that are live at the end of MBB
+  LivePhysRegs UsedRegs(*TRI);
+  UsedRegs.addLiveOuts(MBB);
+
+  // Walk backward from the end of MBB up to and including MI, so that the
+  // result describes liveness right before MI
+  const MachineBasicBlock::const_iterator Stop(MI);
+  for (MachineBasicBlock::const_iterator Cur = MBB.end(); Cur != Stop; ) {
+    --Cur;
+    const MachineInstr & CurMI = *Cur;
+
+    unsigned CurDistance;
+    const MachineInstr * CurIT = findIT(CurMI, CurDistance);
+    Register CurPredReg;
+    ARMCC::CondCodes CurPred = getInstrPredicate(CurMI, CurPredReg);
+
+    const bool SameITBlock = CurIT != nullptr && CurIT == IT;
+
+    // Instructions in MI's IT block with a different predicate are skipped
+    if (SameITBlock && CurPred != Pred) {
+      continue;
+    }
+
+    // A return in MI's IT block with the same predicate resets the live
+    // registers to the callee-saved registers
+    if (SameITBlock && CurMI.isReturn()) {
+      UsedRegs.init(*TRI);
+      auto CSR = MRI.getCalleeSavedRegs();
+      while (CSR && *CSR) {
+        UsedRegs.addReg(*CSR);
+        ++CSR;
+      }
+    }
+
+    UsedRegs.stepBackward(CurMI);
+  }
+
+  // Collect registers that are neither reserved nor live
+  std::vector<Register> FreeRegs;
+  auto addIfFree = [&](Register Reg) {
+    if (MRI.isReserved(Reg) || UsedRegs.contains(Reg)) {
+      return;
+    }
+    FreeRegs.push_back(Reg);
+  };
+
+  const auto LowRegs = {
+    ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+  };
+  const auto HighRegs = {
+    ARM::R8, ARM::R9, ARM::R10, ARM::R11, ARM::R12, ARM::LR,
+  };
+  for (Register Reg : LowRegs) {
+    addIfFree(Reg);
+  }
+  if (!Thumb) {
+    for (Register Reg : HighRegs) {
+      addIfFree(Reg);
+    }
+  }
+
+  return FreeRegs;
+}
+
+//
+// Method: findFreeRegistersAfter()
+//
+// Description:
+//   This method determines which ARM core registers are live right after a
+//   given instruction MI, by walking backward from the end of MI's basic
+//   block without stepping over MI itself, and returns the core registers
+//   that are neither live nor reserved and can therefore be used for
+//   instrumentation purposes.
+//
+// Inputs:
+//   MI    - A reference to the instruction after which to find free
+//           registers.
+//   Thumb - Whether we are looking for Thumb registers (low registers, i.e.,
+//           R0 -- R7) or ARM registers (both low and high registers, i.e.,
+//           R0 -- R12 and LR).
+//
+// Return value:
+//   A vector of free registers (might be empty, if none is found).
+//
+std::vector<Register>
+ARMRandezvousInstrumentor::findFreeRegistersAfter(const MachineInstr & MI,
+                                                  bool Thumb) {
+  assert(!MI.isMetaInstruction() && "Cannot instrument meta instruction!");
+
+  unsigned distance;
+  const MachineInstr * IT = findIT(MI, distance);
+
+  Register PredReg;
+  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+
+  const MachineFunction & MF = *MI.getMF();
+  const MachineBasicBlock & MBB = *MI.getParent();
+  const MachineRegisterInfo & MRI = MF.getRegInfo();
+  const TargetRegisterInfo * TRI = MF.getSubtarget().getRegisterInfo();
+
+  // Start from the registers that are live at the end of MBB
+  LivePhysRegs UsedRegs(*TRI);
+  UsedRegs.addLiveOuts(MBB);
+
+  // A trailing return also contributes the registers it uses: if MI is that
+  // return, MI is never stepped over below, so its (potentially live) uses
+  // would otherwise not be counted
+  MachineBasicBlock::const_iterator Terminator = MBB.getLastNonDebugInstr();
+  if (Terminator != MBB.end() && Terminator->isReturn()) {
+    UsedRegs.addUses(*Terminator);
+  }
+
+  // Walk backward from the end of MBB down to MI, stepping over every
+  // instruction except MI itself, so that the result describes liveness
+  // right after MI
+  const MachineBasicBlock::const_iterator Stop(MI);
+  for (MachineBasicBlock::const_iterator Cur = MBB.end(); Cur != Stop; ) {
+    --Cur;
+    const MachineInstr & CurMI = *Cur;
+
+    unsigned CurDistance;
+    const MachineInstr * CurIT = findIT(CurMI, CurDistance);
+    Register CurPredReg;
+    ARMCC::CondCodes CurPred = getInstrPredicate(CurMI, CurPredReg);
+
+    const bool SameITBlock = CurIT != nullptr && CurIT == IT;
+
+    // Instructions in MI's IT block with a different predicate are skipped
+    if (SameITBlock && CurPred != Pred) {
+      continue;
+    }
+
+    // A return in MI's IT block with the same predicate resets the live
+    // registers to the callee-saved registers plus the registers the return
+    // itself uses (needed when MI is that return, as MI is not stepped over)
+    if (SameITBlock && CurMI.isReturn()) {
+      UsedRegs.init(*TRI);
+      auto CSR = MRI.getCalleeSavedRegs();
+      while (CSR && *CSR) {
+        UsedRegs.addReg(*CSR);
+        ++CSR;
+      }
+      UsedRegs.addUses(CurMI);
+    }
+
+    if (Cur != Stop) {
+      UsedRegs.stepBackward(CurMI);
+    }
+  }
+
+  // Collect registers that are neither reserved nor live
+  std::vector<Register> FreeRegs;
+  auto addIfFree = [&](Register Reg) {
+    if (MRI.isReserved(Reg) || UsedRegs.contains(Reg)) {
+      return;
+    }
+    FreeRegs.push_back(Reg);
+  };
+
+  const auto LowRegs = {
+    ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+  };
+  const auto HighRegs = {
+    ARM::R8, ARM::R9, ARM::R10, ARM::R11, ARM::R12, ARM::LR,
+  };
+  for (Register Reg : LowRegs) {
+    addIfFree(Reg);
+  }
+  if (!Thumb) {
+    for (Register Reg : HighRegs) {
+      addIfFree(Reg);
+    }
+  }
+
+  return FreeRegs;
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/ARM/ARMRandezvousInstrumentor.h b/llvm/lib/Target/ARM/ARMRandezvousInstrumentor.h
new file mode 100644
index 00000000000000..c84c430e7e3149
--- /dev/null
+++ b/llvm/lib/Target/ARM/ARMRandezvousInstrumentor.h
@@ -0,0 +1,178 @@
+//===- ARMRandezvousInstrumentor.h - A helper class for instrumentation ---===//
+//
+// Copyright (c) 2021-2022, University of Rochester
+//
+// Part of the Randezvous Project, under the Apache License v2.0 with
+// LLVM Exceptions.  See LICENSE.txt in the llvm directory for license
+// information.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces of a class that can help passes of its
+// subclass easily instrument ARM machine IR without concerns of breaking IT
+// blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_RANDEZVOUS_INSTRUMENTOR
+#define ARM_RANDEZVOUS_INSTRUMENTOR
+
+#include "ARMBaseInstrInfo.h"
+#include <deque>
+namespace llvm {
+  //====================================================================
+  // Static inline functions.
+  //====================================================================
+
+  //
+  // Function: getBasicBlockCodeSize()
+  //
+  // Description:
+  //   This function computes the code size of a machine basic block by adding
+  //   up the sizes that the target instruction info reports for each of its
+  //   instructions.
+  //
+  // Input:
+  //   MBB - A reference to the target machine basic block.
+  //
+  // Return value:
+  //   The size (in bytes) of the machine basic block.
+  //
+  static inline size_t getBasicBlockCodeSize(const MachineBasicBlock & MBB) {
+    const TargetInstrInfo * TII =
+      MBB.getParent()->getSubtarget().getInstrInfo();
+
+    size_t TotalBytes = 0;
+    for (auto It = MBB.begin(), End = MBB.end(); It != End; ++It) {
+      TotalBytes += TII->getInstSizeInBytes(*It);
+    }
+
+    return TotalBytes;
+  }
+
+  //
+  // Function: getFunctionCodeSize()
+  //
+  // Description:
+  //   This function computes the code size of a machine function as the sum
+  //   of the code sizes of all its machine basic blocks.
+  //
+  // Input:
+  //   MF - A reference to the target machine function.
+  //
+  // Return value:
+  //   The size (in bytes) of the machine function.
+  //
+  static inline size_t getFunctionCodeSize(const MachineFunction & MF) {
+    size_t TotalBytes = 0;
+    for (auto It = MF.begin(), End = MF.end(); It != End; ++It) {
+      TotalBytes += getBasicBlockCodeSize(*It);
+    }
+
+    return TotalBytes;
+  }
+
+  //
+  // Function: containsFunctionPointerType()
+  //
+  // Description:
+  //   This function examines a Type to see whether it can explicitly contain one
+  //   or more function pointers.  Note that this function recurses on aggregate
+  //   types.
+  //
+  // Input:
+  //   Ty - A pointer to a Type to examine.
+  //
+  // Return value:
+  //   true  - The Type can contain one or more function pointers.
+  //   false - The Type does not contain a function pointer.
+  //
+//   static inline bool
+//   containsFunctionPointerType(Type * Ty) {
+//     // Pointer
+//     if (PointerType * PtrTy = dyn_cast<PointerType>(Ty)) {
+//       return PtrTy->getParamElementType()->isFunctionTy();
+//     }
+
+//     // Array
+//     if (ArrayType * ArrayTy = dyn_cast<ArrayType>(Ty)) {
+//       return containsFunctionPointerType(ArrayTy->getElementType());
+//     }
+
+//     // Struct
+//     if (StructType * StructTy = dyn_cast<StructType>(Ty)) {
+//       for (Type * ElementTy : StructTy->elements()) {
+//         if (containsFunctionPointerType(ElementTy)) {
+//           return true;
+//         }
+//       }
+//     }
+
+//     // Other types do not contain function pointers
+//     return false;
+//   }
+
+  //
+  // Function: createNonZeroInitializerFor()
+  //
+  // Description:
+  //   This function creates a non-zero Constant initializer for a given Type,
+  //   which is supposed to contain one or more function pointers.  Every
+  //   pointer is initialized to the integer value 1 cast to the pointer type;
+  //   everything that is not a pointer, array, or struct is zero-initialized.
+  //   Note that this function recurses on aggregate types.
+  //
+  // Input:
+  //   Ty - A pointer to a Type for which to create an initializer.
+  //
+  // Return value:
+  //   A pointer to a created Constant.
+  //
+  static inline Constant *
+  createNonZeroInitializerFor(Type * Ty) {
+    // Pointer: this is where we insert non-zero values
+    if (isa<PointerType>(Ty)) {
+      return ConstantExpr::getIntToPtr(
+        ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1), Ty
+      );
+    }
+
+    // Array: all elements share one type, so the element initializer is
+    // created once and replicated instead of being rebuilt per element
+    if (ArrayType * ArrayTy = dyn_cast<ArrayType>(Ty)) {
+      const uint64_t NumElements = ArrayTy->getNumElements();
+      std::vector<Constant *> InitArray;
+      if (NumElements != 0) {
+        InitArray.assign(NumElements,
+                         createNonZeroInitializerFor(ArrayTy->getElementType()));
+      }
+      return ConstantArray::get(ArrayTy, InitArray);
+    }
+
+    // Struct: each element may have a different type
+    if (StructType * StructTy = dyn_cast<StructType>(Ty)) {
+      const unsigned NumElements = StructTy->getNumElements();
+      std::vector<Constant *> InitArray;
+      InitArray.reserve(NumElements);
+      for (unsigned i = 0; i < NumElements; ++i) {
+        InitArray.push_back(createNonZeroInitializerFor(StructTy->getElementType(i)));
+      }
+      return ConstantStruct::get(StructTy, InitArray);
+    }
+
+    // Zeroing out other types is fine
+    return Constant::getNullValue(Ty);
+  }
+
+  //====================================================================
+  // Class ARMRandezvousInstrumentor.
+  //====================================================================
+
+  // Helper base for passes that instrument ARM machine IR.  The methods below
+  // are defined in ARMRandezvousInstrumentor.cpp and take care of IT
+  // instructions when the instruction they operate on is inside an IT block.
+  struct ARMRandezvousInstrumentor {
+    // Insert a single instruction Inst before/after MI.
+    // NOTE(review): IT block handling is presumably the same as for the
+    // plural variants below -- confirm against the definitions.
+    void insertInstBefore(MachineInstr & MI, MachineInstr * Inst);
+
+    void insertInstAfter(MachineInstr & MI, MachineInstr * Inst);
+
+    // Insert a sequence of instructions Insts before/after MI.
+    void insertInstsBefore(MachineInstr & MI, ArrayRef<MachineInstr *> Insts);
+
+    void insertInstsAfter(MachineInstr & MI, ArrayRef<MachineInstr *> Insts);
+
+    // Erase MI; if MI is inside an IT block, the IT instruction is updated or
+    // erased as well.  MI must not be an IT instruction itself.
+    void removeInst(MachineInstr & MI);
+
+    // Split MI's basic block right before MI; returns the new basic block,
+    // which contains MI.
+    MachineBasicBlock * splitBasicBlockBefore(MachineInstr & MI);
+
+    // Split MI's basic block right after MI; returns the new basic block.
+    MachineBasicBlock * splitBasicBlockAfter(MachineInstr & MI);
+
+    // Return the core registers that are neither reserved nor live right
+    // before/after MI.  With Thumb set only R0 -- R7 are considered,
+    // otherwise R0 -- R12 and LR.
+    std::vector<Register> findFreeRegistersBefore(const MachineInstr & MI,
+                                                  bool Thumb = false);
+    std::vector<Register> findFreeRegistersAfter(const MachineInstr & MI,
+                                                 bool Thumb = false);
+
+  private:
+    // NOTE(review): presumably the number of instructions covered by the
+    // given IT instruction -- confirm against the definition.
+    unsigned getITBlockSize(const MachineInstr & IT);
+    // As used by the callers: returns the IT instruction of the IT block MI
+    // belongs to, or nullptr if there is none; distance receives MI's
+    // position relative to that IT (0 when MI is the IT instruction itself).
+    MachineInstr * findIT(MachineInstr & MI, unsigned & distance);
+    const MachineInstr * findIT(const MachineInstr & MI, unsigned & distance);
+    // Convert between the immediate mask operand of a t2IT instruction and a
+    // per-instruction list of "same predicate as the first instruction" flags.
+    std::deque<bool> decodeITMask(unsigned Mask);
+    unsigned encodeITMask(std::deque<bool> DQMask);
+  };
+}
+
+#endif
\ No newline at end of file
diff --git a/llvm/lib/Target/ARM/ARMRandezvousOptions.cpp b/llvm/lib/Target/ARM/ARMRandezvousOptions.cpp
new file mode 100644
index 00000000000000..58dbfca265ad4a
--- /dev/null
+++ b/llvm/lib/Target/ARM/ARMRandezvousOptions.cpp
@@ -0,0 +1,210 @@
+//===- ARMRandezvousOptions.cpp - ARM Randezvous Command Line Options -----===//
+//
+// Copyright (c) 2021-2022, University of Rochester
+//
+// Part of the Randezvous Project, under the Apache License v2.0 with
+// LLVM Exceptions.  See LICENSE.txt in the llvm directory for license
+// information.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the command line options for ARM Randezvous passes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMRandezvousOptions.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Randezvous pass enablers
+//===----------------------------------------------------------------------===//
+
+// Each enabler below is a global that a hidden command line option writes to
+// through cl::location; cl::init supplies the initial value (false).
+
+// Value of -arm-randezvous-clr
+bool EnableRandezvousCLR;
+static cl::opt<bool, true>
+CLR("arm-randezvous-clr",
+    cl::Hidden,
+    cl::desc("Enable ARM Randezvous Code Layout Randomization"),
+    cl::location(EnableRandezvousCLR),
+    cl::init(false));
+
+// Value of -arm-randezvous-bblr
+bool EnableRandezvousBBLR;
+static cl::opt<bool, true>
+BBLR("arm-randezvous-bblr",
+     cl::Hidden,
+     cl::desc("Enable Basic Block Layout Randomization for ARM Randezvous CLR"),
+     cl::location(EnableRandezvousBBLR),
+     cl::init(false));
+
+// Value of -arm-randezvous-bbclr
+bool EnableRandezvousBBCLR;
+static cl::opt<bool, true>
+BBCLR("arm-randezvous-bbclr",
+      cl::Hidden,
+      cl::desc("Enable Basic Block Cluster Layout Randomization for ARM Randezvous CLR"),
+      cl::location(EnableRandezvousBBCLR),
+      cl::init(false));
+
+// Value of -arm-randezvous-picoxom
+bool EnableRandezvousPicoXOM;
+static cl::opt<bool, true>
+PicoXOM("arm-randezvous-picoxom",
+        cl::Hidden,
+        cl::desc("Enable ARM Randezvous Execute-Only Memory"),
+        cl::location(EnableRandezvousPicoXOM),
+        cl::init(false));
+
+// Value of -arm-randezvous-gdlr
+bool EnableRandezvousGDLR;
+static cl::opt<bool, true>
+GDLR("arm-randezvous-gdlr",
+     cl::Hidden,
+     cl::desc("Enable ARM Randezvous Global Data Layout Randomization"),
+     cl::location(EnableRandezvousGDLR),
+     cl::init(false));
+
+// Value of -arm-randezvous-dp
+bool EnableRandezvousDecoyPointers;
+static cl::opt<bool, true>
+DecoyPointers("arm-randezvous-dp",
+              cl::Hidden,
+              cl::desc("Enable ARM Randezvous Decoy Pointers"),
+              cl::location(EnableRandezvousDecoyPointers),
+              cl::init(false));
+
+// Value of -arm-randezvous-global-guard (initial value: false)
+bool EnableRandezvousGlobalGuard;
+static cl::opt<bool, true>
+GlobalGuard("arm-randezvous-global-guard",
+            cl::Hidden,
+            cl::desc("Enable ARM Randezvous Global Guard"),
+            cl::location(EnableRandezvousGlobalGuard),
+            cl::init(false));
+
+// Value of -arm-randezvous-shadow-stack (initial value: false)
+bool EnableRandezvousShadowStack;
+static cl::opt<bool, true>
+ShadowStack("arm-randezvous-shadow-stack",
+            cl::Hidden,
+            cl::desc("Enable ARM Randezvous Shadow Stack"),
+            cl::location(EnableRandezvousShadowStack),
+            cl::init(false));
+
+// Value of -arm-randezvous-ran (initial value: false)
+bool EnableRandezvousRAN;
+static cl::opt<bool, true>
+RAN("arm-randezvous-ran",
+    cl::Hidden,
+    cl::desc("Enable ARM Randezvous Return Address Nullification"),
+    cl::location(EnableRandezvousRAN),
+    cl::init(false));
+
+// Value of -arm-randezvous-lgp (initial value: false)
+bool EnableRandezvousLGPromote;
+static cl::opt<bool, true>
+LGPromote("arm-randezvous-lgp",
+          cl::Hidden,
+          cl::desc("Enable ARM Randezvous Local-to-Global Promotion"),
+          cl::location(EnableRandezvousLGPromote),
+          cl::init(false));
+
+// Value of -arm-randezvous-icall-limiter (initial value: false)
+bool EnableRandezvousICallLimiter;
+static cl::opt<bool, true>
+ICallLimiter("arm-randezvous-icall-limiter",
+             cl::Hidden,
+             cl::desc("Enable ARM Randezvous Indirect Call Limiter"),
+             cl::location(EnableRandezvousICallLimiter),
+             cl::init(false));
+
+//===----------------------------------------------------------------------===//
+// Randezvous pass seeds
+//===----------------------------------------------------------------------===//
+
+// Value of -arm-randezvous-clr-seed (initial value: 0)
+uint64_t RandezvousCLRSeed;
+static cl::opt<uint64_t, true>
+CLRSeed("arm-randezvous-clr-seed",
+        cl::Hidden,
+        cl::desc("Seed for the RNG used in ARM Randezvous CLR"),
+        cl::location(RandezvousCLRSeed),
+        cl::init(0));
+
+// Value of -arm-randezvous-gdlr-seed (initial value: 0)
+uint64_t RandezvousGDLRSeed;
+static cl::opt<uint64_t, true>
+GDLRSeed("arm-randezvous-gdlr-seed",
+         cl::Hidden,
+         cl::desc("Seed for the RNG used in ARM Randezvous GDLR"),
+         cl::location(RandezvousGDLRSeed),
+         cl::init(0));
+
+// Value of -arm-randezvous-shadow-stack-seed (initial value: 0)
+uint64_t RandezvousShadowStackSeed;
+static cl::opt<uint64_t, true>
+ShadowStackSeed("arm-randezvous-shadow-stack-seed",
+                cl::Hidden,
+                cl::desc("Seed for the RNG used in ARM Randezvous Shadow Stack"),
+                cl::location(RandezvousShadowStackSeed),
+                cl::init(0));
+
+//===----------------------------------------------------------------------===//
+// Size options used by Randezvous passes
+//===----------------------------------------------------------------------===//
+
+// Value of -arm-randezvous-max-text-size, in bytes
+size_t RandezvousMaxTextSize;
+static cl::opt<size_t, true>
+MaxTextSize("arm-randezvous-max-text-size",
+            cl::Hidden,
+            cl::desc("Maximum text section size in bytes"),
+            cl::location(RandezvousMaxTextSize),
+            cl::init(0x1e0000));   // 2 MB - 128 KB
+
+// Value of -arm-randezvous-max-rodata-size, in bytes
+size_t RandezvousMaxRodataSize;
+static cl::opt<size_t, true>
+MaxRodataSize("arm-randezvous-max-rodata-size",
+              cl::Hidden,
+              cl::desc("Maximum rodata section size in bytes"),
+              cl::location(RandezvousMaxRodataSize),
+              cl::init(0x10000));  // 64 KB
+
+// Value of -arm-randezvous-max-data-size, in bytes
+size_t RandezvousMaxDataSize;
+static cl::opt<size_t, true>
+MaxDataSize("arm-randezvous-max-data-size",
+            cl::Hidden,
+            cl::desc("Maximum data section size in bytes"),
+            cl::location(RandezvousMaxDataSize),
+            cl::init(0x10000));    // 64 KB
+
+// Value of -arm-randezvous-max-bss-size, in bytes
+size_t RandezvousMaxBssSize;
+static cl::opt<size_t, true>
+MaxBssSize("arm-randezvous-max-bss-size",
+           cl::Hidden,
+           cl::desc("Maximum bss section size in bytes"),
+           cl::location(RandezvousMaxBssSize),
+           cl::init(0x10000));     // 64 KB
+
+// Value of -arm-randezvous-shadow-stack-size, in bytes
+size_t RandezvousShadowStackSize;
+static cl::opt<size_t, true>
+ShadowStackSize("arm-randezvous-shadow-stack-size",
+                cl::Hidden,
+                cl::desc("ARM Randezvous Shadow Stack size in bytes"),
+                cl::location(RandezvousShadowStackSize),
+                cl::init(0x8000)); // 32 KB
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous options used by Randezvous passes
+//===----------------------------------------------------------------------===//
+
+// Value of -arm-randezvous-shadow-stack-stride-length, a number of bits
+// (initial value: 8)
+unsigned RandezvousShadowStackStrideLength;
+static cl::opt<unsigned, true>
+ShadowStackStrideLength("arm-randezvous-shadow-stack-stride-length",
+                        cl::Hidden,
+                        cl::desc("Number of bits for ARM Randezvous Shadow Stack stride"),
+                        cl::location(RandezvousShadowStackStrideLength),
+                        cl::init(8));
+
+// Value of -arm-randezvous-num-global-guard-candidates (initial value: 64)
+unsigned RandezvousNumGlobalGuardCandidates;
+static cl::opt<unsigned, true>
+NumGlobalGuardCandidates("arm-randezvous-num-global-guard-candidates",
+                         cl::Hidden,
+                         cl::desc("Number of global guard candidates to generate"),
+                         cl::location(RandezvousNumGlobalGuardCandidates),
+                         cl::init(64));
+
+// Value of -arm-randezvous-rng-addr (initial value: 0)
+uintptr_t RandezvousRNGAddress;
+static cl::opt<uintptr_t, true>
+RNGAddress("arm-randezvous-rng-addr",
+           cl::Hidden,
+           cl::desc("Address of a dynamic RNG"),
+           cl::location(RandezvousRNGAddress),
+           cl::init(0));
\ No newline at end of file
diff --git a/llvm/lib/Target/ARM/ARMRandezvousOptions.h b/llvm/lib/Target/ARM/ARMRandezvousOptions.h
new file mode 100644
index 00000000000000..3757fd09e07f42
--- /dev/null
+++ b/llvm/lib/Target/ARM/ARMRandezvousOptions.h
@@ -0,0 +1,63 @@
+//===- ARMRandezvousOptions.h - ARM Randezvous Command Line Options -------===//
+//
+// Copyright (c) 2021-2022, University of Rochester
+//
+// Part of the Randezvous Project, under the Apache License v2.0 with
+// LLVM Exceptions.  See LICENSE.txt in the llvm directory for license
+// information.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the command line options for ARM Randezvous passes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_RANDEZVOUS_OPTIONS
+#define ARM_RANDEZVOUS_OPTIONS
+
+#include <cstddef>
+#include <cstdint>
+
+//===----------------------------------------------------------------------===//
+// Randezvous pass enablers
+//===----------------------------------------------------------------------===//
+
+// All variables declared in this header are defined in
+// ARMRandezvousOptions.cpp, where each one is bound to the hidden command
+// line option named in the comment next to it.
+extern bool EnableRandezvousCLR;            // -arm-randezvous-clr
+extern bool EnableRandezvousBBLR;           // -arm-randezvous-bblr
+extern bool EnableRandezvousBBCLR;          // -arm-randezvous-bbclr
+extern bool EnableRandezvousPicoXOM;        // -arm-randezvous-picoxom
+extern bool EnableRandezvousGDLR;           // -arm-randezvous-gdlr
+extern bool EnableRandezvousDecoyPointers;  // -arm-randezvous-dp
+extern bool EnableRandezvousGlobalGuard;    // -arm-randezvous-global-guard
+extern bool EnableRandezvousShadowStack;    // -arm-randezvous-shadow-stack
+extern bool EnableRandezvousRAN;            // -arm-randezvous-ran
+extern bool EnableRandezvousLGPromote;      // -arm-randezvous-lgp
+extern bool EnableRandezvousICallLimiter;   // -arm-randezvous-icall-limiter
+
+//===----------------------------------------------------------------------===//
+// Randezvous pass seeds
+//===----------------------------------------------------------------------===//
+
+extern uint64_t RandezvousCLRSeed;          // -arm-randezvous-clr-seed
+extern uint64_t RandezvousGDLRSeed;         // -arm-randezvous-gdlr-seed
+extern uint64_t RandezvousShadowStackSeed;  // -arm-randezvous-shadow-stack-seed
+
+//===----------------------------------------------------------------------===//
+// Size options used by Randezvous passes
+//===----------------------------------------------------------------------===//
+
+// All sizes are in bytes
+extern size_t RandezvousMaxTextSize;        // -arm-randezvous-max-text-size
+extern size_t RandezvousMaxRodataSize;      // -arm-randezvous-max-rodata-size
+extern size_t RandezvousMaxDataSize;        // -arm-randezvous-max-data-size
+extern size_t RandezvousMaxBssSize;         // -arm-randezvous-max-bss-size
+extern size_t RandezvousShadowStackSize;    // -arm-randezvous-shadow-stack-size
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous options used by Randezvous passes
+//===----------------------------------------------------------------------===//
+
+// -arm-randezvous-shadow-stack-stride-length (a number of bits)
+extern unsigned RandezvousShadowStackStrideLength;
+// -arm-randezvous-num-global-guard-candidates
+extern unsigned RandezvousNumGlobalGuardCandidates;
+// -arm-randezvous-rng-addr
+extern uintptr_t RandezvousRNGAddress;
+
+#endif
\ No newline at end of file
diff --git a/llvm/lib/Target/ARM/ARMRandezvousShadowStack.cpp b/llvm/lib/Target/ARM/ARMRandezvousShadowStack.cpp
new file mode 100644
index 00000000000000..e0593fabe65e4d
--- /dev/null
+++ b/llvm/lib/Target/ARM/ARMRandezvousShadowStack.cpp
@@ -0,0 +1,861 @@
+//===- ARMRandezvousShadowStack.cpp - ARM Randezvous Shadow Stack ---------===//
+//
+// Copyright (c) 2021-2022, University of Rochester
+//
+// Part of the Randezvous Project, under the Apache License v2.0 with
+// LLVM Exceptions.  See LICENSE.txt in the llvm directory for license
+// information.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of a pass that instruments ARM machine
+// code to save/load the return address to/from a randomized compact shadow
+// stack.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-randezvous-shadow-stack"
+
+#include "ARMRandezvousCLR.h"
+#include "ARMRandezvousOptions.h"
+#include "ARMRandezvousShadowStack.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/RandomNumberGenerator.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+STATISTIC(NumPrologues, "Number of prologues transformed to use shadow stack");
+STATISTIC(NumEpilogues, "Number of epilogues transformed to use shadow stack");
+STATISTIC(NumNullified, "Number of return addresses nullified");
+
+char ARMRandezvousShadowStack::ID = 0;
+
+// Default constructor: hands the pass identifier to the ModulePass base
+ARMRandezvousShadowStack::ARMRandezvousShadowStack() : ModulePass(ID) {}
+
+// Returns the human-readable name of this pass
+StringRef ARMRandezvousShadowStack::getPassName() const {
+  return "ARM Randezvous Shadow Stack Pass";
+}
+
+// Declares the analyses this pass requires and the properties it preserves
+void ARMRandezvousShadowStack::getAnalysisUsage(AnalysisUsage & AU) const {
+  // MachineModuleInfo is how the pass reaches each MachineFunction
+  AU.addRequired<MachineModuleInfoWrapperPass>();
+  AU.setPreservesCFG();
+  // Chain to the base class implementation
+  ModulePass::getAnalysisUsage(AU);
+}
+
+// Forgets the trap blocks collected by runOnModule()
+void ARMRandezvousShadowStack::releaseMemory() {
+  TrapBlocks.clear();
+}
+
+//
+// Method: createShadowStack()
+//
+// Description:
+//   This method creates a GlobalVariable as the shadow stack.  The shadow
+//   stack is initialized either as zeroed memory or with addresses of randomly
+//   picked trap blocks.
+//
+// Input:
+//   M - A reference to the Module in which to create the shadow stack.
+//
+// Return value:
+//   A pointer to the created GlobalVariable.
+//
+GlobalVariable *
+ARMRandezvousShadowStack::createShadowStack(Module & M) {
+  // The shadow stack is an array of i8* slots covering the configured size
+  LLVMContext & Context = M.getContext();
+  const uint64_t SlotSize = M.getDataLayout().getPointerSize();
+  PointerType * SlotTy = PointerType::getUnqual(Type::getInt8Ty(Context));
+  ArrayType * StackTy = ArrayType::get(SlotTy,
+                                       RandezvousShadowStackSize / SlotSize);
+
+  // Look up (or create) the global variable that backs the shadow stack
+  Constant * Existing = M.getOrInsertGlobal(ShadowStackName, StackTy);
+  GlobalVariable * Stack = dyn_cast<GlobalVariable>(Existing);
+  assert(Stack != nullptr && "Shadow stack has wrong type!");
+  Stack->setLinkage(GlobalVariable::LinkOnceAnyLinkage);
+
+  // An initializer that is already present is left untouched
+  if (!Stack->hasInitializer()) {
+    if (!EnableRandezvousDecoyPointers) {
+      // Without decoy pointers the shadow stack starts out zeroed
+      Stack->setInitializer(ConstantArray::getNullValue(StackTy));
+    } else {
+      // Fill every slot with a decoy: the address of a randomly picked trap
+      // block when trap blocks exist, otherwise a purely random value with
+      // the LSB set
+      std::vector<Constant *> Decoys;
+      for (unsigned Slot = 0; Slot < StackTy->getNumElements(); ++Slot) {
+        if (TrapBlocks.empty()) {
+          // No trap blocks known: use a random value with the LSB set
+          APInt Random(8 * SlotSize, (*RNG)() | 0x1);
+          Decoys.push_back(Constant::getIntegerValue(SlotTy, Random));
+          continue;
+        }
+        // Point the slot at a randomly picked trap block
+        uint64_t Pick = (*RNG)() % TrapBlocks.size();
+        const BasicBlock * TrapBB = TrapBlocks[Pick]->getBasicBlock();
+        Decoys.push_back(BlockAddress::get(const_cast<BasicBlock *>(TrapBB)));
+      }
+      Stack->setInitializer(ConstantArray::get(StackTy, Decoys));
+    }
+  }
+
+  // Add the shadow stack to @llvm.used
+  appendToUsed(M, { Stack });
+
+  return Stack;
+}
+
+//
+// Method: createInitFunction()
+//
+// Description:
+//   This method creates a function (both Function and MachineFunction) that
+//   initializes the reserved registers for the shadow stack.
+//
+// Inputs:
+//   M  - A reference to the Module in which to create the function.
+//   SS - A reference to the shadow stack global variable.
+//
+// Return value:
+//   A pointer to the created Function.
+//
+Function *
+ARMRandezvousShadowStack::createInitFunction(Module & M, GlobalVariable & SS) {
+  // Create types for the init function
+  LLVMContext & Ctx = M.getContext();
+  FunctionType * FuncTy = FunctionType::get(Type::getVoidTy(Ctx), false);
+
+  // Create the init function
+  FunctionCallee FC = M.getOrInsertFunction(InitFuncName, FuncTy);
+  Function * F = dyn_cast<Function>(FC.getCallee());
+  assert(F != nullptr && "Init function has wrong type!");
+  MachineModuleInfo & MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+  MachineFunction & MF = MMI.getOrCreateMachineFunction(*F);
+
+  // Set necessary attributes and properties
+  F->setLinkage(GlobalVariable::LinkOnceAnyLinkage);
+  if (!F->hasFnAttribute(Attribute::Naked)) {
+    F->addFnAttr(Attribute::Naked);
+  }
+  if (!F->hasFnAttribute(Attribute::NoUnwind)) {
+    F->addFnAttr(Attribute::NoUnwind);
+  }
+  if (!F->hasFnAttribute(Attribute::WillReturn)) {
+    F->addFnAttr(Attribute::WillReturn);
+  }
+  using Property = MachineFunctionProperties::Property;
+  if (!MF.getProperties().hasProperty(Property::NoVRegs)) {
+    MF.getProperties().set(Property::NoVRegs);
+  }
+
+  // Create a basic block if not created
+  if (F->empty()) {
+    assert(MF.empty() && "Machine IR basic block already there!");
+
+    // Build an IR basic block
+    BasicBlock * BB = BasicBlock::Create(Ctx, "", F);
+    IRBuilder<> IRB(BB);
+    IRB.CreateRetVoid(); // At this point, what the IR basic block contains
+                         // doesn't matter so just place a return there
+
+    // Build machine IR basic block(s)
+    const TargetInstrInfo * TII = MF.getSubtarget().getInstrInfo();
+    MachineBasicBlock * MBB = MF.CreateMachineBasicBlock(BB);
+    MachineBasicBlock * MBB2 = nullptr;
+    MachineBasicBlock * MBB3 = nullptr;
+    MachineBasicBlock * RetMBB = MBB;
+    MF.push_back(MBB);
+    // MOVi16 SSPtrReg, @SS_lo
+    BuildMI(MBB, DebugLoc(), TII->get(ARM::t2MOVi16), ShadowStackPtrReg)
+    .addGlobalAddress(&SS, 0, ARMII::MO_LO16)
+    .add(predOps(ARMCC::AL));
+    // MOVTi16 SSPtrReg, @SS_hi
+    BuildMI(MBB, DebugLoc(), TII->get(ARM::t2MOVTi16), ShadowStackPtrReg)
+    .addReg(ShadowStackPtrReg)
+    .addGlobalAddress(&SS, 0, ARMII::MO_HI16)
+    .add(predOps(ARMCC::AL));
+    if (RandezvousRNGAddress != 0) {
+      // User provided an RNG address, so load a random stride from the RNG
+      if (ARM_AM::getT2SOImmVal(RandezvousRNGAddress) != -1) {
+        // Use MOVi if the address can be encoded in Thumb modified constant
+        BuildMI(MBB, DebugLoc(), TII->get(ARM::t2MOVi), ARM::R0)
+        .addImm(RandezvousRNGAddress)
+        .add(predOps(ARMCC::AL))
+        .add(condCodeOp()); // No 'S' bit
+      } else {
+        // Otherwise encode the address's lower/upper 16 bits with MOVi16/MOVTi16
+        BuildMI(MBB, DebugLoc(), TII->get(ARM::t2MOVi16), ARM::R0)
+        .addImm(RandezvousRNGAddress & 0xffff)
+        .add(predOps(ARMCC::AL));
+        BuildMI(MBB, DebugLoc(), TII->get(ARM::t2MOVTi16), ARM::R0)
+        .addReg(ARM::R0)
+        .addImm((RandezvousRNGAddress >> 16) & 0xffff)
+        .add(predOps(ARMCC::AL));
+      }
+
+      MBB2 = MF.CreateMachineBasicBlock(BB);
+      MF.push_back(MBB2);
+      MBB->addSuccessor(MBB2);
+      MBB2->addSuccessor(MBB2);
+      // MBB2 branches back to itself while the value loaded from the RNG is 0
+      // LDRi12 SSStrideReg, [R0, #0]
+      BuildMI(MBB2, DebugLoc(), TII->get(ARM::t2LDRi12), ShadowStackStrideReg)
+      .addReg(ARM::R0)
+      .addImm(0)
+      .add(predOps(ARMCC::AL));
+      // CMPri SSStrideReg, #0
+      BuildMI(MBB2, DebugLoc(), TII->get(ARM::t2CMPri))
+      .addReg(ShadowStackStrideReg)
+      .addImm(0)
+      .add(predOps(ARMCC::AL));
+      // BEQ MBB2
+      BuildMI(MBB2, DebugLoc(), TII->get(ARM::t2Bcc))
+      .addMBB(MBB2)
+      .addImm(ARMCC::EQ)
+      .addReg(ARM::CPSR, RegState::Kill);
+
+      MBB3 = MF.CreateMachineBasicBlock(BB);
+      MF.push_back(MBB3);
+      MBB2->addSuccessor(MBB3);
+      // Shift a 64-bit unsigned one: int 1 << 31 would overflow for length 32
+      // BFC SSStrideReg, #(SSStrideLength - 1), #(33 - SSStrideLength)
+      BuildMI(MBB3, DebugLoc(), TII->get(ARM::t2BFC), ShadowStackStrideReg)
+      .addReg(ShadowStackStrideReg)
+      .addImm((uint64_t(1) << (RandezvousShadowStackStrideLength - 1)) - 1)
+      .add(predOps(ARMCC::AL));
+      // BFC SSStrideReg, #0, #2
+      BuildMI(MBB3, DebugLoc(), TII->get(ARM::t2BFC), ShadowStackStrideReg)
+      .addReg(ShadowStackStrideReg)
+      .addImm(~0x3)
+      .add(predOps(ARMCC::AL));
+      RetMBB = MBB3;
+    } else {
+      // Generate a static random stride
+      uint64_t Stride = (*RNG)();
+      Stride &= (uint64_t(1) << (RandezvousShadowStackStrideLength - 1)) - 1;
+      Stride &= ~uint64_t(0x3);
+      if (ARM_AM::getT2SOImmVal(Stride) != -1) {
+        // Use MOVi if the stride can be encoded in Thumb modified constant
+        BuildMI(MBB, DebugLoc(), TII->get(ARM::t2MOVi), ShadowStackStrideReg)
+        .addImm(Stride)
+        .add(predOps(ARMCC::AL))
+        .add(condCodeOp()); // No 'S' bit
+      } else {
+        // Otherwise encode the stride's lower/upper 16 bits with MOVi16/MOVTi16
+        BuildMI(MBB, DebugLoc(), TII->get(ARM::t2MOVi16), ShadowStackStrideReg)
+        .addImm(Stride & 0xffff)
+        .add(predOps(ARMCC::AL));
+        BuildMI(MBB, DebugLoc(), TII->get(ARM::t2MOVTi16), ShadowStackStrideReg)
+        .addReg(ShadowStackStrideReg)
+        .addImm((Stride >> 16) & 0xffff)
+        .add(predOps(ARMCC::AL));
+      }
+    }
+    // BX_RET
+    BuildMI(RetMBB, DebugLoc(), TII->get(ARM::tBX_RET))
+    .add(predOps(ARMCC::AL));
+  }
+
+  // Add the init function to @llvm.used
+  appendToUsed(M, { F });
+
+  return F;
+}
+
+//
+// Method: pushToShadowStack()
+//
+// Description:
+//   This method inserts instructions before a PUSH that save LR to the shadow
+//   stack, then drops LR from the PUSH (or replaces/removes the PUSH itself).
+//
+// Inputs:
+//   MI     - A reference to a PUSH instruction that saves LR to the stack.
+//   LR     - A reference to the LR operand of the PUSH.
+//   Stride - A static stride, used as the immediate of the inserted STR_POST.
+//
+// Return value:
+//   true - The machine code was modified (the only value ever returned).
+//
+bool
+ARMRandezvousShadowStack::pushToShadowStack(MachineInstr & MI,
+                                            MachineOperand & LR,
+                                            uint32_t Stride) {
+  MachineFunction & MF = *MI.getMF();
+  const TargetInstrInfo * TII = MF.getSubtarget().getInstrInfo();
+  const DebugLoc & DL = MI.getDebugLoc();
+
+  Register PredReg;
+  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); // reused on new insts
+
+  // Build the following instruction sequence
+  //
+  // STR_POST LR, [SSPtrReg], #Stride
+  // ADDrr    SSPtrReg, SSPtrReg, SSStrideReg
+  std::vector<MachineInstr *> NewInsts;
+  NewInsts.push_back(BuildMI(MF, DL, TII->get(ARM::t2STR_POST), ShadowStackPtrReg)
+                     .addReg(ARM::LR)
+                     .addReg(ShadowStackPtrReg)
+                     .addImm(Stride)
+                     .add(predOps(Pred, PredReg)));
+  NewInsts.push_back(BuildMI(MF, DL, TII->get(ARM::t2ADDrr), ShadowStackPtrReg)
+                     .addReg(ShadowStackPtrReg)
+                     .addReg(ShadowStackStrideReg)
+                     .add(predOps(Pred, PredReg))
+                     .add(condCodeOp()));
+
+  // Now insert these new instructions into the basic block
+  insertInstsBefore(MI, NewInsts);
+
+  // At last, replace the old PUSH with a new one that doesn't push LR to the
+  // stack
+  switch (MI.getOpcode()) {
+  case ARM::t2STMDB_UPD:
+    // STMDB_UPD should store at least two registers; if it happens to be two,
+    // we replace it with a STR_PRE
+    assert(MI.getNumExplicitOperands() >= 6 && "Buggy STMDB_UPD!");
+    if (MI.getNumExplicitOperands() > 6) {
+      MI.removeOperand(MI.getOperandNo(&LR));
+    } else {
+      unsigned Idx = MI.getOperandNo(&LR);
+      Idx = Idx == 4 ? 5 : 4; // operand index of the other (non-LR) register
+      insertInstBefore(MI, BuildMI(MF, DL, TII->get(ARM::t2STR_PRE), ARM::SP)
+                           .add(MI.getOperand(Idx))
+                           .addReg(ARM::SP)
+                           .addImm(-4)
+                           .add(predOps(Pred, PredReg))
+                           .setMIFlags(MI.getFlags()));
+      removeInst(MI);
+    }
+    break;
+
+  case ARM::tPUSH:
+    // PUSH should store at least one register; if it happens to be one, we
+    // just remove it
+    assert(MI.getNumExplicitOperands() >= 3 && "Buggy PUSH!");
+    if (MI.getNumExplicitOperands() > 3) {
+      MI.removeOperand(MI.getOperandNo(&LR));
+    } else {
+      removeInst(MI);
+    }
+    break;
+
+  // ARM::t2STR_PRE
+  default:
+    // STR_PRE only stores one register, so we just remove it
+    removeInst(MI);
+    break;
+  }
+
+  ++NumPrologues; // statistic: prologues transformed to use the shadow stack
+  return true;
+}
+
+//
+// Method: popFromShadowStack()
+//
+// Description:
+//   This method modifies a POP instruction to not write to PC/LR and inserts
+//   new instructions that load the return address from the shadow stack into
+//   PC/LR.
+//
+// Inputs:
+//   MI     - A reference to a POP instruction that writes to LR or PC.
+//   PCLR   - A reference to the PC or LR operand of the POP.
+//   Stride - A static stride to use.
+//
+// Return value:
+//   true - The machine code was modified.
+//
+bool
+ARMRandezvousShadowStack::popFromShadowStack(MachineInstr & MI,
+                                             MachineOperand & PCLR,
+                                             uint32_t Stride) {
+  MachineFunction & MF = *MI.getMF();
+  const TargetInstrInfo * TII = MF.getSubtarget().getInstrInfo();
+  const DebugLoc & DL = MI.getDebugLoc();
+
+  Register PredReg;
+  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); // reused on new insts
+
+  // Build the following instruction sequence
+  //
+  // SUBrr    SSPtrReg, SSPtrReg, SSStrideReg
+  // LDR_PRE  PC/LR, [SSPtrReg, #-Stride]!
+  std::vector<MachineInstr *> NewInsts;
+  NewInsts.push_back(BuildMI(MF, DL, TII->get(ARM::t2SUBrr), ShadowStackPtrReg)
+                     .addReg(ShadowStackPtrReg)
+                     .addReg(ShadowStackStrideReg)
+                     .add(predOps(Pred, PredReg))
+                     .add(condCodeOp()));
+  NewInsts.push_back(BuildMI(MF, DL, TII->get(PCLR.getReg() == ARM::PC ?
+                                              ARM::t2LDR_PRE_RET :
+                                              ARM::t2LDR_PRE),
+                             PCLR.getReg())
+                     .addReg(ShadowStackPtrReg, RegState::Define)
+                     .addReg(ShadowStackPtrReg)
+                     .addImm(-static_cast<int64_t>(Stride)) // negate as signed
+                     .add(predOps(Pred, PredReg)));
+
+  // Now insert these new instructions into the basic block
+  insertInstsAfter(MI, NewInsts);
+
+  // Replace the old POP with a new one that doesn't write to PC/LR
+  switch (MI.getOpcode()) {
+  case ARM::t2LDMIA_RET:
+    MI.setDesc(TII->get(ARM::t2LDMIA_UPD));
+    NewInsts[1]->copyImplicitOps(MF, MI); // the new load is now the return
+    for (unsigned i = MI.getNumOperands() - 1, e = MI.getNumExplicitOperands();
+         i >= e; --i) {
+      MI.removeOperand(i);
+    }
+    LLVM_FALLTHROUGH;
+  case ARM::t2LDMIA_UPD:
+    // LDMIA_UPD should load at least two registers; if it happens to be two,
+    // we replace it with a LDR_POST
+    assert(MI.getNumExplicitOperands() >= 6 && "Buggy LDMIA_UPD!");
+    if (MI.getNumExplicitOperands() > 6) {
+      MI.removeOperand(MI.getOperandNo(&PCLR));
+    } else {
+      unsigned Idx = MI.getOperandNo(&PCLR);
+      Idx = Idx == 4 ? 5 : 4; // operand index of the other (non-PC/LR) register
+      insertInstAfter(MI, BuildMI(MF, DL, TII->get(ARM::t2LDR_POST),
+                                  MI.getOperand(Idx).getReg())
+                          .addReg(ARM::SP, RegState::Define)
+                          .addReg(ARM::SP)
+                          .addImm(4)
+                          .add(predOps(Pred, PredReg))
+                          .setMIFlags(MI.getFlags()));
+      removeInst(MI);
+    }
+    break;
+
+  case ARM::tPOP_RET:
+    MI.setDesc(TII->get(ARM::tPOP));
+    NewInsts[1]->copyImplicitOps(MF, MI); // the new load is now the return
+    for (unsigned i = MI.getNumOperands() - 1, e = MI.getNumExplicitOperands();
+         i >= e; --i) {
+      MI.removeOperand(i);
+    }
+    LLVM_FALLTHROUGH;
+  case ARM::tPOP:
+    // POP should load at least one register; if it happens to be one, we just
+    // remove it
+    assert(MI.getNumExplicitOperands() >= 3 && "Buggy POP!");
+    if (MI.getNumExplicitOperands() > 3) {
+      MI.removeOperand(MI.getOperandNo(&PCLR));
+    } else {
+      removeInst(MI);
+    }
+    break;
+
+  // ARM::t2LDR_POST
+  default:
+    // LDR_POST only loads one register, so we just remove it
+    removeInst(MI);
+    break;
+  }
+
+  if (EnableRandezvousRAN) {
+    // Nullify the return address in the shadow stack
+    nullifyReturnAddress(*NewInsts[1], NewInsts[1]->getOperand(0));
+  }
+
+  ++NumEpilogues; // statistic: epilogues transformed to use the shadow stack
+  return true;
+}
+
+//
+// Method: nullifyReturnAddress()
+//
+// Description:
+//   This method nullifies an in-memory return address by either zeroing it out
+//   or filling it with a null value (either the address of a randomly picked
+//   trap block or a purely random value).
+//
+// Inputs:
+//   MI   - A reference to a POP or LDR instruction that writes to LR or PC.
+//   PCLR - A reference to the PC or LR operand of MI.
+//
+// Return value:
+//   true - The machine code was modified.
+//
+bool
+ARMRandezvousShadowStack::nullifyReturnAddress(MachineInstr & MI,
+                                               MachineOperand & PCLR) {
+  MachineFunction & MF = *MI.getMF();
+  const TargetInstrInfo * TII = MF.getSubtarget().getInstrInfo();
+  const DebugLoc & DL = MI.getDebugLoc();
+
+  Register PredReg;
+  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); // reused on new insts
+
+  // Mark LR as restored since we're going to use LR to hold the return address
+  // in all the cases
+  MachineFrameInfo & MFI = MF.getFrameInfo();
+  if (MFI.isCalleeSavedInfoValid()) {
+    for (CalleeSavedInfo & CSI : MFI.getCalleeSavedInfo()) {
+      if (CSI.getReg() == ARM::LR) {
+        CSI.setRestored(true);
+        break;
+      }
+    }
+  }
+
+  // A store needs a scratch source register; if none is free, spill and use R4.
+  // NOTE(review): a spill's PUSH moves SP before the [SP, #-4] stores - confirm.
+  std::vector<Register> FreeRegs = findFreeRegistersAfter(MI);
+  bool Spill = FreeRegs.empty();
+  Register FreeReg = Spill ? ARM::R4 : FreeRegs[0];
+
+  std::vector<MachineInstr *> NewInsts;
+  switch (MI.getOpcode()) {
+  // LDMIA_RET SP!, {..., PC} -> LDMIA_UPD SP!, {..., LR}
+  //                             MOVi16    FreeReg, #0
+  //                             STRi8     FreeReg, [SP, #-4]
+  //                             BX_RET
+  case ARM::t2LDMIA_RET:
+    assert(PCLR.getReg() == ARM::PC && "Buggy POP!");
+    MI.setDesc(TII->get(ARM::t2LDMIA_UPD));
+    PCLR.setReg(ARM::LR);
+    insertInstAfter(MI, BuildMI(MF, DL, TII->get(ARM::tBX_RET))
+                        .add(predOps(Pred, PredReg)));
+    LLVM_FALLTHROUGH;
+  // LDMIA_UPD SP!, {..., LR} -> LDMIA_UPD SP!, {..., LR}
+  //                             MOVi16    FreeReg, #0
+  //                             STRi8     FreeReg, [SP, #-4]
+  case ARM::t2LDMIA_UPD:
+  // LDR_POST LR, [SP], #4 -> LDR_POST LR, [SP], #4
+  //                          MOVi16   FreeReg, #0
+  //                          STRi8    FreeReg, [SP, #-4]
+  case ARM::t2LDR_POST:
+    assert(PCLR.getReg() == ARM::LR && "Buggy POP!");
+    if (Spill) {
+      NewInsts.push_back(BuildMI(MF, DL, TII->get(ARM::tPUSH))
+                         .add(predOps(Pred, PredReg))
+                         .addReg(FreeReg));
+    }
+    NewInsts.push_back(BuildMI(MF, DL, TII->get(ARM::t2MOVi16), FreeReg)
+                       .addImm(0)
+                       .add(predOps(Pred, PredReg)));
+    NewInsts.push_back(BuildMI(MF, DL, TII->get(ARM::t2STRi8))
+                       .addReg(FreeReg)
+                       .addReg(ARM::SP)
+                       .addImm(-4)
+                       .add(predOps(Pred, PredReg)));
+    if (Spill) {
+      NewInsts.push_back(BuildMI(MF, DL, TII->get(ARM::tPOP))
+                         .add(predOps(Pred, PredReg))
+                         .addReg(FreeReg));
+    }
+    insertInstsAfter(MI, NewInsts); // lands between MI and any BX_RET above
+    break;
+
+  // POP(_RET) {..., PC} -> LDMIA_UPD SP!, {..., LR}
+  //                        MOVi16    FreeReg, #0
+  //                        STRi8     FreeReg, [SP, #-4]
+  //                        BX_RET
+  case ARM::tPOP:
+  case ARM::tPOP_RET: {
+    assert(PCLR.getReg() == ARM::PC && "Buggy POP!");
+    MachineInstrBuilder MIB = BuildMI(MF, DL, TII->get(ARM::t2LDMIA_UPD), ARM::SP)
+                              .addReg(ARM::SP);
+    for (MachineOperand & MO : MI.explicit_operands()) {
+      if (MO.isReg() && MO.getReg() == ARM::PC) {
+        MIB.addReg(ARM::LR, RegState::Define); // load into LR instead of PC
+      } else {
+        MIB.add(MO);
+      }
+    }
+    NewInsts.push_back(MIB);
+    if (Spill) {
+      NewInsts.push_back(BuildMI(MF, DL, TII->get(ARM::tPUSH))
+                         .add(predOps(Pred, PredReg))
+                         .addReg(FreeReg));
+    }
+    NewInsts.push_back(BuildMI(MF, DL, TII->get(ARM::t2MOVi16), FreeReg)
+                       .addImm(0)
+                       .add(predOps(Pred, PredReg)));
+    NewInsts.push_back(BuildMI(MF, DL, TII->get(ARM::t2STRi8))
+                       .addReg(FreeReg)
+                       .addReg(ARM::SP)
+                       .addImm(-4)
+                       .add(predOps(Pred, PredReg)));
+    if (Spill) {
+      NewInsts.push_back(BuildMI(MF, DL, TII->get(ARM::tPOP))
+                         .add(predOps(Pred, PredReg))
+                         .addReg(FreeReg));
+    }
+    NewInsts.push_back(BuildMI(MF, DL, TII->get(ARM::tBX_RET))
+                       .add(predOps(Pred, PredReg)));
+    insertInstsAfter(MI, NewInsts);
+    removeInst(MI); // the original POP is superseded by the LDMIA_UPD above
+    break;
+  }
+
+  // LDR_PRE_RET PC, [SSPtrReg, #imm]! -> LDR_PRE LR, [SSPtrReg, #imm]!
+  //                                      MOVi16  FreeReg, #0
+  //                                      STRi12  FreeReg, [SSPtrReg, #0]
+  //                                      BX_RET
+  //
+  //                                   or LDR_PRE LR, [SSPtrReg, #imm]!
+  //                                      MOVi16  FreeReg, #null-lo16
+  //                                      MOVTi16 FreeReg, #null-hi16
+  //                                      STRi12  FreeReg, [SSPtrReg, #0]
+  //                                      BX_RET
+  case ARM::t2LDR_PRE_RET:
+    assert(PCLR.getReg() == ARM::PC && "Buggy POP!");
+    MI.setDesc(TII->get(ARM::t2LDR_PRE));
+    PCLR.setReg(ARM::LR);
+    insertInstAfter(MI, BuildMI(MF, DL, TII->get(ARM::tBX_RET))
+                        .add(predOps(Pred, PredReg)));
+    LLVM_FALLTHROUGH;
+  // LDR_PRE LR, [SSPtrReg, #imm]! -> LDR_PRE LR, [SSPtrReg, #imm]!
+  //                                  MOVi16  FreeReg, #0
+  //                                  STRi12  FreeReg, [SSPtrReg, #0]
+  //
+  //                               or LDR_PRE LR, [SSPtrReg, #imm]!
+  //                                  MOVi16  FreeReg, #null-lo16
+  //                                  MOVTi16 FreeReg, #null-hi16
+  //                                  STRi12  FreeReg, [SSPtrReg, #0]
+  default:
+    assert(MI.getOpcode() == ARM::t2LDR_PRE && "Unrecognized POP!");
+    assert(MI.getOperand(1).getReg() == ShadowStackPtrReg && "Buggy POP!");
+    assert(PCLR.getReg() == ARM::LR && "Buggy POP!");
+    if (Spill) {
+      NewInsts.push_back(BuildMI(MF, DL, TII->get(ARM::tPUSH))
+                         .add(predOps(Pred, PredReg))
+                         .addReg(FreeReg));
+    }
+    if (EnableRandezvousDecoyPointers) {
+      if (!TrapBlocks.empty()) {
+        // Use the address of a trap block as the null value
+        uint64_t Idx = (*RNG)() % TrapBlocks.size();
+        const BasicBlock * BB = TrapBlocks[Idx]->getBasicBlock();
+        BlockAddress * BA = BlockAddress::get(const_cast<BasicBlock *>(BB));
+        NewInsts.push_back(BuildMI(MF, DL, TII->get(ARM::t2MOVi16), FreeReg)
+                           .addBlockAddress(BA, 0, ARMII::MO_LO16)
+                           .add(predOps(Pred, PredReg)));
+        NewInsts.push_back(BuildMI(MF, DL, TII->get(ARM::t2MOVTi16), FreeReg)
+                           .addReg(FreeReg)
+                           .addBlockAddress(BA, 0, ARMII::MO_HI16)
+                           .add(predOps(Pred, PredReg)));
+      } else {
+        // Use a random value with the LSB set as the null value
+        uint32_t NullValue = (*RNG)() | 0x1;
+        if (ARM_AM::getT2SOImmVal(NullValue) != -1) {
+          NewInsts.push_back(BuildMI(MF, DL, TII->get(ARM::t2MOVi), FreeReg)
+                             .addImm(NullValue)
+                             .add(predOps(Pred, PredReg))
+                             .add(condCodeOp())); // No 'S' bit
+        } else {
+          NewInsts.push_back(BuildMI(MF, DL, TII->get(ARM::t2MOVi16), FreeReg)
+                             .addImm(NullValue & 0xffff)
+                             .add(predOps(Pred, PredReg)));
+          NewInsts.push_back(BuildMI(MF, DL, TII->get(ARM::t2MOVTi16), FreeReg)
+                             .addReg(FreeReg)
+                             .addImm((NullValue >> 16) & 0xffff)
+                             .add(predOps(Pred, PredReg)));
+        }
+      }
+    } else {
+      NewInsts.push_back(BuildMI(MF, DL, TII->get(ARM::t2MOVi16), FreeReg)
+                         .addImm(0)
+                         .add(predOps(Pred, PredReg)));
+    }
+    NewInsts.push_back(BuildMI(MF, DL, TII->get(ARM::t2STRi12))
+                       .addReg(FreeReg)
+                       .addReg(ShadowStackPtrReg)
+                       .addImm(0)
+                       .add(predOps(Pred, PredReg)));
+    if (Spill) {
+      NewInsts.push_back(BuildMI(MF, DL, TII->get(ARM::tPOP))
+                         .add(predOps(Pred, PredReg))
+                         .addReg(FreeReg));
+    }
+    insertInstsAfter(MI, NewInsts); // lands between MI and any BX_RET above
+    break;
+  }
+
+  ++NumNullified; // statistic: return addresses nullified
+  return true;
+}
+
+//
+// Method: runOnModule()
+//
+// Description:
+//   This method is called when the PassManager wants this pass to transform
+//   the specified Module.  This method
+//
+//   * creates a global variable as the shadow stack,
+//
+//   * creates a function that initializes the reserved registers for the
+//     shadow stack, and
+//
+//   * transforms the Module to utilize the shadow stack for saving/restoring
+//     return addresses and/or to nullify a saved return address on returns.
+//
+// Input:
+//   M - A reference to the Module to transform.
+//
+// Output:
+//   M - The transformed Module.
+//
+// Return value:
+//   true  - The Module was transformed.
+//   false - The Module was not transformed.
+//
+bool
+ARMRandezvousShadowStack::runOnModule(Module & M) {
+  if (!EnableRandezvousShadowStack && !EnableRandezvousRAN) {
+    return false;
+  }
+
+  MachineModuleInfo & MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+  // Build the RNG name in one expression; a stored Twine would dangle
+  RNG = M.createRNG((getPassName() + "-" + Twine(RandezvousShadowStackSeed)).str());
+
+  // Find trap blocks inserted by CLR
+  for (Function & F : M) {
+    MachineFunction * MF = MMI.getMachineFunction(F);
+    if (MF != nullptr) {
+      for (MachineBasicBlock & MBB : *MF) {
+        if (MBB.isRandezvousTrapBlock()) {
+          TrapBlocks.push_back(&MBB);
+        }
+      }
+    }
+  }
+
+  if (EnableRandezvousShadowStack) {
+    assert((RandezvousShadowStackStrideLength > 2 &&
+            RandezvousShadowStackStrideLength <= 32) && "Invalid stride length!");
+
+    // Create and initialize a global variable for the shadow stack
+    GlobalVariable * SS = createShadowStack(M);
+
+    // Create an init function that:
+    // * loads the address of the shadow stack to the shadow stack pointer
+    //   register, and
+    // * generates a random stride (either dynamic or static) to the shadow
+    //   stack stride register
+    createInitFunction(M, *SS);
+  }
+
+  // Instrument pushes and pops in each function
+  bool changed = EnableRandezvousShadowStack; // the globals above modified M
+  for (Function & F : M) {
+    MachineFunction * MF = MMI.getMachineFunction(F);
+    if (MF == nullptr) {
+      continue;
+    }
+
+    // Find out all pushes that write LR to the stack and all pops that read a
+    // return address from the stack to LR or PC
+    std::vector<std::pair<MachineInstr *, MachineOperand *> > Pushes;
+    std::vector<std::pair<MachineInstr *, MachineOperand *> > Pops;
+    for (MachineBasicBlock & MBB : *MF) {
+      for (MachineInstr & MI : MBB) {
+        switch (MI.getOpcode()) {
+        // Frame-setup instructions in function prologue
+        case ARM::t2STR_PRE:
+        case ARM::t2STMDB_UPD:
+          // STR_PRE and STMDB_UPD are considered as PUSH if they write to SP!
+          if (MI.getOperand(0).getReg() != ARM::SP) {
+            break;
+          }
+          LLVM_FALLTHROUGH;
+        case ARM::tPUSH:
+          if (MI.getFlag(MachineInstr::FrameSetup)) {
+            for (MachineOperand & MO : MI.explicit_operands()) {
+              if (MO.isReg() && MO.getReg() == ARM::LR) {
+                Pushes.push_back(std::make_pair(&MI, &MO));
+                break;
+              }
+            }
+          }
+          break;
+
+        // Frame-destroy instructions in function epilogue
+        case ARM::t2LDR_POST:
+        case ARM::t2LDMIA_UPD:
+        case ARM::t2LDMIA_RET:
+          // LDR_POST and LDMIA_(UPD|RET) are considered as POP if they read
+          // from SP!
+          if (MI.getOperand(1).getReg() != ARM::SP) {
+            break;
+          }
+          LLVM_FALLTHROUGH;
+        case ARM::tPOP:
+        case ARM::tPOP_RET:
+          if (MI.getFlag(MachineInstr::FrameDestroy)) {
+            // Handle 2 cases:
+            // (1) Pop writing to LR
+            // (2) Pop writing to PC
+            for (MachineOperand & MO : MI.explicit_operands()) {
+              if (MO.isReg()) {
+                if (MO.getReg() == ARM::LR || MO.getReg() == ARM::PC) {
+                  Pops.push_back(std::make_pair(&MI, &MO));
+                  break;
+                }
+              }
+            }
+          }
+          break;
+
+        default:
+          break;
+        }
+      }
+    }
+
+    // Instrument each push and pop
+    if (EnableRandezvousShadowStack) {
+      // Generate a per-function static stride
+      uint32_t Stride = (*RNG)();
+      Stride &= (1ul << (RandezvousShadowStackStrideLength - 1)) - 1;
+      Stride &= ~0x3ul;
+      // Limit the static stride to be within 8 bits, so that it can fit in
+      // STR_POST and LDR_PRE as an immediate
+      Stride &= 0xfful;
+      // Don't generate an empty stride; either the dynamic stride or the
+      // static stride needs to make sure of it, so just do it on the static to
+      // leave more room for the dynamic
+      if (Stride == 0u) {
+        Stride = 4u;
+      }
+
+      for (auto & MIMO : Pushes) {
+        changed |= pushToShadowStack(*MIMO.first, *MIMO.second, Stride);
+      }
+      for (auto & MIMO : Pops) {
+        changed |= popFromShadowStack(*MIMO.first, *MIMO.second, Stride);
+      }
+    } else if (EnableRandezvousRAN) {
+      for (auto & MIMO : Pops) {
+        changed |= nullifyReturnAddress(*MIMO.first, *MIMO.second);
+      }
+    }
+  }
+
+  return changed;
+}
+
+// Factory function that allocates a new instance of the shadow stack pass
+ModulePass * llvm::createARMRandezvousShadowStack() {
+  return new ARMRandezvousShadowStack();
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/ARM/ARMRandezvousShadowStack.h b/llvm/lib/Target/ARM/ARMRandezvousShadowStack.h
new file mode 100644
index 00000000000000..5b9fcd61105b50
--- /dev/null
+++ b/llvm/lib/Target/ARM/ARMRandezvousShadowStack.h
@@ -0,0 +1,58 @@
+#ifndef ARM_RANDEZVOUS_SHADOW_STACK
+#define ARM_RANDEZVOUS_SHADOW_STACK
+
+#include "ARMRandezvousInstrumentor.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/RandomNumberGenerator.h"
+
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+namespace llvm {
+  /// Module pass that instruments return-address pushes and pops with
+  /// Randezvous shadow-stack (or return-address nullification) sequences.
+  struct ARMRandezvousShadowStack : public ModulePass,
+                                    public ARMRandezvousInstrumentor {
+    // Pass Identifier
+    static char ID;
+
+    // Registers designated for the shadow stack pointer and the stride
+    static constexpr Register ShadowStackPtrReg = ARM::R8;
+    static constexpr Register ShadowStackStrideReg = ARM::R9;
+    // Symbol names; presumably used by createShadowStack() and
+    // createInitFunction() respectively -- confirm against the .cpp
+    static constexpr StringRef ShadowStackName = "__randezvous_shadow_stack";
+    static constexpr StringRef InitFuncName = "__randezvous_shadow_stack_init";
+
+    ARMRandezvousShadowStack();
+    StringRef getPassName() const override;
+    void getAnalysisUsage(AnalysisUsage & AU) const override;
+    void releaseMemory() override;
+    bool runOnModule(Module & M) override;
+
+  private:
+    // Source of the per-function static stride drawn in runOnModule()
+    std::unique_ptr<RandomNumberGenerator> RNG;
+    // NOTE(review): populated by the .cpp; how it is used is not visible here
+    std::vector<MachineBasicBlock *> TrapBlocks;
+
+    GlobalVariable * createShadowStack(Module & M);
+    Function * createInitFunction(Module & M, GlobalVariable & SS);
+
+    // Per-instruction instrumentation hooks; runOnModule() ORs their results
+    // into its "changed" flag. Stride is the per-function static stride (a
+    // non-zero multiple of 4 that fits in 8 bits).
+    bool pushToShadowStack(MachineInstr & MI, MachineOperand & LR,
+                           uint32_t Stride);
+    bool popFromShadowStack(MachineInstr & MI, MachineOperand & PCLR,
+                            uint32_t Stride);
+    // Applied to pops when the shadow stack is disabled but RAN is enabled
+    bool nullifyReturnAddress(MachineInstr & MI, MachineOperand & PCLR);
+  };
+
+  // Factory for the pass; returns a newly allocated instance
+  ModulePass * createARMRandezvousShadowStack(void);
+}
+
+#endif
\ No newline at end of file
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 775d098fbaed57..bfd0d0ec4cecda 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -9,6 +9,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "ARMRandezvousInstrumentor.h"
+#include "ARMRandezvousShadowStack.h"
 #include "ARMTargetMachine.h"
 #include "ARM.h"
 #include "ARMMachineFunctionInfo.h"
@@ -610,6 +612,13 @@ void ARMPassConfig::addPreEmitPass2() {
   // sizes will only be decreased by this pass.
   addPass(createARMLowOverheadLoopsPass());
 
+  // Inserts sxh pass
+
+  // Add Randezvous CodeGen passes
+
+  addPass(createARMRandezvousShadowStack());
+
+
   if (TM->getTargetTriple().isOSWindows()) {
     // Identify valid longjmp targets for Windows Control Flow Guard.
     addPass(createCFGuardLongjmpPass());
diff --git a/llvm/lib/Target/ARM/CMakeLists.txt b/llvm/lib/Target/ARM/CMakeLists.txt
index a0f4290cb48db2..d4dd215b84ab83 100644
--- a/llvm/lib/Target/ARM/CMakeLists.txt
+++ b/llvm/lib/Target/ARM/CMakeLists.txt
@@ -19,6 +19,14 @@ tablegen(LLVM ARMGenSystemRegister.inc -gen-searchable-tables)
 
 add_public_tablegen_target(ARMCommonTableGen)
 
+
+set(ARMRandezvous_SOURCES
+  ARMRandezvousInstrumentor.cpp
+  ARMRandezvousShadowStack.cpp
+  ARMRandezvousOptions.cpp
+  ARMRandezvousCLR.cpp
+)
+
 add_llvm_target(ARMCodeGen
   A15SDOptimizer.cpp
   ARMAsmPrinter.cpp
@@ -68,6 +76,7 @@ add_llvm_target(ARMCodeGen
   Thumb2ITBlockPass.cpp
   Thumb2InstrInfo.cpp
   Thumb2SizeReduction.cpp
+  ${ARMRandezvous_SOURCES}
 
   LINK_COMPONENTS
   ARMDesc
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index dfa305a3b968d7..1a2cab20ee07c3 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -115,6 +115,10 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
       {"fixup_arm_movw_lo16", 0, 20, 0},
       {"fixup_t2_movt_hi16", 0, 20, 0},
       {"fixup_t2_movw_lo16", 0, 20, 0},
+
+      {"fixup_t2_udf_hi16", 0, 20, 0},
+      {"fixup_t2_udf_lo16", 0, 20, 0},
+
       {"fixup_arm_mod_imm", 0, 12, 0},
       {"fixup_t2_so_imm", 0, 26, 0},
       {"fixup_bf_branch", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
@@ -169,6 +173,10 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
       {"fixup_arm_movw_lo16", 12, 20, 0},
       {"fixup_t2_movt_hi16", 12, 20, 0},
       {"fixup_t2_movw_lo16", 12, 20, 0},
+
+      {"fixup_t2_udf_hi16", 12, 20, 0},
+      {"fixup_t2_udf_lo16", 12, 20, 0},
+
       {"fixup_arm_mod_imm", 20, 12, 0},
       {"fixup_t2_so_imm", 26, 6, 0},
       {"fixup_bf_branch", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
@@ -443,6 +451,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
         A->getSymbol().isExternal() &&
         (Kind == FK_Data_4 || Kind == ARM::fixup_arm_movw_lo16 ||
          Kind == ARM::fixup_arm_movt_hi16 || Kind == ARM::fixup_t2_movw_lo16 ||
+         Kind == ARM::fixup_t2_udf_hi16 || Kind == ARM::fixup_t2_udf_lo16 ||
          Kind == ARM::fixup_t2_movt_hi16))
       Value |= 1;
   }
@@ -472,6 +481,19 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
     Value = (Hi4 << 16) | (Lo12);
     return Value;
   }
+  case ARM::fixup_t2_udf_hi16:
+    assert(STI != nullptr);
+    if (IsResolved || !STI->getTargetTriple().isOSBinFormatELF())
+      Value >>= 16;
+    LLVM_FALLTHROUGH;
+  case ARM::fixup_t2_udf_lo16: {
+    unsigned Hi4 = (Value & 0xF000) >> 12;
+    unsigned Lo12 = Value & 0x0FFF;
+    // inst{19-16} = Hi4;
+    // inst{11-0} = Lo12;
+    Value = (Hi4 << 16) | (Lo12);
+    return swapHalfWords(Value, Endian == support::little);
+  }
   case ARM::fixup_t2_movt_hi16:
     assert(STI != nullptr);
     if (IsResolved || !STI->getTargetTriple().isOSBinFormatELF())
@@ -967,6 +989,10 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
   case ARM::fixup_arm_movw_lo16:
   case ARM::fixup_t2_movt_hi16:
   case ARM::fixup_t2_movw_lo16:
+
+  case ARM::fixup_t2_udf_hi16:
+  case ARM::fixup_t2_udf_lo16:
+
   case ARM::fixup_t2_so_imm:
   case ARM::fixup_bf_branch:
   case ARM::fixup_bf_target:
@@ -1028,6 +1054,10 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) {
   case ARM::fixup_arm_movw_lo16:
   case ARM::fixup_t2_movt_hi16:
   case ARM::fixup_t2_movw_lo16:
+
+  case ARM::fixup_t2_udf_hi16:
+  case ARM::fixup_t2_udf_lo16:
+  
   case ARM::fixup_arm_mod_imm:
   case ARM::fixup_t2_so_imm:
   case ARM::fixup_bf_branch:
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index df8f54d14a8690..b424bb8e74b603 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -155,6 +155,11 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
       return ELF::R_ARM_THM_BF12;
     case ARM::fixup_bfl_target:
       return ELF::R_ARM_THM_BF18;
+
+    case ARM::fixup_t2_udf_hi16:
+      return ELF::R_ARM_PRIVATE_14;
+    case ARM::fixup_t2_udf_lo16:
+      return ELF::R_ARM_PRIVATE_15;
     }
   }
   switch (Kind) {
@@ -233,6 +238,8 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
     case MCSymbolRefExpr::VK_ARM_SBREL:
       return ELF::R_ARM_MOVT_BREL;
     }
+
+    
   case ARM::fixup_arm_movw_lo16:
     switch (Modifier) {
     default:
@@ -265,6 +272,11 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
     case MCSymbolRefExpr::VK_ARM_SBREL:
       return ELF::R_ARM_THM_MOVW_BREL_NC;
     }
+
+    case ARM::fixup_t2_udf_hi16:
+      return ELF::R_ARM_PRIVATE_14;
+    case ARM::fixup_t2_udf_lo16:
+      return ELF::R_ARM_PRIVATE_15;
   }
 }
 
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
index 53258a88c7fa27..d59b8f6fd5fefd 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
@@ -98,7 +98,8 @@ enum Fixups {
   fixup_arm_movw_lo16, // :lower16:
   fixup_t2_movt_hi16,  // :upper16:
   fixup_t2_movw_lo16,  // :lower16:
-
+  fixup_t2_udf_hi16,   // :upper16:
+  fixup_t2_udf_lo16,   // :lower16:
   // Fixup for mod_imm
   fixup_arm_mod_imm,
 
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index bafc0f85375605..7f194e345d5eae 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -1230,6 +1230,10 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
     case ARMMCExpr::VK_ARM_LO16:
       Kind = MCFixupKind(isThumb(STI) ? ARM::fixup_t2_movw_lo16
                                       : ARM::fixup_arm_movw_lo16);
+
+      if (MI.getOpcode() == ARM::t2UDF_ga) {
+        Kind = MCFixupKind(ARM::fixup_t2_udf_lo16);
+      }
       break;
     }
 
diff --git a/llvm/lib/Transforms/Hello/Hello.cpp b/llvm/lib/Transforms/Hello/Hello.cpp
index 5218d0897000d5..92aa1e449f53ab 100644
--- a/llvm/lib/Transforms/Hello/Hello.cpp
+++ b/llvm/lib/Transforms/Hello/Hello.cpp
@@ -30,8 +30,9 @@ namespace {
     bool runOnFunction(Function &F) override {
       ++HelloCounter;
       errs() << "Hello: ";
+      F.setSectionPrefix("mySection");
       errs().write_escaped(F.getName()) << '\n';
-      F.setSectionPrefix("mySection")
-      return false;
+      // setSectionPrefix() modifies F, so report a change to the pass manager.
+      return true;
     }
   };

>From 081cf99bbfa9125f764fc4ee53174b2ead3d4119 Mon Sep 17 00:00:00 2001
From: jiang-zixian <980485445 at qq.com>
Date: Sun, 31 Mar 2024 14:49:19 +0800
Subject: [PATCH 3/8] encode and decode is over!

---
 .../Driver/Inputs/CUDA-symlinks/usr/bin/ptxas |   1 -
 .../usr/bin/i386-unknown-linux-gnu-ld         |   2 +-
 .../usr/bin/x86_64-unknown-linux-gnu-ld       |   2 +-
 .../usr/i386-unknown-linux-gnu/bin/ld         |   2 +-
 .../usr/x86_64-unknown-linux-gnu/bin/ld       |   2 +-
 .../multilib_32bit_linux_tree/usr/bin/as      |   2 +-
 .../multilib_32bit_linux_tree/usr/bin/ld      |   2 +-
 .../usr/i386-unknown-linux/bin/as             |   2 +-
 .../usr/i386-unknown-linux/bin/ld             |   2 +-
 .../multilib_64bit_linux_tree/usr/bin/as      |   2 +-
 .../multilib_64bit_linux_tree/usr/bin/ld      |   2 +-
 .../usr/x86_64-unknown-linux/bin/as           |   2 +-
 .../usr/x86_64-unknown-linux/bin/ld           |   2 +-
 libclc/amdgcn-mesa3d                          |   1 -
 libclc/amdgcn-mesa3d/lib/SOURCES              |   3 +
 .../lib/workitem/get_global_size.cl           |  23 +
 .../lib/workitem/get_local_size.cl            |  29 +
 .../lib/workitem/get_num_groups.cl            |  12 +
 libclc/clspv64                                |   1 -
 libclc/clspv64/lib/SOURCES                    |  48 ++
 libclc/clspv64/lib/math/fma.cl                | 256 +++++++
 libclc/clspv64/lib/math/nextafter.cl          |   5 +
 libclc/clspv64/lib/math/nextafter.inc         |   3 +
 libclc/clspv64/lib/subnormal_config.cl        |  31 +
 libcxx/test/std/pstl                          |   1 -
 .../alg.merge/inplace_merge.pass.cpp          | 159 +++++
 .../pstl/algorithms/alg.merge/merge.pass.cpp  | 113 ++++
 .../alg.copy/copy_if.pass.cpp                 | 147 ++++
 .../alg.partitions/is_partitioned.pass.cpp    | 101 +++
 .../alg.partitions/partition.pass.cpp         | 178 +++++
 .../alg.partitions/partition_copy.pass.cpp    | 116 ++++
 .../alg.reverse/reverse.pass.cpp              | 104 +++
 .../alg.reverse/reverse_copy.pass.cpp         | 130 ++++
 .../copy_move.pass.cpp                        | 197 ++++++
 .../alg.modifying.operations/fill.pass.cpp    | 100 +++
 .../generate.pass.cpp                         | 104 +++
 .../alg.modifying.operations/remove.pass.cpp  | 161 +++++
 .../remove_copy.pass.cpp                      |  91 +++
 .../alg.modifying.operations/replace.pass.cpp | 160 +++++
 .../replace_copy.pass.cpp                     | 105 +++
 .../alg.modifying.operations/rotate.pass.cpp  | 176 +++++
 .../rotate_copy.pass.cpp                      | 146 ++++
 .../swap_ranges.pass.cpp                      | 133 ++++
 .../transform_binary.pass.cpp                 | 122 ++++
 .../transform_unary.pass.cpp                  |  91 +++
 .../alg.modifying.operations/unique.pass.cpp  | 163 +++++
 .../unique_copy_equal.pass.cpp                | 135 ++++
 .../alg.nonmodifying/adjacent_find.pass.cpp   | 114 ++++
 .../alg.nonmodifying/all_of.pass.cpp          | 117 ++++
 .../alg.nonmodifying/any_of.pass.cpp          | 103 +++
 .../alg.nonmodifying/count.pass.cpp           | 108 +++
 .../alg.nonmodifying/equal.pass.cpp           | 168 +++++
 .../algorithms/alg.nonmodifying/find.pass.cpp |  96 +++
 .../alg.nonmodifying/find_end.pass.cpp        | 123 ++++
 .../alg.nonmodifying/find_first_of.pass.cpp   | 112 ++++
 .../alg.nonmodifying/find_if.pass.cpp         | 109 +++
 .../alg.nonmodifying/for_each.pass.cpp        | 102 +++
 .../alg.nonmodifying/mismatch.pass.cpp        | 132 ++++
 .../alg.nonmodifying/none_of.pass.cpp         | 101 +++
 .../alg.nonmodifying/nth_element.pass.cpp     | 175 +++++
 .../alg.nonmodifying/search_n.pass.cpp        | 109 +++
 .../alg.heap.operations/is_heap.pass.cpp      | 146 ++++
 .../lexicographical_compare.pass.cpp          | 175 +++++
 .../alg.min.max/minmax_element.pass.cpp       | 192 ++++++
 .../alg.set.operations/includes.pass.cpp      | 106 +++
 .../alg.set.operations/set.pass.cpp           | 280 ++++++++
 .../algorithms/alg.sorting/is_sorted.pass.cpp | 100 +++
 .../alg.sorting/partial_sort.pass.cpp         | 149 +++++
 .../alg.sorting/partial_sort_copy.pass.cpp    | 196 ++++++
 .../pstl/algorithms/alg.sorting/sort.pass.cpp | 247 +++++++
 libcxx/test/std/pstl/lit.local.cfg            |   2 +
 .../numeric.ops/adjacent_difference.pass.cpp  | 170 +++++
 .../pstl/numerics/numeric.ops/reduce.pass.cpp | 114 ++++
 .../pstl/numerics/numeric.ops/scan.fail.cpp   |  28 +
 .../pstl/numerics/numeric.ops/scan.pass.cpp   | 201 ++++++
 .../numeric.ops/transform_reduce.pass.cpp     | 129 ++++
 .../numeric.ops/transform_scan.pass.cpp       | 177 +++++
 .../uninitialized_construct.pass.cpp          | 123 ++++
 .../uninitialized_copy_move.pass.cpp          | 143 ++++
 .../uninitialized_fill_destroy.pass.cpp       |  93 +++
 llvm/lib/Target/ARM/ARMEncodeDecode.cpp       | 628 ++++++++++++++++++
 llvm/lib/Target/ARM/ARMEncodeDecode.h         |  38 ++
 llvm/lib/Target/ARM/ARMRandezvousOptions.cpp  |  36 +
 llvm/lib/Target/ARM/ARMRandezvousOptions.h    |   4 +
 llvm/lib/Target/ARM/ARMTargetMachine.cpp      |   8 +
 llvm/lib/Target/ARM/CMakeLists.txt            |   4 +
 openmp/tools/analyzer/llvm-openmp-analyzer++  |  46 +-
 87 files changed, 8557 insertions(+), 17 deletions(-)
 mode change 120000 => 100755 clang/test/Driver/Inputs/CUDA-symlinks/usr/bin/ptxas
 mode change 120000 => 100755 clang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld
 mode change 120000 => 100755 clang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld
 mode change 120000 => 100755 clang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld
 mode change 120000 => 100755 clang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld
 mode change 120000 => 100755 clang/test/Driver/Inputs/multilib_32bit_linux_tree/usr/bin/as
 mode change 120000 => 100755 clang/test/Driver/Inputs/multilib_32bit_linux_tree/usr/bin/ld
 mode change 120000 => 100755 clang/test/Driver/Inputs/multilib_32bit_linux_tree/usr/i386-unknown-linux/bin/as
 mode change 120000 => 100755 clang/test/Driver/Inputs/multilib_32bit_linux_tree/usr/i386-unknown-linux/bin/ld
 mode change 120000 => 100755 clang/test/Driver/Inputs/multilib_64bit_linux_tree/usr/bin/as
 mode change 120000 => 100755 clang/test/Driver/Inputs/multilib_64bit_linux_tree/usr/bin/ld
 mode change 120000 => 100755 clang/test/Driver/Inputs/multilib_64bit_linux_tree/usr/x86_64-unknown-linux/bin/as
 mode change 120000 => 100755 clang/test/Driver/Inputs/multilib_64bit_linux_tree/usr/x86_64-unknown-linux/bin/ld
 delete mode 120000 libclc/amdgcn-mesa3d
 create mode 100644 libclc/amdgcn-mesa3d/lib/SOURCES
 create mode 100644 libclc/amdgcn-mesa3d/lib/workitem/get_global_size.cl
 create mode 100644 libclc/amdgcn-mesa3d/lib/workitem/get_local_size.cl
 create mode 100644 libclc/amdgcn-mesa3d/lib/workitem/get_num_groups.cl
 delete mode 120000 libclc/clspv64
 create mode 100644 libclc/clspv64/lib/SOURCES
 create mode 100644 libclc/clspv64/lib/math/fma.cl
 create mode 100644 libclc/clspv64/lib/math/nextafter.cl
 create mode 100644 libclc/clspv64/lib/math/nextafter.inc
 create mode 100644 libclc/clspv64/lib/subnormal_config.cl
 delete mode 120000 libcxx/test/std/pstl
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.merge/inplace_merge.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.merge/merge.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.modifying.operations/alg.copy/copy_if.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.modifying.operations/alg.partitions/is_partitioned.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.modifying.operations/alg.partitions/partition.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.modifying.operations/alg.partitions/partition_copy.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.modifying.operations/alg.reverse/reverse.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.modifying.operations/alg.reverse/reverse_copy.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.modifying.operations/copy_move.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.modifying.operations/fill.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.modifying.operations/generate.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.modifying.operations/remove.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.modifying.operations/remove_copy.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.modifying.operations/replace.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.modifying.operations/replace_copy.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.modifying.operations/rotate.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.modifying.operations/rotate_copy.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.modifying.operations/swap_ranges.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.modifying.operations/transform_binary.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.modifying.operations/transform_unary.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.modifying.operations/unique.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.modifying.operations/unique_copy_equal.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.nonmodifying/adjacent_find.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.nonmodifying/all_of.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.nonmodifying/any_of.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.nonmodifying/count.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.nonmodifying/equal.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.nonmodifying/find.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.nonmodifying/find_end.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.nonmodifying/find_first_of.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.nonmodifying/find_if.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.nonmodifying/for_each.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.nonmodifying/mismatch.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.nonmodifying/none_of.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.nonmodifying/nth_element.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.nonmodifying/search_n.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.sorting/alg.heap.operations/is_heap.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.sorting/alg.lex.comparison/lexicographical_compare.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.sorting/alg.min.max/minmax_element.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.sorting/alg.set.operations/includes.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.sorting/alg.set.operations/set.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.sorting/is_sorted.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.sorting/partial_sort.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.sorting/partial_sort_copy.pass.cpp
 create mode 100644 libcxx/test/std/pstl/algorithms/alg.sorting/sort.pass.cpp
 create mode 100644 libcxx/test/std/pstl/lit.local.cfg
 create mode 100644 libcxx/test/std/pstl/numerics/numeric.ops/adjacent_difference.pass.cpp
 create mode 100644 libcxx/test/std/pstl/numerics/numeric.ops/reduce.pass.cpp
 create mode 100644 libcxx/test/std/pstl/numerics/numeric.ops/scan.fail.cpp
 create mode 100644 libcxx/test/std/pstl/numerics/numeric.ops/scan.pass.cpp
 create mode 100644 libcxx/test/std/pstl/numerics/numeric.ops/transform_reduce.pass.cpp
 create mode 100644 libcxx/test/std/pstl/numerics/numeric.ops/transform_scan.pass.cpp
 create mode 100644 libcxx/test/std/pstl/utilities/memory/specialized.algorithms/uninitialized_construct.pass.cpp
 create mode 100644 libcxx/test/std/pstl/utilities/memory/specialized.algorithms/uninitialized_copy_move.pass.cpp
 create mode 100644 libcxx/test/std/pstl/utilities/memory/specialized.algorithms/uninitialized_fill_destroy.pass.cpp
 create mode 100755 llvm/lib/Target/ARM/ARMEncodeDecode.cpp
 create mode 100755 llvm/lib/Target/ARM/ARMEncodeDecode.h
 mode change 100644 => 100755 llvm/lib/Target/ARM/ARMRandezvousOptions.cpp
 mode change 100644 => 100755 llvm/lib/Target/ARM/ARMRandezvousOptions.h
 mode change 100644 => 100755 llvm/lib/Target/ARM/ARMTargetMachine.cpp
 mode change 100644 => 100755 llvm/lib/Target/ARM/CMakeLists.txt
 mode change 120000 => 100755 openmp/tools/analyzer/llvm-openmp-analyzer++

diff --git a/clang/test/Driver/Inputs/CUDA-symlinks/usr/bin/ptxas b/clang/test/Driver/Inputs/CUDA-symlinks/usr/bin/ptxas
deleted file mode 120000
index 59eefd95a9023c..00000000000000
--- a/clang/test/Driver/Inputs/CUDA-symlinks/usr/bin/ptxas
+++ /dev/null
@@ -1 +0,0 @@
-../../opt/cuda/bin/ptxas
\ No newline at end of file
diff --git a/clang/test/Driver/Inputs/CUDA-symlinks/usr/bin/ptxas b/clang/test/Driver/Inputs/CUDA-symlinks/usr/bin/ptxas
new file mode 100755
index 00000000000000..e69de29bb2d1d6
diff --git a/clang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld b/clang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld
deleted file mode 120000
index 7e0a9cfe2ddbd6..00000000000000
--- a/clang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld
+++ /dev/null
@@ -1 +0,0 @@
-i386-unknown-linux-gnu-ld.gold
\ No newline at end of file
diff --git a/clang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld b/clang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld
new file mode 100755
index 00000000000000..b23e55619b2ff0
--- /dev/null
+++ b/clang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld
@@ -0,0 +1 @@
+#!/bin/true
diff --git a/clang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld b/clang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld
deleted file mode 120000
index ce36ac093b6176..00000000000000
--- a/clang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld
+++ /dev/null
@@ -1 +0,0 @@
-x86_64-unknown-linux-gnu-ld.gold
\ No newline at end of file
diff --git a/clang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld b/clang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld
new file mode 100755
index 00000000000000..b23e55619b2ff0
--- /dev/null
+++ b/clang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld
@@ -0,0 +1 @@
+#!/bin/true
diff --git a/clang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld b/clang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld
deleted file mode 120000
index 6cd03701cdda78..00000000000000
--- a/clang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld
+++ /dev/null
@@ -1 +0,0 @@
-ld.gold
\ No newline at end of file
diff --git a/clang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld b/clang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld
new file mode 100755
index 00000000000000..b23e55619b2ff0
--- /dev/null
+++ b/clang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld
@@ -0,0 +1 @@
+#!/bin/true
diff --git a/clang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld b/clang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld
deleted file mode 120000
index 6cd03701cdda78..00000000000000
--- a/clang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld
+++ /dev/null
@@ -1 +0,0 @@
-ld.gold
\ No newline at end of file
diff --git a/clang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld b/clang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld
new file mode 100755
index 00000000000000..b23e55619b2ff0
--- /dev/null
+++ b/clang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld
@@ -0,0 +1 @@
+#!/bin/true
diff --git a/clang/test/Driver/Inputs/multilib_32bit_linux_tree/usr/bin/as b/clang/test/Driver/Inputs/multilib_32bit_linux_tree/usr/bin/as
deleted file mode 120000
index 0065315cfd1de8..00000000000000
--- a/clang/test/Driver/Inputs/multilib_32bit_linux_tree/usr/bin/as
+++ /dev/null
@@ -1 +0,0 @@
-i386-unknown-linux-gnu-as
\ No newline at end of file
diff --git a/clang/test/Driver/Inputs/multilib_32bit_linux_tree/usr/bin/as b/clang/test/Driver/Inputs/multilib_32bit_linux_tree/usr/bin/as
new file mode 100755
index 00000000000000..b23e55619b2ff0
--- /dev/null
+++ b/clang/test/Driver/Inputs/multilib_32bit_linux_tree/usr/bin/as
@@ -0,0 +1 @@
+#!/bin/true
diff --git a/clang/test/Driver/Inputs/multilib_32bit_linux_tree/usr/bin/ld b/clang/test/Driver/Inputs/multilib_32bit_linux_tree/usr/bin/ld
deleted file mode 120000
index 9e5574285c70e4..00000000000000
--- a/clang/test/Driver/Inputs/multilib_32bit_linux_tree/usr/bin/ld
+++ /dev/null
@@ -1 +0,0 @@
-i386-unknown-linux-gnu-ld
\ No newline at end of file
diff --git a/clang/test/Driver/Inputs/multilib_32bit_linux_tree/usr/bin/ld b/clang/test/Driver/Inputs/multilib_32bit_linux_tree/usr/bin/ld
new file mode 100755
index 00000000000000..b23e55619b2ff0
--- /dev/null
+++ b/clang/test/Driver/Inputs/multilib_32bit_linux_tree/usr/bin/ld
@@ -0,0 +1 @@
+#!/bin/true
diff --git a/clang/test/Driver/Inputs/multilib_32bit_linux_tree/usr/i386-unknown-linux/bin/as b/clang/test/Driver/Inputs/multilib_32bit_linux_tree/usr/i386-unknown-linux/bin/as
deleted file mode 120000
index 2aa12fdef91620..00000000000000
--- a/clang/test/Driver/Inputs/multilib_32bit_linux_tree/usr/i386-unknown-linux/bin/as
+++ /dev/null
@@ -1 +0,0 @@
-../../bin/i386-unknown-linux-gnu-as
\ No newline at end of file
diff --git a/clang/test/Driver/Inputs/multilib_32bit_linux_tree/usr/i386-unknown-linux/bin/as b/clang/test/Driver/Inputs/multilib_32bit_linux_tree/usr/i386-unknown-linux/bin/as
new file mode 100755
index 00000000000000..b23e55619b2ff0
--- /dev/null
+++ b/clang/test/Driver/Inputs/multilib_32bit_linux_tree/usr/i386-unknown-linux/bin/as
@@ -0,0 +1 @@
+#!/bin/true
diff --git a/clang/test/Driver/Inputs/multilib_32bit_linux_tree/usr/i386-unknown-linux/bin/ld b/clang/test/Driver/Inputs/multilib_32bit_linux_tree/usr/i386-unknown-linux/bin/ld
deleted file mode 120000
index 5aeaff619662a8..00000000000000
--- a/clang/test/Driver/Inputs/multilib_32bit_linux_tree/usr/i386-unknown-linux/bin/ld
+++ /dev/null
@@ -1 +0,0 @@
-../../bin/i386-unknown-linux-gnu-ld
\ No newline at end of file
diff --git a/clang/test/Driver/Inputs/multilib_32bit_linux_tree/usr/i386-unknown-linux/bin/ld b/clang/test/Driver/Inputs/multilib_32bit_linux_tree/usr/i386-unknown-linux/bin/ld
new file mode 100755
index 00000000000000..b23e55619b2ff0
--- /dev/null
+++ b/clang/test/Driver/Inputs/multilib_32bit_linux_tree/usr/i386-unknown-linux/bin/ld
@@ -0,0 +1 @@
+#!/bin/true
diff --git a/clang/test/Driver/Inputs/multilib_64bit_linux_tree/usr/bin/as b/clang/test/Driver/Inputs/multilib_64bit_linux_tree/usr/bin/as
deleted file mode 120000
index 477cbc9635fcbf..00000000000000
--- a/clang/test/Driver/Inputs/multilib_64bit_linux_tree/usr/bin/as
+++ /dev/null
@@ -1 +0,0 @@
-x86_64-unknown-linux-gnu-as
\ No newline at end of file
diff --git a/clang/test/Driver/Inputs/multilib_64bit_linux_tree/usr/bin/as b/clang/test/Driver/Inputs/multilib_64bit_linux_tree/usr/bin/as
new file mode 100755
index 00000000000000..b23e55619b2ff0
--- /dev/null
+++ b/clang/test/Driver/Inputs/multilib_64bit_linux_tree/usr/bin/as
@@ -0,0 +1 @@
+#!/bin/true
diff --git a/clang/test/Driver/Inputs/multilib_64bit_linux_tree/usr/bin/ld b/clang/test/Driver/Inputs/multilib_64bit_linux_tree/usr/bin/ld
deleted file mode 120000
index 5343caf34d8f34..00000000000000
--- a/clang/test/Driver/Inputs/multilib_64bit_linux_tree/usr/bin/ld
+++ /dev/null
@@ -1 +0,0 @@
-x86_64-unknown-linux-gnu-ld
\ No newline at end of file
diff --git a/clang/test/Driver/Inputs/multilib_64bit_linux_tree/usr/bin/ld b/clang/test/Driver/Inputs/multilib_64bit_linux_tree/usr/bin/ld
new file mode 100755
index 00000000000000..b23e55619b2ff0
--- /dev/null
+++ b/clang/test/Driver/Inputs/multilib_64bit_linux_tree/usr/bin/ld
@@ -0,0 +1 @@
+#!/bin/true
diff --git a/clang/test/Driver/Inputs/multilib_64bit_linux_tree/usr/x86_64-unknown-linux/bin/as b/clang/test/Driver/Inputs/multilib_64bit_linux_tree/usr/x86_64-unknown-linux/bin/as
deleted file mode 120000
index 84a9113f2671f4..00000000000000
--- a/clang/test/Driver/Inputs/multilib_64bit_linux_tree/usr/x86_64-unknown-linux/bin/as
+++ /dev/null
@@ -1 +0,0 @@
-../../bin/x86_64-unknown-linux-gnu-as
\ No newline at end of file
diff --git a/clang/test/Driver/Inputs/multilib_64bit_linux_tree/usr/x86_64-unknown-linux/bin/as b/clang/test/Driver/Inputs/multilib_64bit_linux_tree/usr/x86_64-unknown-linux/bin/as
new file mode 100755
index 00000000000000..b23e55619b2ff0
--- /dev/null
+++ b/clang/test/Driver/Inputs/multilib_64bit_linux_tree/usr/x86_64-unknown-linux/bin/as
@@ -0,0 +1 @@
+#!/bin/true
diff --git a/clang/test/Driver/Inputs/multilib_64bit_linux_tree/usr/x86_64-unknown-linux/bin/ld b/clang/test/Driver/Inputs/multilib_64bit_linux_tree/usr/x86_64-unknown-linux/bin/ld
deleted file mode 120000
index c417e3afaa4945..00000000000000
--- a/clang/test/Driver/Inputs/multilib_64bit_linux_tree/usr/x86_64-unknown-linux/bin/ld
+++ /dev/null
@@ -1 +0,0 @@
-../../bin/x86_64-unknown-linux-gnu-ld
\ No newline at end of file
diff --git a/clang/test/Driver/Inputs/multilib_64bit_linux_tree/usr/x86_64-unknown-linux/bin/ld b/clang/test/Driver/Inputs/multilib_64bit_linux_tree/usr/x86_64-unknown-linux/bin/ld
new file mode 100755
index 00000000000000..b23e55619b2ff0
--- /dev/null
+++ b/clang/test/Driver/Inputs/multilib_64bit_linux_tree/usr/x86_64-unknown-linux/bin/ld
@@ -0,0 +1 @@
+#!/bin/true
diff --git a/libclc/amdgcn-mesa3d b/libclc/amdgcn-mesa3d
deleted file mode 120000
index 400782833efe6c..00000000000000
--- a/libclc/amdgcn-mesa3d
+++ /dev/null
@@ -1 +0,0 @@
-amdgcn-amdhsa
\ No newline at end of file
diff --git a/libclc/amdgcn-mesa3d/lib/SOURCES b/libclc/amdgcn-mesa3d/lib/SOURCES
new file mode 100644
index 00000000000000..8224b7721b2ca5
--- /dev/null
+++ b/libclc/amdgcn-mesa3d/lib/SOURCES
@@ -0,0 +1,3 @@
+workitem/get_global_size.cl
+workitem/get_local_size.cl
+workitem/get_num_groups.cl
diff --git a/libclc/amdgcn-mesa3d/lib/workitem/get_global_size.cl b/libclc/amdgcn-mesa3d/lib/workitem/get_global_size.cl
new file mode 100644
index 00000000000000..62bd2ba283523f
--- /dev/null
+++ b/libclc/amdgcn-mesa3d/lib/workitem/get_global_size.cl
@@ -0,0 +1,23 @@
+#include <clc/clc.h>
+
+#if __clang_major__ >= 8
+#define CONST_AS __constant
+#elif __clang_major__ >= 7
+#define CONST_AS __attribute__((address_space(4)))
+#else
+#define CONST_AS __attribute__((address_space(2)))
+#endif
+
+#if __clang_major__ >= 6
+#define __dispatch_ptr __builtin_amdgcn_dispatch_ptr
+#else
+#define __dispatch_ptr __clc_amdgcn_dispatch_ptr
+CONST_AS uchar * __clc_amdgcn_dispatch_ptr(void) __asm("llvm.amdgcn.dispatch.ptr");
+#endif
+
+// Global work size along `dim`, read from the buffer that __dispatch_ptr()
+// returns; dimensions beyond the third are reported as 1.
+_CLC_DEF _CLC_OVERLOAD size_t get_global_size(uint dim) {
+  CONST_AS uint *words = (CONST_AS uint *)__dispatch_ptr();
+  return dim < 3 ? words[3 + dim] : 1;
+}
diff --git a/libclc/amdgcn-mesa3d/lib/workitem/get_local_size.cl b/libclc/amdgcn-mesa3d/lib/workitem/get_local_size.cl
new file mode 100644
index 00000000000000..9f09fd5a16ec66
--- /dev/null
+++ b/libclc/amdgcn-mesa3d/lib/workitem/get_local_size.cl
@@ -0,0 +1,29 @@
+#include <clc/clc.h>
+
+#if __clang_major__ >= 8
+#define CONST_AS __constant
+#elif __clang_major__ >= 7
+#define CONST_AS __attribute__((address_space(4)))
+#else
+#define CONST_AS __attribute__((address_space(2)))
+#endif
+
+#if __clang_major__ >= 6
+#define __dispatch_ptr __builtin_amdgcn_dispatch_ptr
+#else
+#define __dispatch_ptr __clc_amdgcn_dispatch_ptr
+CONST_AS char * __clc_amdgcn_dispatch_ptr(void) __asm("llvm.amdgcn.dispatch.ptr");
+#endif
+
+// Work-group size along `dim`, unpacked from the 16-bit fields held in
+// words 1 and 2 of the buffer that __dispatch_ptr() returns.
+_CLC_DEF _CLC_OVERLOAD size_t get_local_size(uint dim) {
+  CONST_AS uint *words = (CONST_AS uint *)__dispatch_ptr();
+  if (dim == 0)
+    return words[1] & 0xffffu; // low half of word 1
+  if (dim == 1)
+    return words[1] >> 16; // high half of word 1
+  if (dim == 2)
+    return words[2] & 0xffffu; // low half of word 2
+  return 1; // any other dimension has extent 1
+}
diff --git a/libclc/amdgcn-mesa3d/lib/workitem/get_num_groups.cl b/libclc/amdgcn-mesa3d/lib/workitem/get_num_groups.cl
new file mode 100644
index 00000000000000..35dc2218852114
--- /dev/null
+++ b/libclc/amdgcn-mesa3d/lib/workitem/get_num_groups.cl
@@ -0,0 +1,12 @@
+
+#include <clc/clc.h>
+
+// Number of work-groups along `dim`: global size divided by local size,
+// rounded up so that a trailing partial group is counted as well.
+_CLC_DEF _CLC_OVERLOAD size_t get_num_groups(uint dim) {
+  const size_t total_items = get_global_size(dim);
+  const size_t group_items = get_local_size(dim);
+  const size_t full_groups = total_items / group_items;
+  const size_t leftover = total_items % group_items;
+  return leftover != 0 ? full_groups + 1 : full_groups;
+}
diff --git a/libclc/clspv64 b/libclc/clspv64
deleted file mode 120000
index ea01ba94bc6368..00000000000000
--- a/libclc/clspv64
+++ /dev/null
@@ -1 +0,0 @@
-clspv
\ No newline at end of file
diff --git a/libclc/clspv64/lib/SOURCES b/libclc/clspv64/lib/SOURCES
new file mode 100644
index 00000000000000..0466345cee0271
--- /dev/null
+++ b/libclc/clspv64/lib/SOURCES
@@ -0,0 +1,48 @@
+subnormal_config.cl
+../../generic/lib/geometric/distance.cl
+../../generic/lib/geometric/length.cl
+math/fma.cl
+math/nextafter.cl
+../../generic/lib/math/acosh.cl
+../../generic/lib/math/asinh.cl
+../../generic/lib/math/atan.cl
+../../generic/lib/math/atan2.cl
+../../generic/lib/math/atan2pi.cl
+../../generic/lib/math/atanh.cl
+../../generic/lib/math/atanpi.cl
+../../generic/lib/math/cbrt.cl
+../../generic/lib/math/clc_fmod.cl
+../../generic/lib/math/clc_hypot.cl
+../../generic/lib/math/clc_ldexp.cl
+../../generic/lib/math/clc_nextafter.cl
+../../generic/lib/math/clc_remainder.cl
+../../generic/lib/math/clc_remquo.cl
+../../generic/lib/math/clc_rootn.cl
+../../generic/lib/math/clc_sqrt.cl
+../../generic/lib/math/clc_tan.cl
+../../generic/lib/math/erf.cl
+../../generic/lib/math/erfc.cl
+../../generic/lib/math/fmod.cl
+../../generic/lib/math/fract.cl
+../../generic/lib/math/frexp.cl
+../../generic/lib/math/half_divide.cl
+../../generic/lib/math/half_recip.cl
+../../generic/lib/math/half_sqrt.cl
+../../generic/lib/math/hypot.cl
+../../generic/lib/math/ilogb.cl
+../../generic/lib/math/ldexp.cl
+../../generic/lib/math/lgamma.cl
+../../generic/lib/math/lgamma_r.cl
+../../generic/lib/math/logb.cl
+../../generic/lib/math/maxmag.cl
+../../generic/lib/math/minmag.cl
+../../generic/lib/math/modf.cl
+../../generic/lib/math/nan.cl
+../../generic/lib/math/remainder.cl
+../../generic/lib/math/remquo.cl
+../../generic/lib/math/rootn.cl
+../../generic/lib/math/rsqrt.cl
+../../generic/lib/math/sqrt.cl
+../../generic/lib/math/tables.cl
+../../generic/lib/math/tanh.cl
+../../generic/lib/math/tgamma.cl
diff --git a/libclc/clspv64/lib/math/fma.cl b/libclc/clspv64/lib/math/fma.cl
new file mode 100644
index 00000000000000..fdc8b8b296876c
--- /dev/null
+++ b/libclc/clspv64/lib/math/fma.cl
@@ -0,0 +1,256 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+// This version is derived from the generic fma software implementation
+// (__clc_sw_fma), but avoids the use of ulong in favor of uint2. The logic has
+// been updated as appropriate.
+
+#include <clc/clc.h>
+#include "../../../generic/lib/clcmacro.h"
+#include "../../../generic/lib/math/math.h"
+
+struct fp { // unpacked float operand used by the software fma in this file
+  uint2 mantissa; // 64-bit significand split into .hi and .lo 32-bit words
+  int exponent; // exponent with the bias of 127 already subtracted
+  uint sign; // sign kept in place as bit 31 (0 or 0x80000000)
+};
+
+// Software single-precision fma(a, b, c) built on 32-bit integer halves
+// (uint2) instead of ulong; the result is rounded to nearest even.
+_CLC_DEF _CLC_OVERLOAD float fma(float a, float b, float c) {
+  /* special cases */
+  if (isnan(a) || isnan(b) || isnan(c) || isinf(a) || isinf(b)) {
+    return mad(a, b, c);
+  }
+
+  /* If only c is inf, and both a,b are regular numbers, the result is c*/
+  if (isinf(c)) {
+    return c;
+  }
+
+  a = __clc_flush_denormal_if_not_supported(a);
+  b = __clc_flush_denormal_if_not_supported(b);
+  c = __clc_flush_denormal_if_not_supported(c);
+
+  if (a == 0.0f || b == 0.0f) {
+    return c;
+  }
+
+  if (c == 0) {
+    return a * b;
+  }
+
+  struct fp st_a, st_b, st_c;
+
+  st_a.exponent = a == .0f ? 0 : ((as_uint(a) & 0x7f800000) >> 23) - 127;
+  st_b.exponent = b == .0f ? 0 : ((as_uint(b) & 0x7f800000) >> 23) - 127;
+  st_c.exponent = c == .0f ? 0 : ((as_uint(c) & 0x7f800000) >> 23) - 127;
+
+  st_a.mantissa.lo = a == .0f ? 0 : (as_uint(a) & 0x7fffff) | 0x800000;
+  st_b.mantissa.lo = b == .0f ? 0 : (as_uint(b) & 0x7fffff) | 0x800000;
+  st_c.mantissa.lo = c == .0f ? 0 : (as_uint(c) & 0x7fffff) | 0x800000;
+  st_a.mantissa.hi = 0;
+  st_b.mantissa.hi = 0;
+  st_c.mantissa.hi = 0;
+
+  st_a.sign = as_uint(a) & 0x80000000;
+  st_b.sign = as_uint(b) & 0x80000000;
+  st_c.sign = as_uint(c) & 0x80000000;
+
+  // Multiplication.
+  // Move the product to the highest bits to maximize precision
+  // mantissa is 24 bits => product is 48 bits, 2bits non-fraction.
+  // Add one bit for future addition overflow,
+  // add another bit to detect subtraction underflow
+  struct fp st_mul;
+  st_mul.sign = st_a.sign ^ st_b.sign;
+  st_mul.mantissa.hi = mul_hi(st_a.mantissa.lo, st_b.mantissa.lo);
+  st_mul.mantissa.lo = st_a.mantissa.lo * st_b.mantissa.lo;
+  uint upper_14bits = (st_mul.mantissa.lo >> 18) & 0x3fff;
+  st_mul.mantissa.lo <<= 14;
+  st_mul.mantissa.hi <<= 14;
+  st_mul.mantissa.hi |= upper_14bits;
+  st_mul.exponent = (st_mul.mantissa.lo != 0 || st_mul.mantissa.hi != 0)
+                        ? st_a.exponent + st_b.exponent
+                        : 0;
+
+// Mantissa is 23 fractional bits, shift it the same way as product mantissa
+#define C_ADJUST 37ul
+
+  // both exponents are bias adjusted
+  int exp_diff = st_mul.exponent - st_c.exponent;
+
+  uint abs_exp_diff = abs(exp_diff);
+  st_c.mantissa.hi = (st_c.mantissa.lo << 5);
+  st_c.mantissa.lo = 0;
+  uint2 cutoff_bits = (uint2)(0, 0);
+  uint2 cutoff_mask = (uint2)(0, 0);
+  if (abs_exp_diff < 32) {
+    cutoff_mask.lo = (1u << abs(exp_diff)) - 1u;
+  } else if (abs_exp_diff < 64) {
+    cutoff_mask.lo = 0xffffffff;
+    uint remaining = abs_exp_diff - 32;
+    cutoff_mask.hi = (1u << remaining) - 1u;
+  }
+  uint2 tmp = (exp_diff > 0) ? st_c.mantissa : st_mul.mantissa;
+  if (abs_exp_diff > 0) {
+    cutoff_bits = abs_exp_diff >= 64 ? tmp : (tmp & cutoff_mask);
+    if (abs_exp_diff < 32) {
+      // shift some of the hi bits into the shifted lo bits.
+      uint shift_mask = (1u << abs_exp_diff) - 1;
+      uint upper_saved_bits = tmp.hi & shift_mask;
+      upper_saved_bits = upper_saved_bits << (32 - abs_exp_diff);
+      tmp.hi >>= abs_exp_diff;
+      tmp.lo >>= abs_exp_diff;
+      tmp.lo |= upper_saved_bits;
+    } else if (abs_exp_diff < 64) {
+      tmp.lo = (tmp.hi >> (abs_exp_diff - 32));
+      tmp.hi = 0;
+    } else {
+      tmp = (uint2)(0, 0);
+    }
+  }
+  if (exp_diff > 0)
+    st_c.mantissa = tmp;
+  else
+    st_mul.mantissa = tmp;
+
+  struct fp st_fma;
+  st_fma.sign = st_mul.sign;
+  st_fma.exponent = max(st_mul.exponent, st_c.exponent);
+  st_fma.mantissa = (uint2)(0, 0);
+  if (st_c.sign == st_mul.sign) {
+    uint carry = (hadd(st_mul.mantissa.lo, st_c.mantissa.lo) >> 31) & 0x1;
+    st_fma.mantissa = st_mul.mantissa + st_c.mantissa;
+    st_fma.mantissa.hi += carry;
+  } else {
+    // cutoff bits borrow one
+    uint cutoff_borrow = ((cutoff_bits.lo != 0 || cutoff_bits.hi != 0) &&
+                          (st_mul.exponent > st_c.exponent)) ? 1 : 0;
+    uint borrow = 0;
+    if (st_c.mantissa.lo > st_mul.mantissa.lo) {
+      borrow = 1;
+    } else if (st_c.mantissa.lo == UINT_MAX && cutoff_borrow == 1) {
+      borrow = 1;
+    } else if ((st_c.mantissa.lo + cutoff_borrow) > st_mul.mantissa.lo) {
+      borrow = 1;
+    }
+
+    st_fma.mantissa.lo = st_mul.mantissa.lo - st_c.mantissa.lo - cutoff_borrow;
+    st_fma.mantissa.hi = st_mul.mantissa.hi - st_c.mantissa.hi - borrow;
+  }
+
+  // underflow: st_c.sign != st_mul.sign, and magnitude switches the sign
+  if (st_fma.mantissa.hi > INT_MAX) {
+    st_fma.mantissa = ~st_fma.mantissa;
+    uint carry = (hadd(st_fma.mantissa.lo, 1u) >> 31) & 0x1;
+    st_fma.mantissa.lo += 1;
+    st_fma.mantissa.hi += carry;
+
+    st_fma.sign = st_mul.sign ^ 0x80000000;
+  }
+
+  // detect overflow/underflow
+  uint leading_zeroes = clz(st_fma.mantissa.hi);
+  if (leading_zeroes == 32) {
+    leading_zeroes += clz(st_fma.mantissa.lo);
+  }
+  int overflow_bits = 3 - leading_zeroes;
+
+  // adjust exponent
+  st_fma.exponent += overflow_bits;
+
+  // handle underflow
+  if (overflow_bits < 0) {
+    uint shift = -overflow_bits;
+    if (shift < 32) {
+      uint shift_mask = (1u << shift) - 1;
+      uint saved_lo_bits = (st_fma.mantissa.lo >> (32 - shift)) & shift_mask;
+      st_fma.mantissa.lo <<= shift;
+      st_fma.mantissa.hi <<= shift;
+      st_fma.mantissa.hi |= saved_lo_bits;
+    } else if (shift < 64) {
+      // Shifting by 32 or more moves the low word into the high word; only
+      // the excess over 32 is applied as an actual bit shift.
+      st_fma.mantissa.hi = (st_fma.mantissa.lo << (shift - 32));
+      st_fma.mantissa.lo = 0;
+    } else {
+      st_fma.mantissa = (uint2)(0, 0);
+    }
+
+    overflow_bits = 0;
+  }
+
+  // rounding
+  // overflow_bits is now in the range of [0, 3] making the shift greater than
+  // 32 bits.
+  uint2 trunc_mask;
+  uint trunc_shift = C_ADJUST + overflow_bits - 32;
+  trunc_mask.hi = (1u << trunc_shift) - 1;
+  trunc_mask.lo = UINT_MAX;
+  uint2 trunc_bits = st_fma.mantissa & trunc_mask;
+  trunc_bits.lo |= (cutoff_bits.hi != 0 || cutoff_bits.lo != 0) ? 1 : 0;
+  uint2 last_bit;
+  last_bit.lo = 0;
+  last_bit.hi = st_fma.mantissa.hi & (1u << trunc_shift);
+  uint grs_shift = C_ADJUST - 3 + overflow_bits - 32;
+  uint2 grs_bits;
+  grs_bits.lo = 0;
+  grs_bits.hi = 0x4u << grs_shift;
+
+  // round to nearest even
+  if ((trunc_bits.hi > grs_bits.hi ||
+       (trunc_bits.hi == grs_bits.hi && trunc_bits.lo > grs_bits.lo)) ||
+      (trunc_bits.hi == grs_bits.hi && trunc_bits.lo == grs_bits.lo &&
+       last_bit.hi != 0)) {
+    uint shift = C_ADJUST + overflow_bits - 32;
+    st_fma.mantissa.hi += 1u << shift;
+  }
+
+  // Shift mantissa back to bit 23
+  st_fma.mantissa.lo = (st_fma.mantissa.hi >> (C_ADJUST + overflow_bits - 32));
+  st_fma.mantissa.hi = 0;
+
+  // Detect rounding overflow
+  if (st_fma.mantissa.lo > 0xffffff) {
+    ++st_fma.exponent;
+    st_fma.mantissa.lo >>= 1;
+  }
+
+  if (st_fma.mantissa.lo == 0) {
+    return 0.0f;
+  }
+
+  // Floating point range limit
+  if (st_fma.exponent > 127) {
+    return as_float(as_uint(INFINITY) | st_fma.sign);
+  }
+
+  // Flush denormals
+  if (st_fma.exponent <= -127) {
+    return as_float(st_fma.sign);
+  }
+
+  return as_float(st_fma.sign | ((st_fma.exponent + 127) << 23) |
+                  ((uint)st_fma.mantissa.lo & 0x7fffff));
+}
+_CLC_TERNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, fma, float, float, float)
diff --git a/libclc/clspv64/lib/math/nextafter.cl b/libclc/clspv64/lib/math/nextafter.cl
new file mode 100644
index 00000000000000..f05e7482136203
--- /dev/null
+++ b/libclc/clspv64/lib/math/nextafter.cl
@@ -0,0 +1,5 @@
+#include <clc/clc.h>
+#include <math/clc_nextafter.h>
+
+#define __CLC_BODY <nextafter.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/clspv64/lib/math/nextafter.inc b/libclc/clspv64/lib/math/nextafter.inc
new file mode 100644
index 00000000000000..ee39be53b1e17c
--- /dev/null
+++ b/libclc/clspv64/lib/math/nextafter.inc
@@ -0,0 +1,3 @@
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE nextafter(__CLC_GENTYPE x, __CLC_GENTYPE y) {
+  return __clc_nextafter(x, y); // forwards both arguments unchanged
+}
diff --git a/libclc/clspv64/lib/subnormal_config.cl b/libclc/clspv64/lib/subnormal_config.cl
new file mode 100644
index 00000000000000..167fe1b1a1bcfd
--- /dev/null
+++ b/libclc/clspv64/lib/subnormal_config.cl
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "config.h"
+// Subnormal-support queries: each one unconditionally reports "false".
+_CLC_DEF bool __clc_fp16_subnormals_supported() { return false; }
+
+_CLC_DEF bool __clc_fp32_subnormals_supported() { return false; }
+
+_CLC_DEF bool __clc_fp64_subnormals_supported() { return false; }
diff --git a/libcxx/test/std/pstl b/libcxx/test/std/pstl
deleted file mode 120000
index 27a2822d956696..00000000000000
--- a/libcxx/test/std/pstl
+++ /dev/null
@@ -1 +0,0 @@
-../../../pstl/test/std
\ No newline at end of file
diff --git a/libcxx/test/std/pstl/algorithms/alg.merge/inplace_merge.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.merge/inplace_merge.pass.cpp
new file mode 100644
index 00000000000000..3446d955093add
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.merge/inplace_merge.pass.cpp
@@ -0,0 +1,159 @@
+// -*- C++ -*-
+//===-- inplace_merge.pass.cpp --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+struct test_one_policy
+{
+#if defined(_PSTL_ICC_17_VC141_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) ||                                                             \
+    defined(_PSTL_ICC_16_VC14_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) // dummy specialization by policy type, in case of broken configuration
+    template <typename BiDirIt1, typename Size, typename Generator1, typename Generator2, typename Compare>
+    void
+    operator()(pstl::execution::unsequenced_policy, BiDirIt1 first1, BiDirIt1 last1, BiDirIt1 first2, BiDirIt1 last2,
+               Size n, Size m, Generator1 generator1, Generator2 generator2, Compare comp)
+    {
+    }
+
+    template <typename BiDirIt1, typename Size, typename Generator1, typename Generator2, typename Compare>
+    void
+    operator()(pstl::execution::parallel_unsequenced_policy, BiDirIt1 first1, BiDirIt1 last1, BiDirIt1 first2,
+               BiDirIt1 last2, Size n, Size m, Generator1 generator1, Generator2 generator2, Compare comp)
+    {
+    }
+#endif
+
+    // inplace_merge works with bidirectional iterators at least
+    template <typename Policy, typename BiDirIt1, typename Size, typename Generator1, typename Generator2,
+              typename Compare>
+    typename std::enable_if<!is_same_iterator_category<BiDirIt1, std::forward_iterator_tag>::value, void>::type
+    operator()(Policy&& exec, BiDirIt1 first1, BiDirIt1 last1, BiDirIt1 first2, BiDirIt1 last2, Size n, Size m,
+               Generator1 generator1, Generator2 generator2, Compare comp)
+    {
+        const BiDirIt1 mid1 = std::next(first1, m);
+        fill_data(first1, mid1, generator1);
+        fill_data(mid1, last1, generator2);
+
+        const BiDirIt1 mid2 = std::next(first2, m);
+        fill_data(first2, mid2, generator1);
+        fill_data(mid2, last2, generator2);
+
+        std::inplace_merge(first1, mid1, last1, comp);
+        std::inplace_merge(exec, first2, mid2, last2, comp);
+        EXPECT_EQ_N(first1, first2, n, "wrong effect from inplace_merge with predicate");
+    }
+
+    template <typename Policy, typename BiDirIt1, typename Size, typename Generator1, typename Generator2,
+              typename Compare>
+    typename std::enable_if<is_same_iterator_category<BiDirIt1, std::forward_iterator_tag>::value, void>::type
+    operator()(Policy&&, BiDirIt1, BiDirIt1, BiDirIt1, BiDirIt1, Size, Size, Generator1, Generator2, Compare)
+    {
+    }
+};
+
+template <typename T, typename Generator1, typename Generator2, typename Compare>
+void
+test_by_type(Generator1 generator1, Generator2 generator2, Compare comp)
+{
+    using namespace std;
+    size_t max_size = 100000;
+    Sequence<T> in1(max_size, [](size_t v) { return T(v); });
+    Sequence<T> exp(max_size, [](size_t v) { return T(v); });
+    size_t m;
+
+    for (size_t n = 0; n <= max_size; n = n <= 16 ? n + 1 : size_t(3.1415 * n))
+    {
+        m = 0;
+        invoke_on_all_policies(test_one_policy(), in1.begin(), in1.begin() + n, exp.begin(), exp.begin() + n, n, m,
+                               generator1, generator2, comp);
+
+        m = n / 3;
+        invoke_on_all_policies(test_one_policy(), in1.begin(), in1.begin() + n, exp.begin(), exp.begin() + n, n, m,
+                               generator1, generator2, comp);
+
+        m = 2 * n / 3;
+        invoke_on_all_policies(test_one_policy(), in1.begin(), in1.begin() + n, exp.begin(), exp.begin() + n, n, m,
+                               generator1, generator2, comp);
+    }
+}
+
+template <typename T>
+struct LocalWrapper
+{
+    explicit LocalWrapper(int32_t k) : my_val(k) {}
+    LocalWrapper(LocalWrapper&& input) { my_val = std::move(input.my_val); }
+    LocalWrapper&
+    operator=(LocalWrapper&& input)
+    {
+        my_val = std::move(input.my_val);
+        return *this;
+    }
+    bool
+    operator<(const LocalWrapper<T>& w) const
+    {
+        return my_val < w.my_val;
+    }
+    friend bool
+    operator==(const LocalWrapper<T>& x, const LocalWrapper<T>& y)
+    {
+        return x.my_val == y.my_val;
+    }
+    friend std::ostream&
+    operator<<(std::ostream& stream, const LocalWrapper<T>& input)
+    {
+        return stream << input.my_val;
+    }
+
+  private:
+    T my_val;
+};
+
+template <typename T>
+struct test_non_const
+{
+    template <typename Policy, typename Iterator>
+    void
+    operator()(Policy&& exec, Iterator iter)
+    {
+        invoke_if(exec, [&]() { inplace_merge(exec, iter, iter, iter, non_const(std::less<T>())); });
+    }
+};
+
+int
+main()
+{
+    test_by_type<float64_t>([](int32_t i) { return -2 * i; }, [](int32_t i) { return -(2 * i + 1); },
+                            [](const float64_t x, const float64_t y) { return x > y; });
+
+    test_by_type<int32_t>([](int32_t i) { return 10 * i; }, [](int32_t i) { return i + 1; }, std::less<int32_t>());
+
+    test_by_type<LocalWrapper<float32_t>>([](int32_t i) { return LocalWrapper<float32_t>(2 * i + 1); },
+                                          [](int32_t i) { return LocalWrapper<float32_t>(2 * i); },
+                                          std::less<LocalWrapper<float32_t>>());
+
+    test_algo_basic_single<int32_t>(run_for_rnd_bi<test_non_const<int32_t>>());
+
+    test_by_type<MemoryChecker>(
+        [](std::size_t idx){ return MemoryChecker{std::int32_t(idx * 2)}; },
+        [](std::size_t idx){ return MemoryChecker{std::int32_t(idx * 2 + 1)}; },
+        [](const MemoryChecker& val1, const MemoryChecker& val2){ return val1.value() == val2.value(); });
+    EXPECT_FALSE(MemoryChecker::alive_objects() < 0, "wrong effect from inplace_merge: number of ctors calls < num of dtors calls");
+    EXPECT_FALSE(MemoryChecker::alive_objects() > 0, "wrong effect from inplace_merge: number of ctors calls > num of dtors calls");
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.merge/merge.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.merge/merge.pass.cpp
new file mode 100644
index 00000000000000..a09ef4223bf774
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.merge/merge.pass.cpp
@@ -0,0 +1,113 @@
+// -*- C++ -*-
+//===-- merge.pass.cpp ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <algorithm>
+#include <functional>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+struct test_merge
+{
+    template <typename Policy, typename InputIterator1, typename InputIterator2, typename OutputIterator,
+              typename Compare>
+    void
+    operator()(Policy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2,
+               OutputIterator out_first, OutputIterator out_last, Compare comp)
+    {
+        using namespace std;
+        {
+            const auto res = merge(exec, first1, last1, first2, last2, out_first, comp);
+            EXPECT_TRUE(res == out_last, "wrong return result from merge with predicate");
+            EXPECT_TRUE(is_sorted(out_first, res, comp), "wrong result from merge with predicate");
+            EXPECT_TRUE(includes(out_first, res, first1, last1, comp), "first sequence is not a part of result");
+            EXPECT_TRUE(includes(out_first, res, first2, last2, comp), "second sequence is not a part of result");
+        }
+        {
+            const auto res = merge(exec, first1, last1, first2, last2, out_first);
+            EXPECT_TRUE(res == out_last, "wrong return result from merge");
+            EXPECT_TRUE(is_sorted(out_first, res), "wrong result from merge");
+        }
+    }
+
+    // for reverse iterators
+    template <typename Policy, typename InputIterator1, typename InputIterator2, typename OutputIterator,
+              typename Compare>
+    void
+    operator()(Policy&& exec, std::reverse_iterator<InputIterator1> first1, std::reverse_iterator<InputIterator1> last1,
+               std::reverse_iterator<InputIterator2> first2, std::reverse_iterator<InputIterator2> last2,
+               std::reverse_iterator<OutputIterator> out_first, std::reverse_iterator<OutputIterator> out_last, Compare)
+    {
+        using namespace std;
+        typedef typename std::iterator_traits<std::reverse_iterator<InputIterator1>>::value_type T;
+        const auto res = merge(exec, first1, last1, first2, last2, out_first, std::greater<T>());
+
+        EXPECT_TRUE(res == out_last, "wrong return result from merge with predicate");
+        EXPECT_TRUE(is_sorted(out_first, res, std::greater<T>()), "wrong result from merge with predicate");
+        EXPECT_TRUE(includes(out_first, res, first1, last1, std::greater<T>()),
+                    "first sequence is not a part of result");
+        EXPECT_TRUE(includes(out_first, res, first2, last2, std::greater<T>()),
+                    "second sequence is not a part of result");
+    }
+};
+
+template <typename T, typename Generator1, typename Generator2>
+void
+test_merge_by_type(Generator1 generator1, Generator2 generator2)
+{
+    using namespace std;
+    size_t max_size = 100000;
+    Sequence<T> in1(max_size, generator1);
+    Sequence<T> in2(max_size / 2, generator2);
+    Sequence<T> out(in1.size() + in2.size());
+    std::sort(in1.begin(), in1.end());
+    std::sort(in2.begin(), in2.end());
+
+    for (size_t size = 0; size <= max_size; size = size <= 16 ? size + 1 : size_t(3.1415 * size))
+    {
+        invoke_on_all_policies(test_merge(), in1.cbegin(), in1.cbegin() + size, in2.data(), in2.data() + size / 2,
+                               out.begin(), out.begin() + 1.5 * size, std::less<T>());
+        invoke_on_all_policies(test_merge(), in1.data(), in1.data() + size, in2.cbegin(), in2.cbegin() + size / 2,
+                               out.begin(), out.begin() + 3 * size / 2, std::less<T>());
+    }
+}
+
+template <typename T>
+struct test_non_const
+{
+    template <typename Policy, typename InputIterator, typename OutputIterator>
+    void
+    operator()(Policy&& exec, InputIterator input_iter, OutputIterator out_iter)
+    {
+        merge(exec, input_iter, input_iter, input_iter, input_iter, out_iter, non_const(std::less<T>()));
+    }
+};
+
+int
+main()
+{
+    test_merge_by_type<int32_t>([](size_t v) { return (v % 2 == 0 ? v : -v) * 3; }, [](size_t v) { return v * 2; });
+    test_merge_by_type<float64_t>([](size_t v) { return float64_t(v); }, [](size_t v) { return float64_t(v - 100); });
+
+#if !defined(_PSTL_ICC_16_17_TEST_64_TIMEOUT)
+    test_merge_by_type<Wrapper<int16_t>>([](size_t v) { return Wrapper<int16_t>(v % 100); },
+                                         [](size_t v) { return Wrapper<int16_t>(v % 10); });
+#endif
+
+    test_algo_basic_double<int32_t>(run_for_rnd_fw<test_non_const<int32_t>>());
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.modifying.operations/alg.copy/copy_if.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/alg.copy/copy_if.pass.cpp
new file mode 100644
index 00000000000000..692907e0f92e25
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/alg.copy/copy_if.pass.cpp
@@ -0,0 +1,147 @@
+// -*- C++ -*-
+//===-- copy_if.pass.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+// Tests for copy_if and remove_copy_if
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+struct run_copy_if
+{
+#if defined(_PSTL_ICC_16_VC14_TEST_PAR_TBB_RT_RELEASE_64_BROKEN) // dummy specializations to skip testing in case of broken configuration
+    template <typename InputIterator, typename OutputIterator, typename OutputIterator2, typename Size,
+              typename Predicate, typename T>
+    void
+    operator()(pstl::execution::parallel_policy, InputIterator first, InputIterator last, OutputIterator out_first,
+               OutputIterator out_last, OutputIterator2 expected_first, OutputIterator2 expected_last, Size n,
+               Predicate pred, T trash)
+    {
+    }
+    template <typename InputIterator, typename OutputIterator, typename OutputIterator2, typename Size,
+              typename Predicate, typename T>
+    void
+    operator()(pstl::execution::parallel_unsequenced_policy, InputIterator first, InputIterator last,
+               OutputIterator out_first, OutputIterator out_last, OutputIterator2 expected_first,
+               OutputIterator2 expected_last, Size n, Predicate pred, T trash)
+    {
+    }
+#endif
+
+    template <typename Policy, typename InputIterator, typename OutputIterator, typename OutputIterator2, typename Size,
+              typename Predicate, typename T>
+    void
+    operator()(Policy&& exec, InputIterator first, InputIterator last, OutputIterator out_first,
+               OutputIterator out_last, OutputIterator2 expected_first, OutputIterator2, Size n, Predicate pred,
+               T trash)
+    {
+        // Cleaning
+        std::fill_n(expected_first, n, trash);
+        std::fill_n(out_first, n, trash);
+
+        // Run copy_if
+        auto i = copy_if(first, last, expected_first, pred);
+        auto k = copy_if(exec, first, last, out_first, pred);
+        EXPECT_EQ_N(expected_first, out_first, n, "wrong copy_if effect");
+        for (size_t j = 0; j < GuardSize; ++j)
+        {
+            ++k;
+        }
+        EXPECT_TRUE(out_last == k, "wrong return value from copy_if");
+
+        // Cleaning
+        std::fill_n(expected_first, n, trash);
+        std::fill_n(out_first, n, trash);
+        // Run remove_copy_if
+        i = remove_copy_if(first, last, expected_first, [=](const T& x) { return !pred(x); });
+        k = remove_copy_if(exec, first, last, out_first, [=](const T& x) { return !pred(x); });
+        EXPECT_EQ_N(expected_first, out_first, n, "wrong remove_copy_if effect");
+        for (size_t j = 0; j < GuardSize; ++j)
+        {
+            ++k;
+        }
+        EXPECT_TRUE(out_last == k, "wrong return value from remove_copy_if");
+    }
+};
+
+template <typename T, typename Predicate, typename Convert>
+void
+test(T trash, Predicate pred, Convert convert, bool check_weakness = true)
+{
+    // Try sequences of various lengths.
+    for (size_t n = 0; n <= 100000; n = n <= 16 ? n + 1 : size_t(3.1415 * n))
+    {
+        // count is number of output elements, plus a handful
+        // more for sake of detecting buffer overruns.
+        size_t count = GuardSize;
+        Sequence<T> in(n, [&](size_t k) -> T {
+            T val = convert(n ^ k);
+            count += pred(val) ? 1 : 0;
+            return val;
+        });
+
+        Sequence<T> out(count, [=](size_t) { return trash; });
+        Sequence<T> expected(count, [=](size_t) { return trash; });
+        if (check_weakness)
+        {
+            auto expected_result = copy_if(in.cfbegin(), in.cfend(), expected.begin(), pred);
+            size_t m = expected_result - expected.begin();
+            EXPECT_TRUE(n / 4 <= m && m <= 3 * (n + 1) / 4, "weak test for copy_if");
+        }
+        invoke_on_all_policies(run_copy_if(), in.begin(), in.end(), out.begin(), out.end(), expected.begin(),
+                               expected.end(), count, pred, trash);
+        invoke_on_all_policies(run_copy_if(), in.cbegin(), in.cend(), out.begin(), out.end(), expected.begin(),
+                               expected.end(), count, pred, trash);
+    }
+}
+
+struct test_non_const
+{
+    template <typename Policy, typename InputIterator, typename OutputInterator>
+    void
+    operator()(Policy&& exec, InputIterator input_iter, OutputInterator out_iter)
+    {
+        auto is_even = [&](float64_t v) {
+            uint32_t i = (uint32_t)v;
+            return i % 2 == 0;
+        };
+        copy_if(exec, input_iter, input_iter, out_iter, non_const(is_even));
+
+        invoke_if(exec, [&]() { remove_copy_if(exec, input_iter, input_iter, out_iter, non_const(is_even)); });
+    }
+};
+
+int
+main()
+{
+    test<float64_t>(-666.0, [](const float64_t& x) { return x * x <= 1024; },
+                    [](size_t j) { return ((j + 1) % 7 & 2) != 0 ? float64_t(j % 32) : float64_t(j % 33 + 34); });
+
+    test<int32_t>(-666, [](const int32_t& x) { return x != 42; },
+                  [](size_t j) { return ((j + 1) % 5 & 2) != 0 ? int32_t(j + 1) : 42; });
+
+#if !defined(_PSTL_ICC_17_TEST_MAC_RELEASE_32_BROKEN)
+    test<Number>(Number(42, OddTag()), IsMultiple(3, OddTag()), [](int32_t j) { return Number(j, OddTag()); });
+#endif
+
+#if !defined(_PSTL_ICC_16_17_TEST_REDUCTION_RELEASE_BROKEN)
+    test<int32_t>(-666, [](const int32_t&) { return true; }, [](size_t j) { return j; }, false);
+#endif
+
+    test_algo_basic_double<int32_t>(run_for_rnd_fw<test_non_const>());
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.modifying.operations/alg.partitions/is_partitioned.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/alg.partitions/is_partitioned.pass.cpp
new file mode 100644
index 00000000000000..bd6b259c4d0afd
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/alg.partitions/is_partitioned.pass.cpp
@@ -0,0 +1,101 @@
+// -*- C++ -*-
+//===-- is_partitioned.pass.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+struct test_one_policy
+{
+    // Empty overloads for the two unsequenced policies; they skip the check when a _BROKEN macro below is defined.
+#if defined(_PSTL_ICC_17_VC141_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) || defined(_PSTL_ICC_16_VC14_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN)
+
+    template <typename Iterator1, typename Predicate>
+    void
+    operator()(pstl::execution::unsequenced_policy, Iterator1 begin1, Iterator1 end1, Predicate pred)
+    {
+    }
+    template <typename Iterator1, typename Predicate>
+    void
+    operator()(pstl::execution::parallel_unsequenced_policy, Iterator1 begin1, Iterator1 end1, Predicate pred)
+    {
+    }
+#endif
+    // Generic overload: the policy overload of is_partitioned must return the same result as the serial overload.
+    template <typename ExecutionPolicy, typename Iterator1, typename Predicate>
+    void
+    operator()(ExecutionPolicy&& exec, Iterator1 begin1, Iterator1 end1, Predicate pred)
+    {
+        const bool expected = std::is_partitioned(begin1, end1, pred);
+        const bool actual = std::is_partitioned(exec, begin1, end1, pred);
+        EXPECT_TRUE(actual == expected, "wrong return result from is_partitioned");
+    }
+};
+
+template <typename T, typename Predicate>
+void
+test(Predicate pred)
+{
+    // Checks is_partitioned on prefixes of a Sequence built from the generator k -> T(k).
+    const std::size_t max_n = 1000000;
+    Sequence<T> in(max_n, [](std::size_t k) { return T(k); });
+    // The prefix length grows by one while n1 <= 16, then by a factor of about 3.1415 per step.
+    for (std::size_t n1 = 0; n1 <= max_n; n1 = n1 <= 16 ? n1 + 1 : std::size_t(3.1415 * n1))
+    {
+        invoke_on_all_policies(test_one_policy(), in.begin(), in.begin() + n1, pred); // prefix as it currently is
+        std::partition(in.begin(), in.begin() + n1, pred); // now the prefix is partitioned with respect to pred
+        invoke_on_all_policies(test_one_policy(), in.cbegin(), in.cbegin() + n1, pred); // same check, const iterators
+    }
+}
+
+template <typename T>
+struct LocalWrapper // Element type that only stores a T; it declares no accessor and no comparison operators.
+{
+    explicit LocalWrapper(std::size_t k) : my_val(k) {} // initializes the stored T from the index k
+
+  private:
+    T my_val;
+};
+
+struct test_non_const
+{
+    // Calls is_partitioned on an empty range (first == last) with a predicate
+    // wrapped by non_const().
+    template <typename Policy, typename Iterator>
+    void
+    operator()(Policy&& exec, Iterator iter)
+    {
+        auto even_pred = [&](float64_t value) { return static_cast<uint32_t>(value) % 2 == 0; };
+
+        invoke_if(exec, [&]() { is_partitioned(exec, iter, iter, non_const(even_pred)); });
+    }
+};
+
+int
+main() // Runs the is_partitioned checks for several element types and predicates.
+{
+    test<float64_t>([](const float64_t x) { return x < 0; });
+    test<int32_t>([](const int32_t x) { return x > 1000; });
+    test<uint16_t>([](const uint16_t x) { return x % 5 < 3; });
+#if !defined(_PSTL_ICC_18_TEST_EARLY_EXIT_MONOTONIC_RELEASE_BROKEN) && !defined(_PSTL_ICC_19_TEST_IS_PARTITIONED_RELEASE_BROKEN)
+    test<LocalWrapper<float64_t>>([](const LocalWrapper<float64_t>&) { return true; });
+#endif
+    // Check with a predicate wrapped by non_const() (see test_non_const).
+    test_algo_basic_single<int32_t>(run_for_rnd_fw<test_non_const>());
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.modifying.operations/alg.partitions/partition.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/alg.partitions/partition.pass.cpp
new file mode 100644
index 00000000000000..724f0ba6d3f4d9
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/alg.partitions/partition.pass.cpp
@@ -0,0 +1,178 @@
+// -*- C++ -*-
+//===-- partition.pass.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+// Tests for stable_partition and partition
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <algorithm>
+#include <iterator>
+#include <type_traits>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+template <typename T>
+struct DataType // Wrapper around a T that declares only move operations, which makes the type move-only.
+{
+    explicit DataType(int32_t k) : my_val(k) {}
+    DataType(DataType&& input) { my_val = std::move(input.my_val); } // my_val is default-initialized, then assigned
+    DataType&
+    operator=(DataType&& input)
+    {
+        my_val = std::move(input.my_val);
+        return *this;
+    }
+    T
+    get_val() const
+    {
+        return my_val;
+    }
+    // Writes the wrapped value to the stream.
+    friend std::ostream&
+    operator<<(std::ostream& stream, const DataType<T>& input)
+    {
+        return stream << input.my_val;
+    }
+
+  private:
+    T my_val;
+};
+
+template <typename Iterator>
+typename std::enable_if<std::is_trivial<typename std::iterator_traits<Iterator>::value_type>::value, bool>::type
+is_equal(Iterator first, Iterator last, Iterator d_first) // overload enabled when the value type is trivial
+{
+    return std::equal(first, last, d_first); // element-wise comparison of [first, last) with the range at d_first
+}
+
+template <typename Iterator>
+typename std::enable_if<!std::is_trivial<typename std::iterator_traits<Iterator>::value_type>::value, bool>::type
+    is_equal(Iterator, Iterator, Iterator) // overload enabled when the value type is not trivial
+{
+    return true; // no comparison is performed for non-trivial value types
+}
+
+struct test_one_policy // Checks std::partition and std::stable_partition under one execution policy.
+{
+#if defined(_PSTL_ICC_17_VC141_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) ||                                                             \
+    defined(_PSTL_ICC_16_VC14_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) //dummy specializations to skip testing in case of broken configuration
+    template <typename BiDirIt, typename Size, typename UnaryOp, typename Generator>
+    void
+    operator()(pstl::execution::unsequenced_policy, BiDirIt first, BiDirIt last, BiDirIt exp_first, BiDirIt exp_last,
+               Size n, UnaryOp unary_op, Generator generator)
+    {
+    }
+
+    template <typename BiDirIt, typename Size, typename UnaryOp, typename Generator>
+    void
+    operator()(pstl::execution::parallel_unsequenced_policy, BiDirIt first, BiDirIt last, BiDirIt exp_first,
+               BiDirIt exp_last, Size n, UnaryOp unary_op, Generator generator)
+    {
+    }
+#elif defined(_PSTL_ICC_16_VC14_TEST_PAR_TBB_RT_RELEASE_64_BROKEN) //dummy specializations to skip testing in case of broken configuration
+    template <typename BiDirIt, typename Size, typename UnaryOp, typename Generator>
+    void
+    operator()(pstl::execution::parallel_policy, BiDirIt first, BiDirIt last, BiDirIt exp_first, BiDirIt exp_last,
+               Size n, UnaryOp unary_op, Generator generator)
+    {
+    }
+
+    template <typename BiDirIt, typename Size, typename UnaryOp, typename Generator>
+    void
+    operator()(pstl::execution::parallel_unsequenced_policy, BiDirIt first, BiDirIt last, BiDirIt exp_first,
+               BiDirIt exp_last, Size n, UnaryOp unary_op, Generator generator)
+    {
+    }
+#endif
+    // Real test body, enabled for every iterator category except forward iterators.
+    template <typename Policy, typename BiDirIt, typename Size, typename UnaryOp, typename Generator>
+    typename std::enable_if<!is_same_iterator_category<BiDirIt, std::forward_iterator_tag>::value, void>::type
+    operator()(Policy&& exec, BiDirIt first, BiDirIt last, BiDirIt exp_first, BiDirIt exp_last, Size, UnaryOp unary_op,
+               Generator generator)
+    {
+        // partition: everything before the returned iterator must satisfy unary_op and nothing after it may
+        {
+            fill_data(first, last, generator);
+            BiDirIt actual_ret = std::partition(exec, first, last, unary_op);
+            EXPECT_TRUE(std::all_of(first, actual_ret, unary_op) && !std::any_of(actual_ret, last, unary_op),
+                        "wrong effect from partition");
+        }
+        // stable_partition: compare the policy overload against the serial overload run on the expected buffer
+        {
+            fill_data(exp_first, exp_last, generator);
+            BiDirIt exp_ret = std::stable_partition(exp_first, exp_last, unary_op);
+            fill_data(first, last, generator);
+            BiDirIt actual_ret = std::stable_partition(exec, first, last, unary_op);
+            // Same partition point; contents are compared by is_equal, which only compares trivial value types.
+            EXPECT_TRUE(std::distance(first, actual_ret) == std::distance(exp_first, exp_ret),
+                        "wrong result from stable_partition");
+            EXPECT_TRUE((is_equal<BiDirIt>(exp_first, exp_last, first)), "wrong effect from stable_partition");
+        }
+    }
+    template <typename Policy, typename BiDirIt, typename Size, typename UnaryOp, typename Generator>
+    typename std::enable_if<is_same_iterator_category<BiDirIt, std::forward_iterator_tag>::value, void>::type
+    operator()(Policy&&, BiDirIt, BiDirIt, BiDirIt, BiDirIt, Size, UnaryOp, Generator) // no-op for forward iterators
+    {
+    }
+};
+
+template <typename T, typename Generator, typename UnaryPred>
+void
+test_by_type(Generator generator, UnaryPred pred)
+{
+    // Runs test_one_policy on prefixes of two equally sized sequences: `in` (actual) and `exp` (expected).
+    using namespace std;
+    size_t max_size = 100000;
+    Sequence<T> in(max_size, [](size_t v) { return T(v); });
+    Sequence<T> exp(max_size, [](size_t v) { return T(v); });
+    // The prefix length grows by one while n <= 16, then by a factor of about 3.1415 per step.
+    for (size_t n = 0; n <= max_size; n = n <= 16 ? n + 1 : size_t(3.1415 * n))
+    {
+        invoke_on_all_policies(test_one_policy(), in.begin(), in.begin() + n, exp.begin(), exp.begin() + n, n, pred,
+                               generator);
+    }
+}
+
+struct test_non_const
+{
+    // Calls partition and stable_partition on an empty range (first == last) with a
+    // predicate wrapped by non_const().
+    template <typename Policy, typename Iterator>
+    void
+    operator()(Policy&& exec, Iterator iter)
+    {
+        auto even_pred = [&](float64_t value) { return static_cast<uint32_t>(value) % 2 == 0; };
+
+        invoke_if(exec, [&]() {
+            partition(exec, iter, iter, non_const(even_pred));
+            stable_partition(exec, iter, iter, non_const(even_pred));
+        });
+    }
+};
+
+int
+main() // Runs the partition / stable_partition checks for several element types.
+{
+#if !defined(_PSTL_ICC_16_17_TEST_REDUCTION_RELEASE_BROKEN)
+    test_by_type<int32_t>([](int32_t i) { return i; }, [](int32_t) { return true; });
+#endif
+    test_by_type<float64_t>([](int32_t i) { return -i; }, [](const float64_t x) { return x < 0; });
+    test_by_type<int64_t>([](int32_t i) { return i + 1; }, [](int64_t x) { return x % 3 == 0; });
+    test_by_type<DataType<float32_t>>([](int32_t i) { return DataType<float32_t>(2 * i + 1); },
+                                      [](const DataType<float32_t>& x) { return x.get_val() < 0; });
+    // Check with a predicate wrapped by non_const() (see test_non_const).
+    test_algo_basic_single<int32_t>(run_for_rnd_bi<test_non_const>());
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.modifying.operations/alg.partitions/partition_copy.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/alg.partitions/partition_copy.pass.cpp
new file mode 100644
index 00000000000000..c621f8352f1dc7
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/alg.partitions/partition_copy.pass.cpp
@@ -0,0 +1,116 @@
+// -*- C++ -*-
+//===-- partition_copy.pass.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+// Tests for partition_copy
+#include "support/pstl_test_config.h"
+
+#include <algorithm>
+#include <cstdlib>
+#include <execution>
+#include <functional>
+#include <iterator>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+struct test_partition_copy
+{
+    // Checks that partition_copy writes as many elements to each output as unary_op accepts / rejects.
+    template <typename Policy, typename InputIterator, typename OutputIterator, typename OutputIterator2,
+              typename UnaryOp>
+    void
+    operator()(Policy&& exec, InputIterator first, InputIterator last, OutputIterator true_first, OutputIterator,
+               OutputIterator2 false_first, OutputIterator2, UnaryOp unary_op)
+    {
+        auto actual_ret = std::partition_copy(exec, first, last, true_first, false_first, unary_op);
+
+        EXPECT_TRUE(std::distance(true_first, actual_ret.first) == std::count_if(first, last, unary_op),
+                    "partition_copy has wrong effect from true sequence");
+        EXPECT_TRUE(std::distance(false_first, actual_ret.second) == std::count_if(first, last, std::not_fn(unary_op)),
+                    "partition_copy has wrong effect from false sequence");
+    }
+
+    // Dummy (empty) overloads by iterator type and policy type, in case of broken configuration.
+#if defined(_PSTL_ICC_1800_TEST_MONOTONIC_RELEASE_64_BROKEN)
+    template <typename InputIterator, typename OutputIterator, typename OutputIterator2, typename UnaryOp>
+    void
+    operator()(pstl::execution::unsequenced_policy, std::reverse_iterator<InputIterator> first,
+               std::reverse_iterator<InputIterator> last, std::reverse_iterator<OutputIterator> true_first,
+               std::reverse_iterator<OutputIterator> true_last, std::reverse_iterator<OutputIterator2> false_first,
+               std::reverse_iterator<OutputIterator2> false_last, UnaryOp unary_op) // same type as false_first
+    {
+    }
+    template <typename InputIterator, typename OutputIterator, typename OutputIterator2, typename UnaryOp>
+    void
+    operator()(pstl::execution::parallel_unsequenced_policy, std::reverse_iterator<InputIterator> first,
+               std::reverse_iterator<InputIterator> last, std::reverse_iterator<OutputIterator> true_first,
+               std::reverse_iterator<OutputIterator> true_last, std::reverse_iterator<OutputIterator2> false_first,
+               std::reverse_iterator<OutputIterator2> false_last, UnaryOp unary_op) // same type as false_first
+    {
+    }
+#endif
+};
+
+template <typename T, typename UnaryPred>
+void
+test(UnaryPred pred)
+{
+    // Runs test_partition_copy on prefixes of `in`, writing into two separate output sequences.
+    const std::size_t max_size = 100000;
+    Sequence<T> in(max_size, [](std::size_t v) -> T { return T(v); });
+    Sequence<T> actual_true(max_size);
+    Sequence<T> actual_false(max_size);
+    for (std::size_t n = 0; n <= max_size; n = n <= 16 ? n + 1 : std::size_t(3.1415 * n))
+    {
+        // n grows by one while n <= 16, then by a factor of about 3.1415 per step
+        // for non-const input iterators
+        invoke_on_all_policies(test_partition_copy(), in.begin(), in.begin() + n, actual_true.begin(),
+                               actual_true.begin() + n, actual_false.begin(), actual_false.begin() + n, pred);
+
+        // for const input iterators
+        invoke_on_all_policies(test_partition_copy(), in.cbegin(), in.cbegin() + n, actual_true.begin(),
+                               actual_true.begin() + n, actual_false.begin(), actual_false.begin() + n, pred);
+    }
+}
+
+struct test_non_const
+{
+    // Calls partition_copy on an empty range (first == last) with a predicate wrapped by non_const();
+    // the same output iterator is passed for both destinations.
+    template <typename Policy, typename InputIterator, typename OutputIterator>
+    void
+    operator()(Policy&& exec, InputIterator input_iter, OutputIterator out_iter)
+    {
+        auto even_pred = [&](float64_t value) {
+            return static_cast<uint32_t>(value) % 2 == 0;
+        };
+        partition_copy(exec, input_iter, input_iter, out_iter, out_iter, non_const(even_pred));
+    }
+};
+
+int
+main() // Runs the partition_copy checks for several element types and predicates.
+{
+    test<int32_t>([](const int32_t value) { return value % 2; });
+
+#if !defined(_PSTL_ICC_16_17_TEST_REDUCTION_RELEASE_BROKEN)
+    test<int32_t>([](const int32_t) { return true; });
+#endif
+    // `<<` binds tighter than `>`, so the predicate on the next line is `value > 128`.
+    test<float64_t>([](const float64_t value) { return value > 2 << 6; });
+    test<Wrapper<float64_t>>([](const Wrapper<float64_t>& value) -> bool { return value.get_my_field() != nullptr; });
+    // Check with a predicate wrapped by non_const() (see test_non_const).
+    test_algo_basic_double<int32_t>(run_for_rnd_bi<test_non_const>());
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.modifying.operations/alg.reverse/reverse.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/alg.reverse/reverse.pass.cpp
new file mode 100644
index 00000000000000..126454fd7272eb
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/alg.reverse/reverse.pass.cpp
@@ -0,0 +1,104 @@
+// -*- C++ -*-
+//===-- reverse.pass.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <iterator>
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+struct test_one_policy // Checks std::reverse under one execution policy.
+{
+#if defined(_PSTL_ICC_18_VC141_TEST_SIMD_LAMBDA_RELEASE_BROKEN) || defined(_PSTL_ICC_17_VC141_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) ||       \
+    defined(_PSTL_ICC_16_VC14_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) // dummy specialization by policy type, in case of broken configuration
+    template <typename Iterator1, typename Iterator2>
+    typename std::enable_if<is_same_iterator_category<Iterator1, std::random_access_iterator_tag>::value, void>::type
+    operator()(pstl::execution::unsequenced_policy, Iterator1 data_b, Iterator1 data_e, Iterator2 actual_b,
+               Iterator2 actual_e)
+    {
+    }
+    template <typename Iterator1, typename Iterator2>
+    typename std::enable_if<is_same_iterator_category<Iterator1, std::random_access_iterator_tag>::value, void>::type
+    operator()(pstl::execution::parallel_unsequenced_policy, Iterator1 data_b, Iterator1 data_e, Iterator2 actual_b,
+               Iterator2 actual_e)
+    {
+    }
+#endif
+    // Real test body, enabled for every iterator category except forward iterators.
+    template <typename ExecutionPolicy, typename Iterator1, typename Iterator2>
+    typename std::enable_if<!is_same_iterator_category<Iterator1, std::forward_iterator_tag>::value>::type
+    operator()(ExecutionPolicy&& exec, Iterator1 data_b, Iterator1 data_e, Iterator2 actual_b, Iterator2 actual_e)
+    {
+        using namespace std;
+        // Seed the work buffer with a copy of the input.
+        copy(data_b, data_e, actual_b);
+        // Reverse the work buffer in place under the tested policy.
+        reverse(exec, actual_b, actual_e);
+        // Reading the work buffer backwards from actual_e must reproduce the input.
+        bool check = equal(data_b, data_e, reverse_iterator<Iterator2>(actual_e));
+
+        EXPECT_TRUE(check, "wrong result of reverse");
+    }
+    // No-op overload selected for forward iterators.
+    template <typename ExecutionPolicy, typename Iterator1, typename Iterator2>
+    typename std::enable_if<is_same_iterator_category<Iterator1, std::forward_iterator_tag>::value>::type
+    operator()(ExecutionPolicy&&, Iterator1, Iterator1, Iterator2, Iterator2)
+    {
+    }
+};
+
+template <typename T>
+void
+test()
+{
+    const std::size_t max_len = 100000;
+    // `actual` is the work buffer that gets reversed; `data` is the input it is compared against.
+    Sequence<T> actual(max_len);
+
+    Sequence<T> data(max_len, [](std::size_t i) { return T(i); });
+    // len grows by one while len <= 16, then by a factor of about 3.1415; the bound is exclusive (len < max_len).
+    for (std::size_t len = 0; len < max_len; len = len <= 16 ? len + 1 : std::size_t(3.1415 * len))
+    {
+        invoke_on_all_policies(test_one_policy(), data.begin(), data.begin() + len, actual.begin(),
+                               actual.begin() + len);
+    }
+}
+
+template <typename T>
+struct wrapper // Element type offering only construction and equality; instantiated as wrapper<float64_t> in main().
+{
+    T t;
+    wrapper() {} // t is default-initialized here
+    explicit wrapper(T t_) : t(t_) {}
+    bool
+    operator==(const wrapper<T>& a) const
+    {
+        return t == a.t; // equality is the equality of the wrapped values
+    }
+};
+
+int
+main() // Runs the reverse checks for three arithmetic element types and, unless disabled, wrapper<float64_t>.
+{
+    test<int32_t>();
+    test<uint16_t>();
+    test<float64_t>();
+#if !defined(_PSTL_ICC_17_TEST_MAC_RELEASE_32_BROKEN)
+    test<wrapper<float64_t>>();
+#endif
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.modifying.operations/alg.reverse/reverse_copy.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/alg.reverse/reverse_copy.pass.cpp
new file mode 100644
index 00000000000000..533443896c3586
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/alg.reverse/reverse_copy.pass.cpp
@@ -0,0 +1,130 @@
+// -*- C++ -*-
+//===-- reverse_copy.pass.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <iterator>
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+template <typename T>
+struct wrapper // Element type offering construction, assignment from a T, and equality.
+{
+    T t;
+    wrapper() {} // t is default-initialized here
+    explicit wrapper(T t_) : t(t_) {}
+    wrapper&
+    operator=(const T& t_) // assigns a raw T to the wrapped value
+    {
+        t = t_;
+        return *this;
+    }
+    bool
+    operator==(const wrapper& t_) const
+    {
+        return t == t_.t; // equality is the equality of the wrapped values
+    }
+};
+
+template <typename T1, typename T2>
+bool
+eq(const wrapper<T1>& a, const wrapper<T2>& b) // overload for two wrappers, possibly of different wrapped types
+{
+    return a.t == b.t; // compares the wrapped values directly
+}
+
+template <typename T1, typename T2>
+bool
+eq(const T1& a, const T2& b) // generic overload for any two types comparable with ==
+{
+    return a == b;
+}
+
+// The input range is saved as state here because we need to test with different types of input iterators:
+// the caller, invoke_on_all_policies, modifies the type of the iterators passed to it to cover additional usage cases.
+template <typename Iterator>
+struct test_one_policy
+{
+    Iterator data_b;
+    Iterator data_e;
+    test_one_policy(Iterator b, Iterator e) : data_b(b), data_e(e) {}
+
+#if defined(_PSTL_ICC_17_VC141_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) ||                                                             \
+    defined(_PSTL_ICC_16_VC14_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) // dummy specialization by policy type, in case of broken configuration
+    template <typename Iterator1>
+    typename std::enable_if<is_same_iterator_category<Iterator1, std::random_access_iterator_tag>::value, void>::type
+    operator()(pstl::execution::unsequenced_policy, Iterator1 actual_b, Iterator1 actual_e)
+    {
+    }
+    template <typename Iterator1>
+    typename std::enable_if<is_same_iterator_category<Iterator1, std::random_access_iterator_tag>::value, void>::type
+    operator()(pstl::execution::parallel_unsequenced_policy, Iterator1 actual_b, Iterator1 actual_e)
+    {
+    }
+#endif
+    // Real test body: reverse_copy from the stored input range into [actual_b, actual_e).
+    template <typename ExecutionPolicy, typename Iterator1>
+    void
+    operator()(ExecutionPolicy&& exec, Iterator1 actual_b, Iterator1 actual_e)
+    {
+        using namespace std;
+        using T = typename iterator_traits<Iterator1>::value_type;
+        // Pre-fill the destination with T(-123), then run reverse_copy under the tested policy.
+        fill(actual_b, actual_e, T(-123));
+        Iterator1 actual_return = reverse_copy(exec, data_b, data_e, actual_b);
+        // The returned iterator must be the end of the destination range.
+        EXPECT_TRUE(actual_return == actual_e, "wrong result of reverse_copy");
+        // Build the expected output by copying the input through reverse iterators.
+        const auto n = std::distance(data_b, data_e);
+        Sequence<T> res(n);
+        std::copy(std::reverse_iterator<Iterator>(data_e), std::reverse_iterator<Iterator>(data_b), res.begin());
+
+        EXPECT_EQ_N(res.begin(), actual_b, n, "wrong effect of reverse_copy");
+    }
+};
+
+template <typename T1, typename T2>
+void
+test()
+{
+    typedef typename Sequence<T1>::iterator iterator_type;
+    typedef typename Sequence<T1>::const_bidirectional_iterator cbi_iterator_type;
+    // T1 is the element type of the input sequence, T2 the element type of the destination sequence.
+    const std::size_t max_len = 100000;
+
+    Sequence<T2> actual(max_len);
+
+    Sequence<T1> data(max_len, [](std::size_t i) { return T1(i); });
+    // len grows by one while len <= 16, then by a factor of about 3.1415; the bound is exclusive (len < max_len).
+    for (std::size_t len = 0; len < max_len; len = len <= 16 ? len + 1 : std::size_t(3.1415 * len))
+    {
+        invoke_on_all_policies(test_one_policy<iterator_type>(data.begin(), data.begin() + len), actual.begin(),
+                               actual.begin() + len);
+        invoke_on_all_policies(test_one_policy<cbi_iterator_type>(data.cbibegin(), std::next(data.cbibegin(), len)),
+                               actual.begin(), actual.begin() + len);
+    }
+}
+
+int
+main() // Runs the reverse_copy checks for several (input type, destination type) pairs.
+{
+    test<int16_t, int8_t>();
+    test<uint16_t, float32_t>();
+    test<float64_t, int64_t>();
+    test<wrapper<float64_t>, wrapper<float64_t>>();
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.modifying.operations/copy_move.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/copy_move.pass.cpp
new file mode 100644
index 00000000000000..24b784383916e0
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/copy_move.pass.cpp
@@ -0,0 +1,197 @@
+// -*- C++ -*-
+//===-- copy_move.pass.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+// Tests for copy, move and copy_n
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+struct run_copy
+{
+    // Checks the policy overloads of copy and copy_n against the serial copy.
+#if defined(_PSTL_ICC_17_VC141_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) ||                                                             \
+    defined(_PSTL_ICC_16_VC14_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) //dummy specialization by policy type, in case of broken configuration
+    template <typename InputIterator, typename OutputIterator, typename OutputIterator2, typename Size, typename T>
+    void
+    operator()(pstl::execution::unsequenced_policy, InputIterator first, InputIterator last, OutputIterator out_first,
+               OutputIterator out_last, OutputIterator2 expected_first, OutputIterator2 expected_last, Size size,
+               Size n, T trash)
+    {
+    }
+
+    template <typename InputIterator, typename OutputIterator, typename OutputIterator2, typename Size, typename T>
+    void
+    operator()(pstl::execution::parallel_unsequenced_policy, InputIterator first, InputIterator last,
+               OutputIterator out_first, OutputIterator out_last, OutputIterator2 expected_first,
+               OutputIterator2 expected_last, Size size, Size n, T trash)
+    {
+    }
+#endif
+
+    template <typename Policy, typename InputIterator, typename OutputIterator, typename OutputIterator2, typename Size,
+              typename T>
+    void
+    operator()(Policy&& exec, InputIterator first, InputIterator last, OutputIterator out_first,
+               OutputIterator out_last, OutputIterator2 expected_first, OutputIterator2, Size size, Size n, T trash)
+    {
+        // Cleaning: reset `size` elements of both buffers to the trash value
+        std::fill_n(expected_first, size, trash);
+        std::fill_n(out_first, size, trash);
+
+        // Run copy: the serial overload fills the expected buffer, the policy overload fills the actual buffer
+        copy(first, last, expected_first);
+        auto k = copy(exec, first, last, out_first);
+        for (size_t j = 0; j < GuardSize; ++j)
+            ++k; // step the returned iterator over GuardSize further elements before comparing with out_last
+        EXPECT_EQ_N(expected_first, out_first, size, "wrong effect from copy");
+        EXPECT_TRUE(out_last == k, "wrong return value from copy");
+
+        // Cleaning: reset only the actual buffer; the expected buffer from the copy run is reused
+        std::fill_n(out_first, size, trash);
+        // Run copy_n with n elements and apply the same two checks
+        k = copy_n(exec, first, n, out_first);
+        for (size_t j = 0; j < GuardSize; ++j)
+            ++k;
+        EXPECT_EQ_N(expected_first, out_first, size, "wrong effect from copy_n");
+        EXPECT_TRUE(out_last == k, "wrong return value from copy_n");
+    }
+};
+
+template <typename T>
+struct run_move
+{
+    // Checks the policy overload of move against the serial move (primary template).
+#if defined(_PSTL_ICC_17_VC141_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) ||                                                             \
+    defined(_PSTL_ICC_16_VC14_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) //dummy specialization by policy type, in case of broken configuration
+    template <typename InputIterator, typename OutputIterator, typename OutputIterator2, typename Size>
+    void
+    operator()(pstl::execution::unsequenced_policy, InputIterator first, InputIterator last, OutputIterator out_first,
+               OutputIterator out_last, OutputIterator2 expected_first, OutputIterator2 expected_last, Size size,
+               Size n, T trash)
+    {
+    }
+
+    template <typename InputIterator, typename OutputIterator, typename OutputIterator2, typename Size>
+    void
+    operator()(pstl::execution::parallel_unsequenced_policy, InputIterator first, InputIterator last,
+               OutputIterator out_first, OutputIterator out_last, OutputIterator2 expected_first,
+               OutputIterator2 expected_last, Size size, Size n, T trash)
+    {
+    }
+#endif
+
+    template <typename Policy, typename InputIterator, typename OutputIterator, typename OutputIterator2, typename Size>
+    void
+    operator()(Policy&& exec, InputIterator first, InputIterator last, OutputIterator out_first,
+               OutputIterator out_last, OutputIterator2 expected_first, OutputIterator2, Size size, Size, T trash)
+    {
+        // Cleaning: reset `size` elements of both buffers to the trash value
+        std::fill_n(expected_first, size, trash);
+        std::fill_n(out_first, size, trash);
+
+        // Run move: the serial overload fills the expected buffer, the policy overload fills the actual buffer
+        move(first, last, expected_first);
+        auto k = move(exec, first, last, out_first);
+        for (size_t j = 0; j < GuardSize; ++j)
+            ++k; // step the returned iterator over GuardSize further elements before comparing with out_last
+        EXPECT_EQ_N(expected_first, out_first, size, "wrong effect from move");
+        EXPECT_TRUE(out_last == k, "wrong return value from move");
+    }
+};
+
+template <typename T>
+struct run_move<Wrapper<T>>
+{
+    // Specialization for Wrapper<T>: checks the move counter kept by Wrapper<T> instead of comparing buffers.
+#if defined(_PSTL_ICC_17_VC141_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) ||                                                             \
+    defined(_PSTL_ICC_16_VC14_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) //dummy specialization by policy type, in case of broken configuration
+    template <typename InputIterator, typename OutputIterator, typename OutputIterator2, typename Size>
+    void
+    operator()(pstl::execution::unsequenced_policy, InputIterator first, InputIterator last, OutputIterator out_first,
+               OutputIterator out_last, OutputIterator2 expected_first, OutputIterator2 expected_last, Size size,
+               Size n, Wrapper<T> trash)
+    {
+    }
+
+    template <typename InputIterator, typename OutputIterator, typename OutputIterator2, typename Size>
+    void
+    operator()(pstl::execution::parallel_unsequenced_policy, InputIterator first, InputIterator last,
+               OutputIterator out_first, OutputIterator out_last, OutputIterator2 expected_first,
+               OutputIterator2 expected_last, Size size, Size n, Wrapper<T> trash)
+    {
+    }
+#endif
+
+    template <typename Policy, typename InputIterator, typename OutputIterator, typename OutputIterator2, typename Size>
+    void
+    operator()(Policy&& exec, InputIterator first, InputIterator last, OutputIterator out_first,
+               OutputIterator out_last, OutputIterator2, OutputIterator2, Size size, Size, Wrapper<T> trash)
+    {
+        // Cleaning: reset the output buffer, then zero the move counter so only the tested call is counted
+        std::fill_n(out_first, size, trash);
+        Wrapper<T>::SetMoveCount(0);
+
+        // Run move: the move counter must equal `size` afterwards
+        auto k = move(exec, first, last, out_first);
+        for (size_t j = 0; j < GuardSize; ++j)
+            ++k; // step the returned iterator over GuardSize further elements before comparing with out_last
+        EXPECT_TRUE(Wrapper<T>::MoveCount() == size, "wrong effect from move");
+        EXPECT_TRUE(out_last == k, "wrong return value from move");
+    }
+};
+
+template <typename T, typename Convert>
+void
+test(T trash, Convert convert)
+{
+    // Try sequences of various lengths.
+    for (size_t n = 0; n <= 100000; n = n <= 16 ? n + 1 : size_t(3.1415 * n))
+    {
+        // The input has n elements; the output buffers below hold GuardSize
+        // more elements (outN) for the sake of detecting buffer overruns.
+        Sequence<T> in(n, [&](size_t k) -> T {
+            T val = convert(n ^ k);
+            return val;
+        });
+
+        const size_t outN = n + GuardSize;
+        Sequence<T> out(outN, [=](size_t) { return trash; });
+        Sequence<T> expected(outN, [=](size_t) { return trash; });
+        invoke_on_all_policies(run_copy(), in.begin(), in.end(), out.begin(), out.end(), expected.begin(),
+                               expected.end(), outN, n, trash);
+        invoke_on_all_policies(run_copy(), in.cbegin(), in.cend(), out.begin(), out.end(), expected.begin(),
+                               expected.end(), outN, n, trash);
+        invoke_on_all_policies(run_move<T>(), in.begin(), in.end(), out.begin(), out.end(), expected.begin(),
+                               expected.end(), n, n, trash); // run_move receives n (not outN) as its size argument
+
+        // run_move is not invoked with const input iterators:
+        // moving from a const rvalue reference calls the copy assignment operator.
+    }
+}
+
+int
+main() // Runs the copy / copy_n / move checks for several element types.
+{
+    test<int32_t>(-666, [](size_t j) { return int32_t(j); });
+    test<Wrapper<float64_t>>(Wrapper<float64_t>(-666.0), [](int32_t j) { return Wrapper<float64_t>(j); });
+    // The two instantiations below are compiled out when _PSTL_ICC_16_17_TEST_64_TIMEOUT is defined.
+#if !defined(_PSTL_ICC_16_17_TEST_64_TIMEOUT)
+    test<float64_t>(-666.0, [](size_t j) { return float64_t(j); });
+    test<Number>(Number(42, OddTag()), [](int32_t j) { return Number(j, OddTag()); });
+#endif
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.modifying.operations/fill.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/fill.pass.cpp
new file mode 100644
index 00000000000000..d44a1a483c1975
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/fill.pass.cpp
@@ -0,0 +1,100 @@
+// -*- C++ -*-
+//===-- fill.pass.cpp -----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+struct test_fill // Functor run once per execution policy: checks fill(exec, ...) over [first, last).
+{
+    template <typename It, typename T>
+    bool
+    check(It first, It last, const T& value) // true iff no element of [first, last) differs from value
+    {
+        while (first != last && !(*first != value))
+            ++first;
+        // Reaching the end means no mismatching element was found.
+        return !(first != last);
+    }
+
+    template <typename Policy, typename Iterator, typename T>
+    void
+    operator()(Policy&& exec, Iterator first, Iterator last, const T& value)
+    {
+        // Seed the range with a different value so stale data cannot satisfy the check.
+        fill(first, last, T(value + 1));
+        fill(exec, first, last, value);
+        EXPECT_TRUE(check(first, last, value), "fill wrong result");
+    }
+};
+
+struct test_fill_n // Functor run once per execution policy: checks fill_n's effect and its return value.
+{
+    template <typename It, typename Size, typename T>
+    bool
+    check(It first, Size n, const T& value) // true iff none of the n elements starting at first differs from value
+    {
+        Size checked = 0;
+        while (checked < n && !(*first != value))
+            ++checked, ++first;
+        return !(checked < n);
+    }
+
+    template <typename Policy, typename Iterator, typename Size, typename T>
+    void
+    operator()(Policy&& exec, Iterator first, Size n, const T& value)
+    {
+        fill_n(first, n, T(value + 1)); // seed with a different value so stale data cannot pass
+
+        const Iterator expected_end = std::next(first, n);
+        const Iterator actual_end = fill_n(exec, first, n, value);
+
+        EXPECT_TRUE(expected_end == actual_end, "fill_n should return Iterator to one past the element assigned");
+        EXPECT_TRUE(check(first, n, value), "fill_n wrong result");
+
+        // For n == -1 the call is expected to return first.
+        const Iterator noop_end = fill_n(exec, first, -1, value);
+        EXPECT_TRUE(noop_end == first, "fill_n wrong result for n == -1");
+    }
+};
+
+template <typename T>
+void
+test_fill_by_type(std::size_t n) // runs test_fill and test_fill_n on a Sequence<T> of length n
+{
+    Sequence<T> seq(n, [](std::size_t) -> T { return T(0); }); // every element starts as T(0)
+    T fill_value = -1;
+
+    invoke_on_all_policies(test_fill(), seq.begin(), seq.end(), fill_value);
+    invoke_on_all_policies(test_fill_n(), seq.begin(), n, fill_value);
+}
+
+int
+main()
+{
+    // Lengths 0..16 are tried one by one; after that the length grows by a factor of about 3.1415.
+    const std::size_t limit = 100000;
+
+    for (std::size_t len = 0; len < limit; len = len < 16 ? len + 1 : size_t(3.1415 * len))
+    {
+        test_fill_by_type<int32_t>(len);
+        test_fill_by_type<float64_t>(len);
+    }
+
+    std::cout << done() << std::endl;
+
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.modifying.operations/generate.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/generate.pass.cpp
new file mode 100644
index 00000000000000..92d87ea26e541b
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/generate.pass.cpp
@@ -0,0 +1,104 @@
+// -*- C++ -*-
+//===-- generate.pass.cpp -------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <atomic>
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+template <typename T>
+struct Generator_count // generator functor: every call yields the stored default value, T(-1)
+{
+    const T def_val = T(-1);
+    T
+    operator()()
+    {
+        return default_value();
+    }
+    T
+    default_value() const
+    {
+        return def_val;
+    }
+};
+
+struct test_generate // Per-policy functor covering both generate and generate_n.
+{
+    template <typename Policy, typename Iterator, typename Size>
+    void
+    operator()(Policy&& exec, Iterator first, Iterator last, Size n)
+    {
+        using namespace std;
+        using T = typename std::iterator_traits<Iterator>::value_type;
+
+        // generate over the whole range: all n elements must end up equal to the generator's value
+        {
+            Generator_count<T> gen;
+            generate(exec, first, last, gen);
+            Size matched = std::count(first, last, gen.default_value());
+            EXPECT_TRUE(matched == n, "generate wrong result for generate");
+            std::fill(first, last, T(0));
+        }
+
+        {
+            Generator_count<T> gen;
+            const auto half = n / 2; // generate_n is applied to the first n / 2 elements only
+            auto half_end = generate_n(exec, first, half, gen);
+            Size matched = std::count(first, half_end, gen.default_value());
+            EXPECT_TRUE(matched == half && half_end == std::next(first, half), "generate_n wrong result for generate_n");
+            std::fill(first, half_end, T(0));
+        }
+    }
+};
+
+template <typename T>
+void
+test_generate_by_type() // runs test_generate on zero-filled Sequence<T> inputs of many lengths
+{
+    for (size_t len = 0; len <= 100000; len = len < 16 ? len + 1 : size_t(3.1415 * len))
+    {
+        Sequence<T> seq(len, [](size_t) -> T { return T(0); }); // every element starts as T(0)
+
+        invoke_on_all_policies(test_generate(), seq.begin(), seq.end(), seq.size());
+    }
+}
+
+template <typename T>
+struct test_non_const // calls generate / generate_n on an empty range with the generator wrapped by non_const()
+{
+    template <typename Policy, typename Iterator>
+    void
+    operator()(Policy&& exec, Iterator iter)
+    {
+        auto zero_gen = []() { return T(0); };
+
+        generate(exec, iter, iter, non_const(zero_gen));
+        generate_n(exec, iter, 0, non_const(zero_gen));
+    }
+};
+
+int
+main() // Driver for the generate / generate_n tests.
+{
+
+    test_generate_by_type<int32_t>();
+    test_generate_by_type<float64_t>();
+
+    test_algo_basic_single<int32_t>(run_for_rnd_fw<test_non_const<int32_t>>()); // generator wrapped by non_const()
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.modifying.operations/remove.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/remove.pass.cpp
new file mode 100644
index 00000000000000..872b0d292caa7a
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/remove.pass.cpp
@@ -0,0 +1,161 @@
+// -*- C++ -*-
+//===-- remove.pass.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+// Test for remove, remove_if
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+struct run_remove // Per-policy functor: checks remove(exec, ...) against the serial remove overload.
+{
+#if defined(_PSTL_ICC_17_VC141_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) ||                                                             \
+    defined(_PSTL_ICC_16_VC14_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) //dummy specialization by policy type, in case of broken configuration
+    template <typename InputIterator, typename OutputIterator, typename Size, typename T>
+    void
+    operator()(pstl::execution::unsequenced_policy, InputIterator first, InputIterator last, OutputIterator out_first,
+               OutputIterator out_last, OutputIterator expected_first, OutputIterator expected_last, Size n,
+               const T& value)
+    {
+    } // empty body: nothing is checked for this policy when either macro above is defined
+    template <typename InputIterator, typename OutputIterator, typename Size, typename T>
+    void
+    operator()(pstl::execution::parallel_unsequenced_policy, InputIterator first, InputIterator last,
+               OutputIterator out_first, OutputIterator out_last, OutputIterator expected_first,
+               OutputIterator expected_last, Size n, const T& value)
+    {
+    } // empty body: nothing is checked for this policy when either macro above is defined
+#endif
+
+    template <typename Policy, typename InputIterator, typename OutputIterator, typename Size, typename T>
+    void
+    operator()(Policy&& exec, InputIterator first, InputIterator last, OutputIterator out_first,
+               OutputIterator out_last, OutputIterator expected_first, OutputIterator expected_last, Size,
+               const T& value)
+    {
+        // Load the same input data into both work ranges
+        std::copy(first, last, expected_first);
+        std::copy(first, last, out_first);
+
+        // Run the serial remove on the expected range and the policy overload on the out range
+        OutputIterator i = remove(expected_first, expected_last, value);
+        OutputIterator k = remove(exec, out_first, out_last, value);
+        EXPECT_TRUE(std::distance(expected_first, i) == std::distance(out_first, k), "wrong return value from remove");
+        EXPECT_EQ_N(expected_first, out_first, std::distance(expected_first, i), "wrong remove effect"); // retained prefix only
+    }
+};
+
+struct run_remove_if // Per-policy functor: checks remove_if(exec, ...) against the serial remove_if overload.
+{
+#if defined(_PSTL_ICC_17_VC141_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) ||                                                             \
+    defined(_PSTL_ICC_16_VC14_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) //dummy specialization by policy type, in case of broken configuration
+    template <typename InputIterator, typename OutputIterator, typename Size, typename Predicate>
+    void
+    operator()(pstl::execution::unsequenced_policy, InputIterator first, InputIterator last, OutputIterator out_first,
+               OutputIterator out_last, OutputIterator expected_first, OutputIterator expected_last, Size n,
+               Predicate pred)
+    {
+    } // empty body: nothing is checked for this policy when either macro above is defined
+    template <typename InputIterator, typename OutputIterator, typename Size, typename Predicate>
+    void
+    operator()(pstl::execution::parallel_unsequenced_policy, InputIterator first, InputIterator last,
+               OutputIterator out_first, OutputIterator out_last, OutputIterator expected_first,
+               OutputIterator expected_last, Size n, Predicate pred)
+    {
+    } // empty body: nothing is checked for this policy when either macro above is defined
+#endif
+
+    template <typename Policy, typename InputIterator, typename OutputIterator, typename Size, typename Predicate>
+    void
+    operator()(Policy&& exec, InputIterator first, InputIterator last, OutputIterator out_first,
+               OutputIterator out_last, OutputIterator expected_first, OutputIterator expected_last, Size,
+               Predicate pred)
+    {
+        // Load the same input data into both work ranges
+        std::copy(first, last, expected_first);
+        std::copy(first, last, out_first);
+
+        // Run the serial remove_if on the expected range and the policy overload on the out range
+        OutputIterator i = remove_if(expected_first, expected_last, pred);
+        OutputIterator k = remove_if(exec, out_first, out_last, pred);
+        EXPECT_TRUE(std::distance(expected_first, i) == std::distance(out_first, k),
+                    "wrong return value from remove_if");
+        EXPECT_EQ_N(expected_first, out_first, std::distance(expected_first, i), "wrong remove_if effect"); // retained prefix only
+    }
+};
+
+template <typename T, typename Predicate, typename Convert>
+void
+test(T trash, const T& value, Predicate pred, Convert convert)
+{
+    const std::size_t capacity = 100000; // largest input length; out and expected are allocated once at this size
+    Sequence<T> out(capacity, [trash](size_t) { return trash; });
+    Sequence<T> expected(capacity, [trash](size_t) { return trash; });
+
+    for (size_t len = 0; len <= capacity; len = len <= 16 ? len + 1 : size_t(3.1415 * len))
+    {
+        Sequence<T> input(len, [&](size_t idx) -> T { return convert(idx); });
+
+        invoke_on_all_policies(run_remove(), input.begin(), input.end(), out.begin(), out.begin() + len,
+                               expected.begin(), expected.begin() + len, len, value);
+        invoke_on_all_policies(run_remove_if(), input.begin(), input.end(), out.begin(), out.begin() + len,
+                               expected.begin(), expected.begin() + len, len, pred);
+    }
+}
+
+struct test_non_const // calls remove_if on an empty range with the predicate wrapped by non_const()
+{
+    template <typename Policy, typename Iterator>
+    void
+    operator()(Policy&& exec, Iterator iter)
+    {
+        auto is_even = [&](float64_t v) {
+            // convert to an unsigned integer first, then test parity
+            return (uint32_t)v % 2 == 0;
+        };
+
+        invoke_if(exec, [&]() { remove_if(exec, iter, iter, non_const(is_even)); });
+    }
+};
+
+int
+main() // Driver: test<T>(trash, value-to-remove, predicate, index-to-T converter) for several element types.
+{
+#if !defined(_PSTL_ICC_18_TEST_EARLY_EXIT_MONOTONIC_RELEASE_BROKEN)
+    test<int32_t>(666, 42, [](int32_t) { return true; }, [](size_t j) { return j; }); // predicate matches every element
+#endif
+
+    test<int32_t>(666, 2001, [](const int32_t& val) { return val != 2001; },
+                  [](size_t j) { return ((j + 1) % 5 & 2) != 0 ? 2001 : -1 - int32_t(j); });
+    test<float64_t>(-666.0, 8.5, [](const float64_t& val) { return val != 8.5; },
+                    [](size_t j) { return ((j + 1) % 7 & 2) != 0 ? 8.5 : float64_t(j % 32 + j); });
+
+#if !defined(_PSTL_ICC_17_TEST_MAC_RELEASE_32_BROKEN)
+    test<Number>(Number(-666, OddTag()), Number(42, OddTag()), IsMultiple(3, OddTag()),
+                 [](int32_t j) { return Number(j, OddTag()); });
+#endif
+
+    test_algo_basic_single<int32_t>(run_for_rnd_fw<test_non_const>());
+
+    test<MemoryChecker>(MemoryChecker{0}, MemoryChecker{1},
+        [](const MemoryChecker& val){ return val.value() == 1; },
+        [](std::size_t idx){ return MemoryChecker{std::int32_t(idx % 3 == 0)}; }
+    );
+    EXPECT_FALSE(MemoryChecker::alive_objects() < 0, "wrong effect from remove,remove_if: number of ctors calls < num of dtors calls"); // the two checks together require alive_objects() == 0
+    EXPECT_FALSE(MemoryChecker::alive_objects() > 0, "wrong effect from remove,remove_if: number of ctors calls > num of dtors calls");
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.modifying.operations/remove_copy.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/remove_copy.pass.cpp
new file mode 100644
index 00000000000000..9cdd7d62a4f243
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/remove_copy.pass.cpp
@@ -0,0 +1,91 @@
+// -*- C++ -*-
+//===-- remove_copy.pass.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+struct run_remove_copy // Per-policy functor: checks remove_copy(exec, ...) against the serial std::remove_copy.
+{
+    template <typename Policy, typename InputIterator, typename OutputIterator, typename OutputIterator2, typename Size,
+              typename T>
+    void
+    operator()(Policy&& exec, InputIterator first, InputIterator last, OutputIterator out_first,
+               OutputIterator out_last, OutputIterator2 expected_first, OutputIterator2, Size n, const T& value,
+               T trash)
+    {
+        // Reset the first n elements of both output buffers to the trash value
+        std::fill_n(expected_first, n, trash);
+        std::fill_n(out_first, n, trash);
+
+        // Run remove_copy: serial reference first, then the execution-policy overload under test
+        auto i = std::remove_copy(first, last, expected_first, value);
+        (void)i; // the reference result iterator is not used further
+        auto k = std::remove_copy(exec, first, last, out_first, value);
+        EXPECT_EQ_N(expected_first, out_first, n, "wrong remove_copy effect");
+        for (size_t j = 0; j < GuardSize; ++j) // advance k by GuardSize; it is then expected to equal out_last
+        {
+            ++k;
+        }
+        EXPECT_TRUE(out_last == k, "wrong return value from remove_copy");
+    }
+};
+
+template <typename T, typename Convert>
+void
+test(T trash, const T& value, Convert convert, bool check_weakness = true)
+{
+    // Try sequences of various lengths.
+    for (size_t n = 0; n <= 100000; n = n <= 16 ? n + 1 : size_t(3.1415 * n))
+    {
+        // count is number of output elements, plus a handful
+        // more for sake of detecting buffer overruns.
+        size_t count = GuardSize;
+        Sequence<T> in(n, [&](size_t k) -> T {
+            T x = convert(n ^ k);
+            count += !(x == value) ? 1 : 0; // one output slot per generated element that differs from value
+            return x;
+        });
+        using namespace std;
+
+        Sequence<T> out(count, [=](size_t) { return trash; });
+        Sequence<T> expected(count, [=](size_t) { return trash; });
+        if (check_weakness) // sanity check on the input itself: the kept count m must lie in [n/4, 3(n+1)/4]
+        {
+            auto expected_result = remove_copy(in.cfbegin(), in.cfend(), expected.begin(), value);
+            size_t m = expected_result - expected.begin();
+            EXPECT_TRUE(n / 4 <= m && m <= 3 * (n + 1) / 4, "weak test for remove_copy");
+        }
+        invoke_on_all_policies(run_remove_copy(), in.begin(), in.end(), out.begin(), out.end(), expected.begin(),
+                               expected.end(), count, value, trash);
+        invoke_on_all_policies(run_remove_copy(), in.cbegin(), in.cend(), out.begin(), out.end(), expected.begin(),
+                               expected.end(), count, value, trash); // same checks, reading the input through const iterators
+    }
+}
+
+int
+main() // Driver: test<T>(trash, value-to-remove, index-to-T converter) for several element types.
+{
+
+    test<float64_t>(-666.0, 8.5, [](size_t j) { return ((j + 1) % 7 & 2) != 0 ? 8.5 : float64_t(j % 32 + j); });
+
+    test<int32_t>(-666, 42, [](size_t j) { return ((j + 1) % 5 & 2) != 0 ? 42 : -1 - int32_t(j); });
+
+    test<Number>(Number(42, OddTag()), Number(2001, OddTag()),
+                 [](int32_t j) { return ((j + 1) % 3 & 2) != 0 ? Number(2001, OddTag()) : Number(j, OddTag()); });
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.modifying.operations/replace.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/replace.pass.cpp
new file mode 100644
index 00000000000000..9c6ca1ea8c0faa
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/replace.pass.cpp
@@ -0,0 +1,160 @@
+// -*- C++ -*-
+//===-- replace.pass.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+// Element type that records self-assignment, so a test can detect an object being assigned to itself.
+struct copy_int
+{
+    int32_t value;
+    int32_t copied_times = 0; // incremented only when operator= is called with the object itself
+    constexpr explicit copy_int(int32_t val = 0) : value(val) {}
+    constexpr copy_int(copy_int const& other) : value(other.value), copied_times(other.copied_times) { }
+
+    constexpr copy_int&
+    operator=(const copy_int& other)
+    {
+        if (&other != this)
+        {
+            value = other.value;
+            copied_times = other.copied_times;
+        }
+        else
+            ++copied_times;
+        return *this;
+    }
+
+    constexpr bool
+    operator==(const copy_int& other) const
+    {
+        return value == other.value; // copied_times is not part of the comparison
+    }
+};
+
+template <typename Iterator>
+struct test_one_policy // Per-policy functor for replace / replace_if; the source data is [data_b, data_e).
+{
+    std::size_t len;
+    Iterator data_b;
+    Iterator data_e;
+    test_one_policy(Iterator data_, std::size_t len_)
+    {
+        len = len_;
+        data_b = data_;
+        data_e = std::next(data_b, len);
+    }
+    template <typename ExecutionPolicy, typename Iterator1, typename Iterator2, typename T, typename Predicate>
+    void
+    operator()(ExecutionPolicy&& exec, Iterator1 expected_b, Iterator1 expected_e, Iterator2 actual_b,
+               Iterator2 actual_e, Predicate pred, const T& value, const T& old_value)
+    {
+        using namespace std;
+        // replace: reload both ranges, run the serial and the policy overload, then compare the results
+        copy(data_b, data_e, expected_b);
+        copy(data_b, data_e, actual_b);
+        replace(expected_b, expected_e, old_value, value);
+        replace(exec, actual_b, actual_e, old_value, value);
+        EXPECT_TRUE((check<T, Iterator2>(actual_b, actual_e)), "wrong result of self assignment check");
+        EXPECT_TRUE(equal(expected_b, expected_e, actual_b), "wrong result of replace");
+
+        // replace_if: same procedure, selecting elements with pred instead of old_value
+        copy(data_b, data_e, expected_b);
+        copy(data_b, data_e, actual_b);
+        replace_if(expected_b, expected_e, pred, value);
+        replace_if(exec, actual_b, actual_e, pred, value);
+        EXPECT_TRUE(equal(expected_b, expected_e, actual_b), "wrong result of replace_if");
+    }
+
+    // The two check() overloads are mutually exclusive on T: exactly one is viable for any given T.
+    template <typename T, typename Iterator1>
+    typename std::enable_if<!std::is_same<T, copy_int>::value, bool>::type // fallback: nothing to inspect
+    check(Iterator1, Iterator1)
+    {
+        return true;
+    }
+
+    template <typename T, typename Iterator1>
+    typename std::enable_if<std::is_same<T, copy_int>::value, bool>::type // copy_int only: no element was self-assigned
+    check(Iterator1 b, Iterator1 e)
+    {
+        return std::all_of(b, e, [](const copy_int& elem) { return elem.copied_times == 0; });
+    }
+};
+
+template <typename T1, typename T2, typename Pred>
+void
+test(Pred pred)
+{
+    using iterator_type = typename Sequence<T2>::iterator;
+
+    const std::size_t max_len = 100000;
+
+    static constexpr T1 old_value = T1(0);   // the value that replace() looks for
+    static constexpr T1 new_value = T1(666); // the value written in its place
+
+    Sequence<T2> expected(max_len);
+    Sequence<T2> actual(max_len);
+    // Source data: positions with i % 3 == 2 hold T1(i); every other position holds old_value.
+    Sequence<T2> data(max_len, [](std::size_t i) {
+        if (i % 3 != 2)
+        {
+            return old_value;
+        }
+        else
+        {
+            return T1(i);
+        }
+    });
+
+    for (std::size_t len = 0; len < max_len; len = len <= 16 ? len + 1 : std::size_t(3.1415 * len))
+    {
+        test_one_policy<iterator_type> policy_test(data.begin(), len);
+
+        invoke_on_all_policies(policy_test, expected.begin(), expected.begin() + len, actual.begin(),
+                               actual.begin() + len, pred, new_value, old_value);
+    }
+}
+
+template <typename T>
+struct test_non_const // calls replace_if on an empty range with the predicate wrapped by non_const()
+{
+    template <typename Policy, typename Iterator>
+    void
+    operator()(Policy&& exec, Iterator iter)
+    {
+        auto is_even = [&](float64_t v) {
+            // convert to an unsigned integer first, then test parity
+            return (uint32_t)v % 2 == 0;
+        };
+        invoke_if(exec, [&]() { replace_if(exec, iter, iter, non_const(is_even), T(0)); });
+    }
+};
+
+int
+main() // Driver: test<T1, T2>(pred) uses T1 for the replaced/replacement values and Sequence<T2> for storage.
+{
+    test<int32_t, float32_t>(__pstl::__internal::__equal_value<int32_t>(666));
+    test<uint16_t, uint8_t>([](const uint16_t& elem) { return elem % 3 < 2; });
+    test<float64_t, int64_t>([](const float64_t& elem) { return elem * elem - 3.5 * elem > 10; });
+    test<copy_int, copy_int>([](const copy_int& val) { return val.value / 5 > 2; }); // the case the self-assignment check targets
+
+    test_algo_basic_single<int32_t>(run_for_rnd_fw<test_non_const<int32_t>>());
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.modifying.operations/replace_copy.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/replace_copy.pass.cpp
new file mode 100644
index 00000000000000..94d725fb1c500c
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/replace_copy.pass.cpp
@@ -0,0 +1,105 @@
+// -*- C++ -*-
+//===-- replace_copy.pass.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+// Tests for replace_copy and replace_copy_if
+
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+struct test_replace_copy // Per-policy functor: checks replace_copy and replace_copy_if against the serial overloads.
+{
+    template <typename Policy, typename InputIterator, typename OutputIterator, typename OutputIterator2, typename Size,
+              typename Predicate, typename T>
+    void
+    operator()(Policy&& exec, InputIterator first, InputIterator last, OutputIterator out_first,
+               OutputIterator out_last, OutputIterator2 expected_first, OutputIterator2, Size n, Predicate pred,
+               const T& old_value, const T& new_value, T trash)
+    {
+        // Reset the first n elements of both output buffers to the trash value
+        std::fill_n(expected_first, n, trash);
+        std::fill_n(out_first, n, trash);
+        // Run replace_copy
+        auto i = std::replace_copy(first, last, expected_first, old_value, new_value); // serial reference; i itself is never read
+        auto k = std::replace_copy(exec, first, last, out_first, old_value, new_value);
+        EXPECT_EQ_N(expected_first, out_first, n, "wrong replace_copy effect");
+        EXPECT_TRUE(out_last == k, "wrong return value from replace_copy");
+
+        // Reset both output buffers again before the second algorithm
+        std::fill_n(expected_first, n, trash);
+        std::fill_n(out_first, n, trash);
+        // Run replace_copy_if
+        i = replace_copy_if(first, last, expected_first, pred, new_value);
+        k = replace_copy_if(exec, first, last, out_first, pred, new_value);
+        EXPECT_EQ_N(expected_first, out_first, n, "wrong replace_copy_if effect");
+        EXPECT_TRUE(out_last == k, "wrong return value from replace_copy_if");
+    }
+};
+
+template <typename T, typename Convert, typename Predicate>
+void
+test(T trash, const T& old_value, const T& new_value, Predicate pred, Convert convert)
+{
+    // Lengths 0..17 are tried one by one; after that the length grows by a factor of about 3.1415.
+    for (size_t len = 0; len <= 100000; len = len <= 16 ? len + 1 : size_t(3.1415 * len))
+    {
+        Sequence<T> input(len, [&](size_t idx) -> T { return convert(len ^ idx); });
+        Sequence<T> out(len, [=](size_t) { return trash; });
+        Sequence<T> expected(len, [=](size_t) { return trash; });
+
+        invoke_on_all_policies(test_replace_copy(), input.begin(), input.end(), out.begin(), out.end(),
+                               expected.begin(), expected.end(), out.size(), pred, old_value, new_value, trash);
+        invoke_on_all_policies(test_replace_copy(), input.cbegin(), input.cend(), out.begin(), out.end(),
+                               expected.begin(), expected.end(), out.size(), pred, old_value, new_value, trash);
+    }
+}
+
+template <typename T>
+struct test_non_const // calls replace_copy_if on an empty range with the predicate wrapped by non_const()
+{
+    template <typename Policy, typename InputIterator, typename OutputInterator>
+    void
+    operator()(Policy&& exec, InputIterator input_iter, OutputInterator out_iter)
+    {
+        auto is_even = [&](float64_t v) {
+            // convert to an unsigned integer first, then test parity
+            return (uint32_t)v % 2 == 0;
+        };
+
+        invoke_if(exec, [&]() { replace_copy_if(exec, input_iter, input_iter, out_iter, non_const(is_even), T(0)); });
+    }
+};
+
+int
+main() // Driver: test<T>(trash, old_value, new_value, predicate, index-to-T converter) for several element types.
+{
+
+    test<float64_t>(-666.0, 8.5, 0.33, [](const float64_t& x) { return x * x <= 1024; },
+                    [](size_t j) { return ((j + 1) % 7 & 2) != 0 ? 8.5 : float64_t(j % 32 + j); });
+
+    test<int32_t>(-666, 42, 99, [](const int32_t& x) { return x != 42; },
+                  [](size_t j) { return ((j + 1) % 5 & 2) != 0 ? 42 : -1 - int32_t(j); });
+
+#if !defined(_PSTL_ICC_17_TEST_MAC_RELEASE_32_BROKEN) // the Number case is compiled out when this macro is defined
+    test<Number>(Number(42, OddTag()), Number(2001, OddTag()), Number(2017, OddTag()), IsMultiple(3, OddTag()),
+                 [](int32_t j) { return ((j + 1) % 3 & 2) != 0 ? Number(2001, OddTag()) : Number(j, OddTag()); });
+#endif
+
+    test_algo_basic_double<int32_t>(run_for_rnd_fw<test_non_const<int32_t>>());
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.modifying.operations/rotate.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/rotate.pass.cpp
new file mode 100644
index 00000000000000..0d1cfeb4ae8e37
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/rotate.pass.cpp
@@ -0,0 +1,176 @@
+// -*- C++ -*-
+//===-- rotate.pass.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <iterator>
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+template <typename T>
+struct wrapper // Value holder that counts move-assignments into itself.
+{
+    T t;
+    int move_count; // incremented by the move-assignment operator only
+    explicit wrapper(T t_) : t(t_), move_count(0) {}
+    wrapper&
+    operator=(const T& t_) // assigns a raw value; move_count is left as it is
+    {
+        t = t_;
+        return *this;
+    }
+
+    wrapper(const wrapper<T>& a) : move_count(0) { t = a.t; } // a copy starts with move_count == 0
+
+    wrapper<T>&
+    operator=(wrapper<T>& a) // copy-assignment from a non-const lvalue; move_count is left as it is
+    {
+        t = a.t;
+        return *this;
+    }
+
+    wrapper<T>&
+    operator=(wrapper<T>&& a) // move-assignment: copies the value and increments move_count
+    {
+        t = a.t;
+        move_count += 1;
+        return *this;
+    }
+};
+
+template <typename T>
+struct compare // generic equality functor: defers to T's operator==
+{
+    bool
+    operator()(const T& lhs, const T& rhs)
+    {
+        return lhs == rhs;
+    }
+};
+
+template <typename T>
+struct compare<wrapper<T>> // specialization for wrapper<T>: compares only the wrapped values
+{
+    bool
+    operator()(const wrapper<T>& lhs, const wrapper<T>& rhs)
+    {
+        return lhs.t == rhs.t;
+    }
+};
+#include <typeinfo>
+
+struct test_one_policy // Per-policy functor: rotates a copy of the data and checks return value, contents and moves.
+{
+
+#if defined(_PSTL_ICC_17_VC141_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) ||                                                             \
+    defined(_PSTL_ICC_16_VC14_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) // dummy specializations to skip testing in case of broken configuration
+    template <typename Iterator, typename Size>
+    void
+    operator()(pstl::execution::unsequenced_policy, Iterator data_b, Iterator data_e, Iterator actual_b,
+               Iterator actual_e, Size shift)
+    {
+    }
+    template <typename Iterator, typename Size>
+    void
+    operator()(pstl::execution::parallel_unsequenced_policy, Iterator data_b, Iterator data_e, Iterator actual_b,
+               Iterator actual_e, Size shift)
+    {
+    }
+#endif
+
+    template <typename ExecutionPolicy, typename Iterator, typename Size>
+    void
+    operator()(ExecutionPolicy&& exec, Iterator data_b, Iterator data_e, Iterator actual_b, Iterator actual_e,
+               Size shift)
+    {
+        using namespace std;
+        using T = typename iterator_traits<Iterator>::value_type;
+        Iterator actual_m = std::next(actual_b, shift); // the element that rotate moves to the front
+
+        copy(data_b, data_e, actual_b); // start from a fresh copy of the reference data
+        Iterator actual_return = rotate(exec, actual_b, actual_m, actual_e);
+
+        EXPECT_TRUE(actual_return == std::next(actual_b, std::distance(actual_m, actual_e)), "wrong result of rotate");
+        auto comparator = compare<T>();
+        bool check = std::equal(actual_return, actual_e, data_b, comparator); // tail must hold the first `shift` inputs
+        check = check && std::equal(actual_b, actual_return, std::next(data_b, shift), comparator); // head holds the rest
+
+        EXPECT_TRUE(check, "wrong effect of rotate");
+        EXPECT_TRUE(check_move(exec, actual_b, actual_e, shift), "wrong move test of rotate");
+    }
+
+    template <typename ExecutionPolicy, typename Iterator, typename Size>
+    typename std::enable_if<
+        is_same_iterator_category<Iterator, std::random_access_iterator_tag>::value &&
+            !std::is_same<ExecutionPolicy, std::execution::sequenced_policy>::value &&
+            std::is_same<typename std::iterator_traits<Iterator>::value_type, wrapper<float32_t>>::value,
+        bool>::type // NOTE(review): enabled for wrapper<float32_t>, but main() here tests wrapper<float64_t> - confirm
+    check_move(ExecutionPolicy&&, Iterator b, Iterator e, Size shift)
+    {
+        bool result = all_of(b, e, [](wrapper<float32_t>& a) { // true iff every element was move-assigned at least once
+            bool temp = a.move_count > 0;
+            a.move_count = 0; // reset the counter for the next run
+            return temp;
+        });
+        return shift == 0 || result; // a zero shift is accepted without requiring any move
+    }
+
+    template <typename ExecutionPolicy, typename Iterator, typename Size>
+    typename std::enable_if<
+        !(is_same_iterator_category<Iterator, std::random_access_iterator_tag>::value &&
+          !std::is_same<ExecutionPolicy, std::execution::sequenced_policy>::value &&
+          std::is_same<typename std::iterator_traits<Iterator>::value_type, wrapper<float32_t>>::value),
+        bool>::type // exact negation of the condition on the overload above
+    check_move(ExecutionPolicy&&, Iterator, Iterator, Size)
+    {
+        return true; // no move check for this combination
+    }
+};
+
+template <typename T>
+void
+test()
+{
+    const int32_t max_len = 100000;
+
+    Sequence<T> actual(max_len, [](std::size_t i) { return T(i); });
+    Sequence<T> data(max_len, [](std::size_t i) { return T(i); });
+
+    for (int32_t n = 0; n < max_len; n = n <= 16 ? n + 1 : int32_t(3.1415 * n))
+    {
+        // Candidate rotation points for a range of length n.
+        int32_t candidates[] = {0, 1, 2, n / 3, (2 * n) / 3, n - 1};
+        for (auto shift : candidates)
+        {
+            if (shift < 0 || shift >= n)
+                continue; // only shifts inside [0, n) are tested
+            invoke_on_all_policies(test_one_policy(), data.begin(), data.begin() + n, actual.begin(),
+                                   actual.begin() + n, shift);
+        }
+    }
+}
+
+int
+main() // Driver for the rotate tests.
+{
+    test<int32_t>();
+    test<wrapper<float64_t>>();
+    test<MemoryChecker>();
+    EXPECT_FALSE(MemoryChecker::alive_objects() < 0, "wrong effect from rotate: number of ctors calls < num of dtors calls"); // the two checks together require alive_objects() == 0
+    EXPECT_FALSE(MemoryChecker::alive_objects() > 0, "wrong effect from rotate: number of ctors calls > num of dtors calls");
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.modifying.operations/rotate_copy.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/rotate_copy.pass.cpp
new file mode 100644
index 00000000000000..539cefc929d452
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/rotate_copy.pass.cpp
@@ -0,0 +1,146 @@
+// -*- C++ -*-
+//===-- rotate_copy.pass.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <iterator>
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+template <typename T>
+struct wrapper;
+
+template <typename T> // overload for wrapper<T>: compares the wrapped values (declared a friend inside wrapper)
+bool
+compare(const wrapper<T>& a, const wrapper<T>& b)
+{
+    return a.t == b.t;
+}
+// Generic overload: any other type is compared with its own operator==.
+template <typename T>
+bool
+compare(const T& a, const T& b)
+{
+    return a == b;
+}
+
+template <typename T>
+struct wrapper // holds a single private T; equality is only reachable through the friend compare()
+{
+    explicit wrapper(T t_) : t(t_) {}
+    wrapper&
+    operator=(const T& t_) // assignment from a raw T replaces the wrapped value
+    {
+        t = t_;
+        return *this;
+    }
+    friend bool
+    compare<T>(const wrapper<T>& a, const wrapper<T>& b); // befriends the wrapper<T> overload declared earlier in this file
+
+  private:
+    T t;
+};
+
+template <typename T, typename It1, typename It2>
+struct comparator // binary predicate over the value types of It1 and It2, used with std::equal in this test
+{
+    using T1 = typename std::iterator_traits<It1>::value_type;
+    using T2 = typename std::iterator_traits<It2>::value_type;
+    bool
+    operator()(T1 a, T2 b)
+    {
+        T temp = a; // copy-initialize a T from the first argument so both operands go through compare()
+        return compare(temp, b);
+    }
+};
+
+struct test_one_policy
+{
+    // Checks rotate_copy for one execution policy; the #if branch adds empty overloads for the listed broken configurations.
+#if defined(_PSTL_ICC_17_VC141_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) ||                                                             \
+    defined(_PSTL_ICC_16_VC14_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) // dummy specialization by policy type, in case of broken configuration
+    template <typename Iterator1, typename Iterator2>
+    typename std::enable_if<is_same_iterator_category<Iterator1, std::random_access_iterator_tag>::value, void>::type
+    operator()(pstl::execution::unsequenced_policy, Iterator1 data_b, Iterator1 data_e, Iterator2 actual_b,
+               Iterator2 actual_e, std::size_t shift)
+    {
+    }
+    template <typename Iterator1, typename Iterator2>
+    typename std::enable_if<is_same_iterator_category<Iterator1, std::random_access_iterator_tag>::value, void>::type
+    operator()(pstl::execution::parallel_unsequenced_policy, Iterator1 data_b, Iterator1 data_e, Iterator2 actual_b,
+               Iterator2 actual_e, std::size_t shift)
+    {
+    }
+#endif
+    // Generic case: rotate_copy [data_b, data_e) around data_b + shift into actual_b, then verify the output.
+    template <typename ExecutionPolicy, typename Iterator1, typename Iterator2>
+    void
+    operator()(ExecutionPolicy&& exec, Iterator1 data_b, Iterator1 data_e, Iterator2 actual_b, Iterator2 actual_e,
+               std::size_t shift)
+    {
+        using namespace std;
+        using T = typename iterator_traits<Iterator2>::value_type;
+        Iterator1 data_m = std::next(data_b, shift);
+        // Pre-fill the destination with a sentinel value before running the algorithm.
+        fill(actual_b, actual_e, T(-123));
+        Iterator2 actual_return = rotate_copy(exec, data_b, data_m, data_e, actual_b);
+        // The returned iterator must be the end of the destination range.
+        EXPECT_TRUE(actual_return == actual_e, "wrong result of rotate_copy");
+        auto comparer = comparator<T, Iterator1, Iterator2>();
+        bool check = std::equal(data_m, data_e, actual_b, comparer); // [data_m, data_e) must come first in the output
+        check = check && std::equal(data_b, data_m, std::next(actual_b, std::distance(data_m, data_e)), comparer); // then [data_b, data_m)
+
+        EXPECT_TRUE(check, "wrong effect of rotate_copy");
+    }
+};
+
+template <typename T1, typename T2>
+void
+test()
+{
+    // T1 is the element type of the source sequence, T2 of the destination sequence.
+    const std::size_t max_len = 100000;
+
+    Sequence<T2> actual(max_len, [](std::size_t i) { return T1(i); });
+
+    Sequence<T1> data(max_len, [](std::size_t i) { return T1(i); });
+    // len advances by 1 while len <= 16, then is scaled by 3.1415 (truncated) until it reaches max_len.
+    for (std::size_t len = 0; len < max_len; len = len <= 16 ? len + 1 : std::size_t(3.1415 * len))
+    {
+        std::size_t shifts[] = {0, 1, 2, len / 3, (2 * len) / 3, len - 1}; // len - 1 wraps around when len == 0 (unsigned)
+        for (std::size_t shift : shifts)
+        {
+            if (shift > 0 && shift < len) // NOTE(review): shift == 0 is listed above but filtered out here - confirm intended
+            {
+                invoke_on_all_policies(test_one_policy(), data.begin(), data.begin() + len, actual.begin(),
+                                       actual.begin() + len, shift); // source passed via begin()
+                invoke_on_all_policies(test_one_policy(), data.cbegin(), data.cbegin() + len, actual.begin(),
+                                       actual.begin() + len, shift); // source passed via cbegin()
+            }
+        }
+    }
+}
+
+int
+main()
+{
+    test<int32_t, int8_t>(); // template arguments are <source element type, destination element type>
+    test<uint16_t, float32_t>();
+    test<float64_t, int64_t>();
+    test<wrapper<float64_t>, wrapper<float64_t>>();
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.modifying.operations/swap_ranges.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/swap_ranges.pass.cpp
new file mode 100644
index 00000000000000..c3c3693af59426
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/swap_ranges.pass.cpp
@@ -0,0 +1,133 @@
+// -*- C++ -*-
+//===-- swap_ranges.pass.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <iterator>
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+template <typename T>
+struct wrapper // value holder with a public counter of how many swaps it took part in
+{
+    T t;
+    std::size_t number_of_swaps = 0;
+    wrapper() {} // t gets no explicit initializer here
+    explicit wrapper(T t_) : t(t_) {}
+    template <typename U>
+    void
+    operator=(const U& b) // assigns the wrapped value only; number_of_swaps is left unchanged
+    {
+        t = b;
+    }
+    bool
+    operator==(const wrapper<T>& a) const // equality looks at the wrapped value only
+    {
+        return t == a.t;
+    }
+};
+
+template <typename T>
+void
+swap(wrapper<T>& a, wrapper<T>& b)
+{
+    ++a.number_of_swaps; // each operand records that it took part in one swap
+    ++b.number_of_swaps;
+    std::swap(a.t, b.t); // only the wrapped values are exchanged; the counters stay with their objects
+}
+
+template <typename T>
+struct check_swap // primary template: types without a swap counter always pass the check
+{
+    bool
+    operator()(T&)
+    {
+        return true;
+    }
+};
+
+template <typename T>
+struct check_swap<wrapper<T>> // specialization for wrapper<T>: inspects and resets the swap counter
+{
+    bool
+    operator()(wrapper<T>& a)
+    {
+        const bool swapped_exactly_once = (a.number_of_swaps == 1);
+        a.number_of_swaps = 0; // reset so the element can be checked again on the next run
+        return swapped_exactly_once;
+    }
+};
+
+struct test_one_policy
+{
+    template <typename ExecutionPolicy, typename Iterator1, typename Iterator2>
+    void
+    operator()(ExecutionPolicy&& exec, Iterator1 data_b, Iterator1 data_e, Iterator2 actual_b, Iterator2 actual_e)
+    {
+        using namespace std;
+        using T_ref = typename iterator_traits<Iterator1>::reference;
+        using T = typename iterator_traits<Iterator1>::value_type;
+        // data gets 0, 1, 2, ...; actual continues the numbering from distance(data_b, data_e).
+        iota(data_b, data_e, 0);
+        iota(actual_b, actual_e, std::distance(data_b, data_e));
+
+        Iterator2 actual_return = swap_ranges(exec, data_b, data_e, actual_b);
+        bool check_return = (actual_return == actual_e);
+        EXPECT_TRUE(check_return, "wrong result of swap_ranges");
+        if (check_return)
+        {
+            std::size_t i = 0; // shared by both lambdas: counts through actual first, then continues through data
+            bool check = all_of(actual_b, actual_e, [&i](T_ref a) { return a == T(i++); }) &&
+                         all_of(data_b, data_e, [&i](T_ref a) { return a == T(i++); });
+
+            EXPECT_TRUE(check, "wrong effect of swap_ranges");
+
+            if (check)
+            {
+                bool swap_check = // check_swap<T> is trivially true unless T is a wrapper with a swap counter
+                    all_of(data_b, data_e, check_swap<T>()) && all_of(actual_b, actual_e, check_swap<T>());
+                EXPECT_TRUE(swap_check, "wrong effect of swap_ranges swap check");
+            }
+        }
+    }
+};
+
+template <typename T>
+void
+test()
+{
+    const std::size_t max_len = 100000;
+    // Contents are (re)written by test_one_policy via iota before every swap_ranges call.
+    Sequence<T> data(max_len);
+    Sequence<T> actual(max_len);
+    // len advances by 1 while len <= 16, then is scaled by 3.1415 (truncated) until it reaches max_len.
+    for (std::size_t len = 0; len < max_len; len = len <= 16 ? len + 1 : std::size_t(3.1415 * len))
+    {
+        invoke_on_all_policies(test_one_policy(), data.begin(), data.begin() + len, actual.begin(),
+                               actual.begin() + len);
+    }
+}
+
+int
+main()
+{
+    test<wrapper<uint16_t>>(); // wrapper element types go through the counting check_swap specialization
+    test<wrapper<float64_t>>();
+    test<int32_t>(); // plain arithmetic types use the always-true check_swap
+    test<float32_t>();
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.modifying.operations/transform_binary.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/transform_binary.pass.cpp
new file mode 100644
index 00000000000000..949e6f54ffb413
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/transform_binary.pass.cpp
@@ -0,0 +1,122 @@
+// -*- C++ -*-
+//===-- transform_binary.pass.cpp -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+template <typename In1, typename In2, typename Out>
+class TheOperation // binary functor computing Out(val + x - y) for a stored constant val
+{
+    Out val;
+
+  public:
+    TheOperation(Out v) : val(v) {}
+    Out
+    operator()(const In1& x, const In2& y) const
+    {
+        return Out(val + x - y);
+    }
+};
+
+template <typename InputIterator1, typename InputIterator2, typename OutputIterator>
+void
+check_and_reset(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator out_first)
+{
+    typedef typename std::iterator_traits<OutputIterator>::value_type Out;
+    typename std::iterator_traits<OutputIterator>::difference_type k = 0;
+    for (; first1 != last1; ++first1, ++first2, ++out_first, ++k)
+    {
+        // check: recompute the expected element as Out(1.5) + in1 - in2 and compare with the output
+        Out expected = Out(1.5) + *first1 - *first2;
+        Out actual = *out_first;
+        if (std::is_floating_point<Out>::value)
+        {
+            EXPECT_TRUE((expected > actual ? expected - actual : actual - expected) < 1e7,
+                        "wrong value in output sequence"); // NOTE(review): tolerance is 1e7 as written - confirm whether a small epsilon was intended
+        }
+        else
+        {
+            EXPECT_EQ(expected, actual, "wrong value in output sequence");
+        }
+        // reset: overwrite the output element so that the next run starts from different data
+        *out_first = k % 7 != 4 ? 7 * k - 5 : 0;
+    }
+}
+
+struct test_one_policy // runs binary std::transform under one policy, then checks the returned iterator and the output
+{
+    template <typename Policy, typename InputIterator1, typename InputIterator2, typename OutputIterator,
+              typename BinaryOp>
+    void
+    operator()(Policy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2,
+               OutputIterator out_first, OutputIterator out_last, BinaryOp op)
+    {
+        auto result = std::transform(exec, first1, last1, first2, out_first, op);
+        EXPECT_TRUE(out_last == result, "transform returned wrong iterator"); // output range has the same length as the inputs
+        check_and_reset(first1, last1, first2, out_first); // verifies every element, then scrambles the output again
+    }
+};
+
+template <typename In1, typename In2, typename Out, typename Predicate>
+void
+test(Predicate pred) // pred is the binary operation handed to std::transform
+{
+    for (size_t n = 0; n <= 100000; n = n <= 16 ? n + 1 : size_t(3.1415 * n))
+    {
+        Sequence<In1> in1(n, [](size_t k) { return k % 5 != 1 ? 3 * k - 7 : 0; });
+        Sequence<In2> in2(n, [](size_t k) { return k % 7 != 2 ? 5 * k - 5 : 0; });
+
+        Sequence<Out> out(n, [](size_t) { return -1; });
+        // Run once with inputs passed via begin()/end() and once via cbegin()/cend().
+        invoke_on_all_policies(test_one_policy(), in1.begin(), in1.end(), in2.begin(), in2.end(), out.begin(),
+                               out.end(), pred);
+        invoke_on_all_policies(test_one_policy(), in1.cbegin(), in1.cend(), in2.cbegin(), in2.cend(), out.begin(),
+                               out.end(), pred);
+    }
+}
+
+template <typename T>
+struct test_non_const // calls transform with an operation wrapped by non_const(); presumably a check that non-const functors are accepted
+{
+    template <typename Policy, typename InputIterator, typename OutputInterator>
+    void
+    operator()(Policy&& exec, InputIterator input_iter, OutputInterator out_iter)
+    {
+        invoke_if(exec, [&]() {
+            InputIterator input_iter2 = input_iter;
+            transform(exec, input_iter, input_iter, input_iter2, out_iter, non_const(std::plus<T>())); // empty range: first == last
+        });
+    }
+};
+
+int
+main()
+{
+    //const operator()
+    test<int32_t, int32_t, int32_t>(TheOperation<int32_t, int32_t, int32_t>(1));
+    test<float32_t, float32_t, float32_t>(TheOperation<float32_t, float32_t, float32_t>(1.5));
+    //non-const operator()
+    test<int32_t, float32_t, float32_t>(non_const(TheOperation<int32_t, float32_t, float32_t>(1.5)));
+    test<int64_t, float64_t, float32_t>(non_const(TheOperation<int64_t, float64_t, float32_t>(1.5)));
+    //lambda
+    test<int8_t, float64_t, int8_t>([](const int8_t& x, const float64_t& y) { return int8_t(int8_t(1.5) + x - y); });
+    // The call below exercises test_non_const through the harness helpers.
+    test_algo_basic_double<int32_t>(run_for_rnd_fw<test_non_const<int32_t>>());
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.modifying.operations/transform_unary.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/transform_unary.pass.cpp
new file mode 100644
index 00000000000000..69a410c4cd6eb6
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/transform_unary.pass.cpp
@@ -0,0 +1,91 @@
+// -*- C++ -*-
+//===-- transform_unary.pass.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+template <typename InputIterator, typename OutputIterator>
+void
+check_and_reset(InputIterator first, InputIterator last, OutputIterator out_first)
+{
+    typedef typename std::iterator_traits<OutputIterator>::value_type Out;
+    typename std::iterator_traits<OutputIterator>::difference_type k = 0;
+    for (; first != last; ++first, ++out_first, ++k)
+    {
+        // check: every output element must equal 1 - input
+        Out expected = 1 - *first;
+        Out actual = *out_first;
+        EXPECT_EQ(expected, actual, "wrong value in output sequence");
+        // reset: overwrite the output element so that the next run starts from different data
+        *out_first = k % 7 != 4 ? 7 * k - 5 : 0;
+    }
+}
+
+struct test_one_policy // runs unary std::transform under one policy, then checks the returned iterator and the output
+{
+    template <typename Policy, typename InputIterator, typename OutputIterator, typename UnaryOp>
+    void
+    operator()(Policy&& exec, InputIterator first, InputIterator last, OutputIterator out_first,
+               OutputIterator out_last, UnaryOp op)
+    {
+        auto returned_end = std::transform(exec, first, last, out_first, op);
+        EXPECT_TRUE(out_last == returned_end, "transform returned wrong iterator");
+        check_and_reset(first, last, out_first); // verifies every element, then scrambles the output again
+    }
+};
+
+template <typename Tin, typename Tout>
+void
+test()
+{
+    for (size_t n = 0; n <= 100000; n = n <= 16 ? n + 1 : size_t(3.1415 * n))
+    {
+        Sequence<Tin> in(n, [](int32_t k) { return k % 5 != 1 ? 3 * k - 7 : 0; });
+
+        Sequence<Tout> out(n);
+        // NOTE(review): Complement<Tin, Tout>(1) presumably maps x to 1 - x, matching check_and_reset - confirm in support/utils.h
+        const auto flip = Complement<Tin, Tout>(1);
+        invoke_on_all_policies(test_one_policy(), in.begin(), in.end(), out.begin(), out.end(), flip);
+        invoke_on_all_policies(test_one_policy(), in.cbegin(), in.cend(), out.begin(), out.end(), flip);
+    }
+}
+
+template <typename T>
+struct test_non_const // calls transform with an operation wrapped by non_const(); presumably a check that non-const functors are accepted
+{
+    template <typename Policy, typename InputIterator, typename OutputInterator>
+    void
+    operator()(Policy&& exec, InputIterator input_iter, OutputInterator out_iter)
+    {
+        invoke_if(exec, [&]() { transform(exec, input_iter, input_iter, out_iter, non_const(std::negate<T>())); }); // empty range: first == last
+    }
+};
+
+int
+main()
+{
+    test<int32_t, int32_t>(); // template arguments are <input element type, output element type>
+    test<int32_t, float32_t>();
+    test<uint16_t, float32_t>();
+    test<float32_t, float64_t>();
+    test<float64_t, float64_t>();
+
+    test_algo_basic_double<int32_t>(run_for_rnd_fw<test_non_const<int32_t>>());
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.modifying.operations/unique.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/unique.pass.cpp
new file mode 100644
index 00000000000000..fbd4742b7e4572
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/unique.pass.cpp
@@ -0,0 +1,163 @@
+// -*- C++ -*-
+//===-- unique.pass.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+// Test for unique
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+struct run_unique // compares policy-based unique against the serial overload, with and without a predicate
+{
+#if defined(_PSTL_ICC_17_VC141_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) ||                                                             \
+    defined(_PSTL_ICC_16_VC14_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) //dummy specialization by policy type, in case of broken configuration
+    template <typename ForwardIt, typename Generator>
+    void
+    operator()(pstl::execution::unsequenced_policy, ForwardIt first1, ForwardIt last1, ForwardIt first2,
+               ForwardIt last2, Generator generator)
+    {
+    }
+
+    template <typename ForwardIt, typename Generator>
+    void
+    operator()(pstl::execution::parallel_unsequenced_policy, ForwardIt first1, ForwardIt last1, ForwardIt first2,
+               ForwardIt last2, Generator generator)
+    {
+    }
+
+    template <typename ForwardIt, typename BinaryPred, typename Generator>
+    void
+    operator()(pstl::execution::unsequenced_policy, ForwardIt first1, ForwardIt last1, ForwardIt first2,
+               ForwardIt last2, BinaryPred pred, Generator generator)
+    {
+    }
+
+    template <typename ForwardIt, typename BinaryPred, typename Generator>
+    void
+    operator()(pstl::execution::parallel_unsequenced_policy, ForwardIt first1, ForwardIt last1, ForwardIt first2,
+               ForwardIt last2, BinaryPred pred, Generator generator)
+    {
+    }
+#endif
+    // Overload without predicate: [first1, last1) is the serial reference, [first2, last2) is processed with exec.
+    template <typename Policy, typename ForwardIt, typename Generator>
+    void
+    operator()(Policy&& exec, ForwardIt first1, ForwardIt last1, ForwardIt first2, ForwardIt last2, Generator generator)
+    {
+        using namespace std;
+
+        // Preparation
+        fill_data(first1, last1, generator);
+        fill_data(first2, last2, generator);
+
+        ForwardIt i = unique(first1, last1);
+        ForwardIt k = unique(exec, first2, last2);
+        // Both calls must keep the same number of elements and the same leading values.
+        auto n = std::distance(first1, i);
+        EXPECT_TRUE(std::distance(first2, k) == n, "wrong return value from unique without predicate");
+        EXPECT_EQ_N(first1, first2, n, "wrong effect from unique without predicate");
+    }
+    // Overload with predicate: same comparison, but both calls receive pred.
+    template <typename Policy, typename ForwardIt, typename BinaryPred, typename Generator>
+    void
+    operator()(Policy&& exec, ForwardIt first1, ForwardIt last1, ForwardIt first2, ForwardIt last2, BinaryPred pred,
+               Generator generator)
+    {
+        using namespace std;
+
+        // Preparation
+        fill_data(first1, last1, generator);
+        fill_data(first2, last2, generator);
+
+        ForwardIt i = unique(first1, last1, pred);
+        ForwardIt k = unique(exec, first2, last2, pred);
+
+        auto n = std::distance(first1, i);
+        EXPECT_TRUE(std::distance(first2, k) == n, "wrong return value from unique with predicate");
+        EXPECT_EQ_N(first1, first2, n, "wrong effect from unique with predicate");
+    }
+};
+
+template <typename T, typename Generator, typename Predicate>
+void
+test(Generator generator, Predicate pred)
+{
+    const std::size_t max_size = 1000000;
+    Sequence<T> in(max_size, [](size_t v) { return T(v); });
+    Sequence<T> exp(max_size, [](size_t v) { return T(v); });
+    // exp is passed as the first (serial reference) range and in as the second; run_unique refills both from generator.
+    for (size_t n = 0; n <= max_size; n = n <= 16 ? n + 1 : size_t(3.1415 * n))
+    {
+        invoke_on_all_policies(run_unique(), exp.begin(), exp.begin() + n, in.begin(), in.begin() + n, generator);
+        invoke_on_all_policies(run_unique(), exp.begin(), exp.begin() + n, in.begin(), in.begin() + n, pred, generator);
+    }
+}
+
+template <typename T>
+struct LocalWrapper // element type that declares only move construction and move assignment
+{
+    T my_val;
+
+    explicit LocalWrapper(T k) : my_val(k) {}
+    LocalWrapper(LocalWrapper&& input) : my_val(std::move(input.my_val)) {}
+    LocalWrapper&
+    operator=(LocalWrapper&& input)
+    {
+        my_val = std::move(input.my_val);
+        return *this;
+    }
+    friend bool
+    operator==(const LocalWrapper<T>& x, const LocalWrapper<T>& y) // equality compares the wrapped values
+    {
+        return x.my_val == y.my_val;
+    }
+};
+
+template <typename T>
+struct test_non_const // calls unique with a predicate wrapped by non_const(); presumably a check that non-const functors are accepted
+{
+    template <typename Policy, typename Iterator>
+    void
+    operator()(Policy&& exec, Iterator iter)
+    {
+        invoke_if(exec, [&]() { unique(exec, iter, iter, non_const(std::equal_to<T>())); }); // empty range: first == last
+    }
+};
+
+int
+main()
+{
+#if !defined(_PSTL_ICC_16_17_18_TEST_UNIQUE_MASK_RELEASE_BROKEN)
+    test<int32_t>([](size_t j) { return j / 3; },
+                  [](const int32_t& val1, const int32_t& val2) { return val1 * val1 == val2 * val2; });
+    test<float64_t>([](size_t) { return float64_t(1); },
+                    [](const float64_t& val1, const float64_t& val2) { return val1 != val2; });
+#endif
+    test<LocalWrapper<uint32_t>>([](size_t j) { return LocalWrapper<uint32_t>(j); },
+                                 [](const LocalWrapper<uint32_t>& val1, const LocalWrapper<uint32_t>& val2) {
+                                     return val1.my_val != val2.my_val;
+                                 });
+
+    test_algo_basic_single<int32_t>(run_for_rnd_fw<test_non_const<int32_t>>());
+    // After the MemoryChecker run the two checks below together require alive_objects() to be exactly 0.
+    test<MemoryChecker>(
+        [](std::size_t idx){ return MemoryChecker{std::int32_t(idx / 3)}; },
+        [](const MemoryChecker& val1, const MemoryChecker& val2){ return val1.value() == val2.value(); });
+    EXPECT_FALSE(MemoryChecker::alive_objects() < 0, "wrong effect from unique: number of ctors calls < num of dtors calls");
+    EXPECT_FALSE(MemoryChecker::alive_objects() > 0, "wrong effect from unique: number of ctors calls > num of dtors calls");
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.modifying.operations/unique_copy_equal.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/unique_copy_equal.pass.cpp
new file mode 100644
index 00000000000000..356d4d24aba674
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.modifying.operations/unique_copy_equal.pass.cpp
@@ -0,0 +1,135 @@
+// -*- C++ -*-
+//===-- unique_copy_equal.pass.cpp ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+// Tests for unique_copy
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+struct run_unique_copy // compares policy-based unique_copy against the serial overload, with and without a predicate
+{
+#if defined(_PSTL_ICC_16_VC14_TEST_PAR_TBB_RT_RELEASE_64_BROKEN) // dummy specializations to skip testing in case of broken configuration
+    template <typename InputIterator, typename OutputIterator, typename OutputIterator2, typename Size,
+              typename Predicate, typename T>
+    void
+    operator()(pstl::execution::parallel_policy, InputIterator first, InputIterator last, OutputIterator out_first,
+               OutputIterator out_last, OutputIterator2 expected_first, OutputIterator2 expected_last, Size n,
+               Predicate pred, T trash)
+    {
+    }
+
+    template <typename InputIterator, typename OutputIterator, typename OutputIterator2, typename Size,
+              typename Predicate, typename T>
+    void
+    operator()(pstl::execution::parallel_unsequenced_policy, InputIterator first, InputIterator last,
+               OutputIterator out_first, OutputIterator out_last, OutputIterator2 expected_first,
+               OutputIterator2 expected_last, Size n, Predicate pred, T trash)
+    {
+    }
+#endif
+    // n is the size of both output buffers; trash is the filler value written before each run.
+    template <typename Policy, typename InputIterator, typename OutputIterator, typename OutputIterator2, typename Size,
+              typename Predicate, typename T>
+    void
+    operator()(Policy&& exec, InputIterator first, InputIterator last, OutputIterator out_first,
+               OutputIterator out_last, OutputIterator2 expected_first, OutputIterator2, Size n, Predicate pred,
+               T trash)
+    {
+        // Cleaning
+        std::fill_n(expected_first, n, trash);
+        std::fill_n(out_first, n, trash);
+
+        // Run unique_copy
+        auto i = unique_copy(first, last, expected_first);
+        auto k = unique_copy(exec, first, last, out_first);
+        EXPECT_EQ_N(expected_first, out_first, n, "wrong unique_copy effect");
+        for (size_t j = 0; j < GuardSize; ++j) // step over the GuardSize trailing slots so k can be compared with out_last
+        {
+            ++k;
+        }
+        EXPECT_TRUE(out_last == k, "wrong return value from unique_copy");
+
+        // Cleaning
+        std::fill_n(expected_first, n, trash);
+        std::fill_n(out_first, n, trash);
+        // Run unique_copy with predicate
+        i = unique_copy(first, last, expected_first, pred);
+        k = unique_copy(exec, first, last, out_first, pred);
+        EXPECT_EQ_N(expected_first, out_first, n, "wrong unique_copy with predicate effect");
+        for (size_t j = 0; j < GuardSize; ++j) // same guard-slot adjustment as above
+        {
+            ++k;
+        }
+        EXPECT_TRUE(out_last == k, "wrong return value from unique_copy with predicate");
+    }
+};
+
+template <typename T, typename BinaryPredicate, typename Convert>
+void
+test(T trash, BinaryPredicate pred, Convert convert, bool check_weakness = true)
+{
+    // Try sequences of various lengths.
+    for (size_t n = 0; n <= 100000; n = n <= 16 ? n + 1 : size_t(3.1415 * n))
+    {
+        // count is number of output elements, plus a handful
+        // more for sake of detecting buffer overruns.
+        Sequence<T> in(n, [&](size_t k) -> T { return convert(k ^ n); });
+        using namespace std;
+        size_t count = GuardSize;
+        for (size_t k = 0; k < in.size(); ++k)
+            count += k == 0 || !pred(in[k], in[k - 1]) ? 1 : 0; // one slot per element that differs from its predecessor under pred
+        Sequence<T> out(count, [=](size_t) { return trash; });
+        Sequence<T> expected(count, [=](size_t) { return trash; });
+        if (check_weakness) // sanity check on the input itself: the number of unique elements must stay within the bounds below
+        {
+            auto expected_result = unique_copy(in.begin(), in.end(), expected.begin(), pred);
+            size_t m = expected_result - expected.begin();
+            EXPECT_TRUE(n / (n < 10000 ? 4 : 6) <= m && m <= (3 * n + 1) / 4, "weak test for unique_copy");
+        }
+        invoke_on_all_policies(run_unique_copy(), in.begin(), in.end(), out.begin(), out.end(), expected.begin(),
+                               expected.end(), count, pred, trash);
+    }
+}
+
+template <typename T>
+struct test_non_const // calls unique_copy with a predicate wrapped by non_const(); presumably a check that non-const functors are accepted
+{
+    template <typename Policy, typename InputIterator, typename OutputInterator>
+    void
+    operator()(Policy&& exec, InputIterator input_iter, OutputInterator out_iter)
+    {
+        unique_copy(exec, input_iter, input_iter, out_iter, non_const(std::equal_to<T>())); // empty range: first == last
+    }
+};
+
+int
+main()
+{
+    test<Number>(Number(42, OddTag()), std::equal_to<Number>(),
+                 [](int32_t j) { return Number(3 * j / 13 ^ (j & 8), OddTag()); });
+
+    test<float32_t>(float32_t(42), std::equal_to<float32_t>(),
+                    [](int32_t j) { return float32_t(5 * j / 23 ^ (j / 7)); });
+#if !defined(_PSTL_ICC_16_17_TEST_REDUCTION_RELEASE_BROKEN)
+    test<float32_t>(float32_t(42), [](float32_t, float32_t) { return false; }, [](int32_t j) { return float32_t(j); },
+                    false); // predicate is always false, so the weakness check is switched off
+#endif
+
+    test_algo_basic_double<int32_t>(run_for_rnd_fw<test_non_const<int32_t>>());
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.nonmodifying/adjacent_find.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.nonmodifying/adjacent_find.pass.cpp
new file mode 100644
index 00000000000000..36a623bac42a4b
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.nonmodifying/adjacent_find.pass.cpp
@@ -0,0 +1,114 @@
+// -*- C++ -*-
+//===-- adjacent_find.pass.cpp --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+struct test_adjacent_find // compares policy-based adjacent_find (with and without predicate) against the serial result
+{
+    template <typename Policy, typename Iterator, typename Pred>
+    void
+    operator()(Policy&& exec, Iterator first, Iterator last, Pred pred)
+    {
+        using namespace std;
+        // The serial overload provides the reference position.
+        auto reference = std::adjacent_find(first, last, pred);
+        auto found = adjacent_find(exec, first, last, pred);
+        EXPECT_TRUE(found == reference, "wrong return value from adjacent_find with predicate");
+
+        found = adjacent_find(exec, first, last);
+        EXPECT_TRUE(found == reference, "wrong return value from adjacent_find without predicate");
+    }
+};
+
+template <typename T>
+void
+test_adjacent_find_by_type()
+{
+    // Part 1: sequences 0...n with one adjacent pair planted at position e.
+    size_t counts[] = {2, 3, 500};
+    for (size_t c = 0; c < const_size(counts); ++c)
+    {
+
+        for (size_t e = 0; e < (counts[c] >= 64 ? 64 : (counts[c] == 2 ? 1 : 2)); ++e)
+        {
+            Sequence<T> in(counts[c], [](size_t v) -> T { return T(v); }); //fill 0...n
+            in[e] = in[e + 1] = -1;                                        //make an adjacent pair
+
+            auto i = std::adjacent_find(in.cbegin(), in.cend(), std::equal_to<T>());
+            EXPECT_TRUE(i == in.cbegin() + e, "std::adjacent_find returned wrong result");
+
+            invoke_on_all_policies(test_adjacent_find(), in.begin(), in.end(), std::equal_to<T>());
+            invoke_on_all_policies(test_adjacent_find(), in.cbegin(), in.cend(), std::equal_to<T>());
+        }
+    }
+
+    //special cases: size=0, size=1; no pair can exist, so the result must be the end iterator (cbegin() + size)
+    for (size_t expect = 0; expect <= 1; ++expect)
+    {
+        Sequence<T> in(expect, [](size_t v) -> T { return T(v); }); //fill 0...n
+        auto i = std::adjacent_find(in.cbegin(), in.cend(), std::equal_to<T>());
+        EXPECT_TRUE(i == in.cbegin() + expect, "std::adjacent_find returned wrong result");
+
+        invoke_on_all_policies(test_adjacent_find(), in.begin(), in.end(), std::equal_to<T>());
+        invoke_on_all_policies(test_adjacent_find(), in.cbegin(), in.cend(), std::equal_to<T>());
+    }
+
+    //special cases: fixed inputs with runs of equal values at the start, at the end, and in several places
+    Sequence<T> a1 = {5, 5, 5, 6, 7, 8, 9};
+    invoke_on_all_policies(test_adjacent_find(), a1.begin(), a1.end(), std::equal_to<T>());
+    invoke_on_all_policies(test_adjacent_find(), a1.begin() + 1, a1.end(), std::equal_to<T>());
+
+    invoke_on_all_policies(test_adjacent_find(), a1.cbegin(), a1.cend(), std::equal_to<T>());
+    invoke_on_all_policies(test_adjacent_find(), a1.cbegin() + 1, a1.cend(), std::equal_to<T>());
+
+    Sequence<T> a2 = {5, 6, 7, 8, 9, 9};
+    invoke_on_all_policies(test_adjacent_find(), a2.begin(), a2.end(), std::equal_to<T>());
+    invoke_on_all_policies(test_adjacent_find(), a2.begin(), a2.end() - 1, std::equal_to<T>());
+
+    invoke_on_all_policies(test_adjacent_find(), a2.cbegin(), a2.cend(), std::equal_to<T>());
+    invoke_on_all_policies(test_adjacent_find(), a2.cbegin(), a2.cend() - 1, std::equal_to<T>());
+
+    Sequence<T> a3 = {5, 6, 6, 6, 7, 9, 9, 9, 9};
+    invoke_on_all_policies(test_adjacent_find(), a3.begin(), a3.end(), std::equal_to<T>());
+
+    invoke_on_all_policies(test_adjacent_find(), a3.cbegin(), a3.cend(), std::equal_to<T>());
+}
+
+template <typename T>
+struct test_non_const // calls adjacent_find with a predicate wrapped by non_const(); presumably a check that non-const functors are accepted
+{
+    template <typename Policy, typename Iterator>
+    void
+    operator()(Policy&& exec, Iterator iter)
+    {
+        adjacent_find(exec, iter, iter, non_const(std::equal_to<T>())); // empty range: first == last
+    }
+};
+
+int
+main()
+{
+    // Run the adjacent_find checks for one integral and one floating-point element type.
+    test_adjacent_find_by_type<int32_t>();
+    test_adjacent_find_by_type<float64_t>();
+
+    test_algo_basic_single<int32_t>(run_for_rnd_bi<test_non_const<int32_t>>());
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.nonmodifying/all_of.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.nonmodifying/all_of.pass.cpp
new file mode 100644
index 00000000000000..ec23dc47e2f940
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.nonmodifying/all_of.pass.cpp
@@ -0,0 +1,117 @@
+// -*- C++ -*-
+//===-- all_of.pass.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+/*
+  TODO: consider implementing the following tests for a better code coverage
+  - correctness
+  - bad input argument (if applicable)
+  - data corruption around/of input and output
+  - correctly work with nested parallelism
+  - check that algorithm does not require anything more than is described in its requirements section
+*/
+
+using namespace TestUtils;
+
+struct test_all_of
+{
+    template <typename ExecutionPolicy, typename Iterator, typename Predicate>
+    void
+    operator()(ExecutionPolicy&& exec, Iterator begin, Iterator end, Predicate pred, bool expected)
+    {
+
+        auto actualr = std::all_of(exec, begin, end, pred);
+        EXPECT_EQ(expected, actualr, "result for all_of");
+    }
+};
+
+template <typename T>
+struct Parity
+{
+    bool parity;
+
+  public:
+    Parity(bool parity_) : parity(parity_) {}
+    bool
+    operator()(T value) const
+    {
+        return (size_t(value) ^ parity) % 2 == 0;
+    }
+};
+
+template <typename T>
+void
+test(size_t bits)
+{
+    for (size_t n = 0; n <= 100000; n = n <= 16 ? n + 1 : size_t(3.1415 * n))
+    {
+
+        // Sequence of odd values
+        Sequence<T> in(n, [n, bits](size_t) { return T(2 * HashBits(n, bits - 1) ^ 1); });
+
+        // Even value, or false when T is bool.
+        T spike(2 * HashBits(n, bits - 1));
+        Sequence<T> inCopy(in);
+
+        invoke_on_all_policies(test_all_of(), in.begin(), in.end(), Parity<T>(1), true);
+        invoke_on_all_policies(test_all_of(), in.cbegin(), in.cend(), Parity<T>(1), true);
+        EXPECT_EQ(in, inCopy, "all_of modified input sequence");
+        if (n > 0)
+        {
+            // Sprinkle in a miss
+            in[2 * n / 3] = spike;
+            invoke_on_all_policies(test_all_of(), in.begin(), in.end(), Parity<T>(1), false);
+            invoke_on_all_policies(test_all_of(), in.cbegin(), in.cend(), Parity<T>(1), false);
+
+            // Sprinkle in a few more misses
+            in[n / 2] = spike;
+            in[n / 3] = spike;
+            invoke_on_all_policies(test_all_of(), in.begin(), in.end(), Parity<T>(1), false);
+            invoke_on_all_policies(test_all_of(), in.cbegin(), in.cend(), Parity<T>(1), false);
+        }
+    }
+}
+
+struct test_non_const
+{
+    template <typename Policy, typename Iterator>
+    void
+    operator()(Policy&& exec, Iterator iter)
+    {
+        auto is_even = [&](float64_t v) {
+            uint32_t i = (uint32_t)v;
+            return i % 2 == 0;
+        };
+        all_of(exec, iter, iter, non_const(is_even));
+    }
+};
+
+int
+main()
+{
+    test<int32_t>(8 * sizeof(int32_t));
+    test<uint16_t>(8 * sizeof(uint16_t));
+    test<float64_t>(53);
+#if !defined(_PSTL_ICC_16_17_TEST_REDUCTION_BOOL_TYPE_RELEASE_64_BROKEN)
+    test<bool>(1);
+#endif
+
+    test_algo_basic_single<int32_t>(run_for_rnd_fw<test_non_const>());
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.nonmodifying/any_of.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.nonmodifying/any_of.pass.cpp
new file mode 100644
index 00000000000000..97d1691fa34ee1
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.nonmodifying/any_of.pass.cpp
@@ -0,0 +1,103 @@
+// -*- C++ -*-
+//===-- any_of.pass.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+/*
+  TODO: consider implementing the following tests for a better code coverage
+  - correctness
+  - bad input argument (if applicable)
+  - data corruption around/of input and output
+  - correctly work with nested parallelism
+  - check that algorithm does not require anything more than is described in its requirements section
+*/
+
+using namespace TestUtils;
+
+struct test_any_of
+{
+    template <typename ExecutionPolicy, typename Iterator, typename Predicate>
+    void
+    operator()(ExecutionPolicy&& exec, Iterator begin, Iterator end, Predicate pred, bool expected)
+    {
+
+        auto actualr = std::any_of(exec, begin, end, pred);
+        EXPECT_EQ(expected, actualr, "result for any_of");
+    }
+};
+
+template <typename T>
+void
+test(size_t bits)
+{
+    for (size_t n = 0; n <= 100000; n = n <= 16 ? n + 1 : size_t(3.1415 * n))
+    {
+
+        // Sequence of odd values
+        Sequence<T> in(n, [n, bits](size_t) { return T(2 * HashBits(n, bits - 1) ^ 1); });
+
+        // Even value, or false when T is bool.
+        T spike(2 * HashBits(n, bits - 1));
+        Sequence<T> inCopy(in);
+
+        invoke_on_all_policies(test_any_of(), in.begin(), in.end(), is_equal_to<T>(spike), false);
+        invoke_on_all_policies(test_any_of(), in.cbegin(), in.cend(), is_equal_to<T>(spike), false);
+        EXPECT_EQ(in, inCopy, "any_of modified input sequence");
+        if (n > 0)
+        {
+            // Sprinkle in a hit
+            in[2 * n / 3] = spike;
+            invoke_on_all_policies(test_any_of(), in.begin(), in.end(), is_equal_to<T>(spike), true);
+            invoke_on_all_policies(test_any_of(), in.cbegin(), in.cend(), is_equal_to<T>(spike), true);
+
+            // Sprinkle in a few more hits
+            in[n / 2] = spike;
+            in[n / 3] = spike;
+            invoke_on_all_policies(test_any_of(), in.begin(), in.end(), is_equal_to<T>(spike), true);
+            invoke_on_all_policies(test_any_of(), in.cbegin(), in.cend(), is_equal_to<T>(spike), true);
+        }
+    }
+}
+
+struct test_non_const
+{
+    template <typename Policy, typename Iterator>
+    void
+    operator()(Policy&& exec, Iterator iter)
+    {
+        auto is_even = [&](float64_t v) {
+            uint32_t i = (uint32_t)v;
+            return i % 2 == 0;
+        };
+        any_of(exec, iter, iter, non_const(is_even));
+    }
+};
+
+int
+main()
+{
+    test<int32_t>(8 * sizeof(int32_t));
+    test<uint16_t>(8 * sizeof(uint16_t));
+    test<float64_t>(53);
+#if !defined(_PSTL_ICC_16_17_TEST_REDUCTION_BOOL_TYPE_RELEASE_64_BROKEN)
+    test<bool>(1);
+#endif
+
+    test_algo_basic_single<int32_t>(run_for_rnd_fw<test_non_const>());
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.nonmodifying/count.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.nonmodifying/count.pass.cpp
new file mode 100644
index 00000000000000..e8eca023d15aec
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.nonmodifying/count.pass.cpp
@@ -0,0 +1,108 @@
+// -*- C++ -*-
+//===-- count.pass.cpp ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+// Tests for count and count_if
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+struct test_count
+{
+    template <typename Policy, typename Iterator, typename T>
+    void
+    operator()(Policy&& exec, Iterator first, Iterator last, T needle)
+    {
+        auto expected = std::count(first, last, needle);
+        auto result = std::count(exec, first, last, needle);
+        EXPECT_EQ(expected, result, "wrong count result");
+    }
+};
+
+struct test_count_if
+{
+    template <typename Policy, typename Iterator, typename Predicate>
+    void
+    operator()(Policy&& exec, Iterator first, Iterator last, Predicate pred)
+    {
+        auto expected = std::count_if(first, last, pred);
+        auto result = std::count_if(exec, first, last, pred);
+        EXPECT_EQ(expected, result, "wrong count_if result");
+    }
+};
+
+template <typename T>
+class IsEqual
+{
+    T value;
+
+  public:
+    IsEqual(T value_, OddTag) : value(value_) {}
+    bool
+    operator()(const T& x) const
+    {
+        return x == value;
+    }
+};
+
+template <typename In, typename T, typename Predicate, typename Convert>
+void
+test(T needle, Predicate pred, Convert convert)
+{
+    // Try sequences of various lengths.
+    for (size_t n = 0; n <= 100000; n = n <= 16 ? n + 1 : size_t(3.1415 * n))
+    {
+        Sequence<In> in(n, [=](size_t k) -> In {
+            // Sprinkle "42" and "50" early, so that short sequences have non-zero count.
+            return convert((n - k - 1) % 3 == 0 ? 42 : (n - k - 2) % 5 == 0 ? 50 : 3 * (int(k) % 1000 - 500));
+        });
+        invoke_on_all_policies(test_count(), in.begin(), in.end(), needle);
+        invoke_on_all_policies(test_count_if(), in.begin(), in.end(), pred);
+
+        invoke_on_all_policies(test_count(), in.cbegin(), in.cend(), needle);
+        invoke_on_all_policies(test_count_if(), in.cbegin(), in.cend(), pred);
+    }
+}
+
+struct test_non_const
+{
+    template <typename Policy, typename Iterator>
+    void
+    operator()(Policy&& exec, Iterator iter)
+    {
+        auto is_even = [&](float64_t v) {
+            uint32_t i = (uint32_t)v;
+            return i % 2 == 0;
+        };
+        count_if(exec, iter, iter, non_const(is_even));
+    }
+};
+
+int
+main()
+{
+    test<int32_t>(42, IsEqual<int32_t>(50, OddTag()), [](int32_t j) { return j; });
+#if !defined(_PSTL_ICC_16_17_TEST_REDUCTION_RELEASE_BROKEN)
+    test<int32_t>(42, [](const int32_t&) { return true; }, [](int32_t j) { return j; });
+#endif
+    test<float64_t>(42, IsEqual<float64_t>(50, OddTag()), [](int32_t j) { return float64_t(j); });
+    test<Number>(Number(42, OddTag()), IsEqual<Number>(Number(50, OddTag()), OddTag()),
+                 [](int32_t j) { return Number(j, OddTag()); });
+
+    test_algo_basic_single<int32_t>(run_for_rnd_fw<test_non_const>());
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.nonmodifying/equal.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.nonmodifying/equal.pass.cpp
new file mode 100644
index 00000000000000..a6983eae2ff18d
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.nonmodifying/equal.pass.cpp
@@ -0,0 +1,168 @@
+// -*- C++ -*-
+//===-- equal.pass.cpp ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+#define CPP14_ENABLED 0
+
+struct UserType
+{
+    size_t key;
+    float32_t f;
+    float64_t d;
+    int32_t i;
+
+    bool
+    operator()(UserType a, UserType b)
+    {
+        return a.key < b.key;
+    }
+    bool
+    operator<(UserType a)
+    {
+        return a.key < key;
+    }
+    bool
+    operator>=(UserType a)
+    {
+        return a.key <= key;
+    }
+    bool
+    operator<=(UserType a)
+    {
+        return a.key >= key;
+    }
+    bool
+    operator==(UserType a)
+    {
+        return a.key == key;
+    }
+    bool
+    operator==(UserType a) const
+    {
+        return a.key == key;
+    }
+    bool
+    operator!=(UserType a)
+    {
+        return a.key != key;
+    }
+    UserType operator!()
+    {
+        UserType tmp;
+        tmp.key = !key;
+        return tmp;
+    }
+    friend std::ostream&
+    operator<<(std::ostream& stream, const UserType a)
+    {
+        stream << a.key;
+        return stream;
+    }
+
+    UserType() : key(-1), f(0.0f), d(0.0), i(0) {}
+    UserType(size_t Number) : key(Number), f(0.0f), d(0.0), i(0) {}
+    UserType&
+    operator=(const UserType& other)
+    {
+        key = other.key;
+        return *this;
+    }
+    UserType(const UserType& other) : key(other.key), f(other.f), d(other.d), i(other.i) {}
+    UserType(UserType&& other) : key(other.key), f(other.f), d(other.d), i(other.i)
+    {
+        other.key = -1;
+        other.f = 0.0f;
+        other.d = 0.0;
+        other.i = 0;
+    }
+};
+
+struct test_one_policy
+{
+    template <typename ExecutionPolicy, typename Iterator1, typename Iterator2>
+    void
+    operator()(ExecutionPolicy&& exec, Iterator1 first1, Iterator1 last1, Iterator2 first2, bool is_true_equal)
+    {
+        using namespace std;
+
+        auto expected = equal(first1, last1, first2);
+        auto actual = equal(exec, first1, last1, first2);
+        EXPECT_EQ(expected, actual, "result for equal for random-access iterator, checking against std::equal()");
+
+        // testing bool
+        EXPECT_TRUE(is_true_equal == actual, "result for equal for random-access iterator, bool");
+
+//add C++14 equal semantics tests
+//add more cases for inCopy size less than in
+#if CPP14_ENABLED
+        auto actualr14 = std::equal(in.cbegin(), in.cend(), inCopy.cbegin(), inCopy.cend());
+        EXPECT_EQ(expected, actualr14, "result for equal for random-access iterator");
+#endif
+    }
+};
+
+template <typename T>
+void
+test(size_t bits)
+{
+    for (size_t n = 1; n <= 100000; n = n <= 16 ? n + 1 : size_t(3.1415 * n))
+    {
+
+        // Sequence of odd values
+        Sequence<T> in(n, [bits](size_t k) { return T(2 * HashBits(k, bits - 1) ^ 1); });
+        Sequence<T> inCopy(in);
+
+        invoke_on_all_policies(test_one_policy(), in.begin(), in.end(), inCopy.begin(), true);
+        invoke_on_all_policies(test_one_policy(), in.cbegin(), in.cend(), inCopy.cbegin(), true);
+
+        // testing bool !equal()
+        inCopy[0] = !inCopy[0];
+        invoke_on_all_policies(test_one_policy(), in.begin(), in.end(), inCopy.begin(), false);
+        invoke_on_all_policies(test_one_policy(), in.cbegin(), in.cend(), inCopy.cbegin(), false);
+    }
+}
+
+template <typename T>
+struct test_non_const
+{
+    template <typename Policy, typename FirstIterator, typename SecondInterator>
+    void
+    operator()(Policy&& exec, FirstIterator first_iter, SecondInterator second_iter)
+    {
+        equal(exec, first_iter, first_iter, second_iter, second_iter, non_const(std::equal_to<T>()));
+    }
+};
+
+int
+main()
+{
+
+    test<int32_t>(8 * sizeof(int32_t));
+    test<uint16_t>(8 * sizeof(uint16_t));
+    test<float64_t>(53);
+#if !defined(_PSTL_ICC_16_17_TEST_REDUCTION_BOOL_TYPE_RELEASE_64_BROKEN)
+    test<bool>(1);
+#endif
+    test<UserType>(256);
+
+    test_algo_basic_double<int32_t>(run_for_rnd_fw<test_non_const<int32_t>>());
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.nonmodifying/find.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.nonmodifying/find.pass.cpp
new file mode 100644
index 00000000000000..54b25c27ea2fb0
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.nonmodifying/find.pass.cpp
@@ -0,0 +1,96 @@
+// -*- C++ -*-
+//===-- find.pass.cpp -----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+// Tests for find
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+struct test_find
+{
+#if defined(_PSTL_ICC_17_VC141_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) ||                                                             \
+    defined(_PSTL_ICC_16_VC14_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) //dummy specialization by policy type, in case of broken configuration
+    template <typename Iterator, typename Value>
+    void
+    operator()(pstl::execution::unsequenced_policy, Iterator first, Iterator last, Value value)
+    {
+    }
+    template <typename Iterator, typename Value>
+    void
+    operator()(pstl::execution::parallel_unsequenced_policy, Iterator first, Iterator last, Value value)
+    {
+    }
+#endif
+
+    template <typename Policy, typename Iterator, typename Value>
+    void
+    operator()(Policy&& exec, Iterator first, Iterator last, Value value)
+    {
+        auto i = std::find(first, last, value);
+        auto j = find(exec, first, last, value);
+        EXPECT_TRUE(i == j, "wrong return value from find");
+    }
+};
+
+template <typename T, typename Value, typename Hit, typename Miss>
+void
+test(Value value, Hit hit, Miss miss)
+{
+    // Try sequences of various lengths.
+    for (size_t n = 0; n <= 100000; n = n <= 16 ? n + 1 : size_t(3.1415 * n))
+    {
+        Sequence<T> in(n, [&](size_t k) -> T { return miss(n ^ k); });
+        // Try different find positions, including not found.
+        // By going backwards, we can add extra matches that are *not* supposed to be found.
+        // The decreasing exponential gives us O(n) total work for the loop since each find takes O(m) time.
+        for (size_t m = n; m > 0; m *= 0.6)
+        {
+            if (m < n)
+                in[m] = hit(n ^ m);
+            invoke_on_all_policies(test_find(), in.begin(), in.end(), value);
+            invoke_on_all_policies(test_find(), in.cbegin(), in.cend(), value);
+        }
+    }
+}
+
+// Type defined for sake of checking that std::find works with asymmetric ==.
+class Weird
+{
+    Number value;
+
+  public:
+    friend bool
+    operator==(Number x, Weird y)
+    {
+        return x == y.value;
+    }
+    Weird(int32_t val, OddTag) : value(val, OddTag()) {}
+};
+
+int
+main()
+{
+    // Note that the "hit" and "miss" functions here avoid overflow issues.
+    test<Number>(Weird(42, OddTag()), [](int32_t) { return Number(42, OddTag()); }, // hit
+                 [](int32_t j) { return Number(j == 42 ? 0 : j, OddTag()); });      // miss
+
+    // Test with value that is equal to two different bit patterns (-0.0 and 0.0)
+    test<float32_t>(-0.0, [](int32_t j) { return j & 1 ? 0.0 : -0.0; }, // hit
+                    [](int32_t j) { return j == 0 ? ~j : j; });         // miss
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.nonmodifying/find_end.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.nonmodifying/find_end.pass.cpp
new file mode 100644
index 00000000000000..ed0185f7b0c5e0
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.nonmodifying/find_end.pass.cpp
@@ -0,0 +1,123 @@
+// -*- C++ -*-
+//===-- find_end.pass.cpp -------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+struct test_one_policy
+{
+#if defined(_PSTL_ICC_17_VC141_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) ||                                                             \
+    defined(_PSTL_ICC_16_VC14_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) //dummy specialization by policy type, in case of broken configuration
+    template <typename Iterator1, typename Iterator2, typename Predicate>
+    void
+    operator()(pstl::execution::unsequenced_policy, Iterator1 b, Iterator1 e, Iterator2 bsub, Iterator2 esub,
+               Predicate pred)
+    {
+    }
+    template <typename Iterator1, typename Iterator2, typename Predicate>
+    void
+    operator()(pstl::execution::parallel_unsequenced_policy, Iterator1 b, Iterator1 e, Iterator2 bsub, Iterator2 esub,
+               Predicate pred)
+    {
+    }
+#endif
+
+    template <typename ExecutionPolicy, typename Iterator1, typename Iterator2, typename Predicate>
+    void
+    operator()(ExecutionPolicy&& exec, Iterator1 b, Iterator1 e, Iterator2 bsub, Iterator2 esub, Predicate pred)
+    {
+        using namespace std;
+        // For find_end
+        {
+            auto expected = find_end(b, e, bsub, esub, pred);
+            auto actual = find_end(exec, b, e, bsub, esub);
+            EXPECT_TRUE(actual == expected, "wrong return result from find_end");
+
+            actual = find_end(exec, b, e, bsub, esub, pred);
+            EXPECT_TRUE(actual == expected, "wrong return result from find_end with a predicate");
+        }
+
+        // For search
+        {
+            auto expected = search(b, e, bsub, esub, pred);
+            auto actual = search(exec, b, e, bsub, esub);
+            EXPECT_TRUE(actual == expected, "wrong return result from search");
+
+            actual = search(exec, b, e, bsub, esub, pred);
+            EXPECT_TRUE(actual == expected, "wrong return result from search with a predicate");
+        }
+    }
+};
+
+template <typename T>
+void
+test(const std::size_t bits)
+{
+
+    const std::size_t max_n1 = 1000;
+    const std::size_t max_n2 = (max_n1 * 10) / 8;
+    Sequence<T> in(max_n1, [bits](std::size_t) { return T(2 * HashBits(max_n1, bits - 1) ^ 1); });
+    Sequence<T> sub(max_n2, [bits](std::size_t) { return T(2 * HashBits(max_n1, bits - 1)); });
+    for (std::size_t n1 = 0; n1 <= max_n1; n1 = n1 <= 16 ? n1 + 1 : size_t(3.1415 * n1))
+    {
+        std::size_t sub_n[] = {0, 1, 3, n1, (n1 * 10) / 8};
+        std::size_t res[] = {0, 1, n1 / 2, n1};
+        for (auto n2 : sub_n)
+        {
+            for (auto r : res)
+            {
+                std::size_t i = r, isub = 0;
+                for (; i < n1 && isub < n2; ++i, ++isub)
+                    in[i] = sub[isub];
+                invoke_on_all_policies(test_one_policy(), in.begin(), in.begin() + n1, sub.begin(), sub.begin() + n2,
+                                       std::equal_to<T>());
+                invoke_on_all_policies(test_one_policy(), in.cbegin(), in.cbegin() + n1, sub.cbegin(),
+                                       sub.cbegin() + n2, std::equal_to<T>());
+            }
+        }
+    }
+}
+
+template <typename T>
+struct test_non_const
+{
+    template <typename Policy, typename FirstIterator, typename SecondInterator>
+    void
+    operator()(Policy&& exec, FirstIterator first_iter, SecondInterator second_iter)
+    {
+        invoke_if(exec, [&]() {
+            find_end(exec, first_iter, first_iter, second_iter, second_iter, non_const(std::equal_to<T>()));
+            search(exec, first_iter, first_iter, second_iter, second_iter, non_const(std::equal_to<T>()));
+        });
+    }
+};
+
+int
+main()
+{
+    test<int32_t>(8 * sizeof(int32_t));
+    test<uint16_t>(8 * sizeof(uint16_t));
+    test<float64_t>(53);
+#if !defined(_PSTL_ICC_16_17_TEST_REDUCTION_BOOL_TYPE_RELEASE_64_BROKEN)
+    test<bool>(1);
+#endif
+
+    test_algo_basic_double<int32_t>(run_for_rnd_fw<test_non_const<int32_t>>());
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.nonmodifying/find_first_of.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.nonmodifying/find_first_of.pass.cpp
new file mode 100644
index 00000000000000..5b4801e9b3a87b
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.nonmodifying/find_first_of.pass.cpp
@@ -0,0 +1,112 @@
+// -*- C++ -*-
+//===-- find_first_of.pass.cpp --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+struct test_one_policy
+{
+#if defined(_PSTL_ICC_17_VC141_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) ||                                                             \
+    defined(_PSTL_ICC_16_VC14_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) //dummy specialization by policy type, in case of broken configuration
+    template <typename Iterator1, typename Iterator2, typename Predicate>
+    void
+    operator()(pstl::execution::unsequenced_policy, Iterator1 b, Iterator1 e, Iterator2 bsub, Iterator2 esub,
+               Predicate pred)
+    {
+    }
+    template <typename Iterator1, typename Iterator2, typename Predicate>
+    void
+    operator()(pstl::execution::parallel_unsequenced_policy, Iterator1 b, Iterator1 e, Iterator2 bsub, Iterator2 esub,
+               Predicate pred)
+    {
+    }
+#endif
+
+    template <typename ExecutionPolicy, typename Iterator1, typename Iterator2, typename Predicate>
+    void
+    operator()(ExecutionPolicy&& exec, Iterator1 b, Iterator1 e, Iterator2 bsub, Iterator2 esub, Predicate pred)
+    {
+        using namespace std;
+        Iterator1 expected = find_first_of(b, e, bsub, esub, pred);
+        Iterator1 actual = find_first_of(exec, b, e, bsub, esub, pred);
+        EXPECT_TRUE(actual == expected, "wrong return result from find_first_of with a predicate");
+
+        expected = find_first_of(b, e, bsub, esub);
+        actual = find_first_of(exec, b, e, bsub, esub);
+        EXPECT_TRUE(actual == expected, "wrong return result from find_first_of");
+    }
+};
+
+template <typename T, typename Predicate>
+void
+test(Predicate pred)
+{
+
+    const std::size_t max_n1 = 1000;
+    const std::size_t max_n2 = (max_n1 * 10) / 8;
+    Sequence<T> in1(max_n1, [](std::size_t) { return T(1); });
+    Sequence<T> in2(max_n2, [](std::size_t) { return T(0); });
+    for (std::size_t n1 = 0; n1 <= max_n1; n1 = n1 <= 16 ? n1 + 1 : size_t(3.1415 * n1))
+    {
+        std::size_t sub_n[] = {0, 1, n1 / 3, n1, (n1 * 10) / 8};
+        for (const auto n2 : sub_n)
+        {
+            invoke_on_all_policies(test_one_policy(), in1.begin(), in1.begin() + n1, in2.data(), in2.data() + n2, pred);
+
+            in2[n2 / 2] = T(1);
+            invoke_on_all_policies(test_one_policy(), in1.cbegin(), in1.cbegin() + n1, in2.data(), in2.data() + n2,
+                                   pred);
+
+            if (n2 >= 3)
+            {
+                in2[2 * n2 / 3] = T(1);
+                invoke_on_all_policies(test_one_policy(), in1.cbegin(), in1.cbegin() + n1, in2.begin(),
+                                       in2.begin() + n2, pred);
+                in2[2 * n2 / 3] = T(0);
+            }
+            in2[n2 / 2] = T(0);
+        }
+    }
+    invoke_on_all_policies(test_one_policy(), in1.begin(), in1.begin() + max_n1 / 10, in1.data(),
+                           in1.data() + max_n1 / 10, pred);
+}
+
+template <typename T>
+struct test_non_const
+{
+    template <typename Policy, typename FirstIterator, typename SecondInterator>
+    void
+    operator()(Policy&& exec, FirstIterator first_iter, SecondInterator second_iter)
+    {
+        invoke_if(exec, [&]() {
+            find_first_of(exec, first_iter, first_iter, second_iter, second_iter, non_const(std::equal_to<T>()));
+        });
+    }
+};
+
+int
+main()
+{
+    test<int32_t>(std::equal_to<int32_t>());
+    test<uint16_t>(std::not_equal_to<uint16_t>());
+    test<float64_t>([](const float64_t x, const float64_t y) { return x * x == y * y; });
+
+    test_algo_basic_double<int32_t>(run_for_rnd_fw<test_non_const<int32_t>>());
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.nonmodifying/find_if.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.nonmodifying/find_if.pass.cpp
new file mode 100644
index 00000000000000..180d003996ddac
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.nonmodifying/find_if.pass.cpp
@@ -0,0 +1,109 @@
+// -*- C++ -*-
+//===-- find_if.pass.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+// Tests for find_if and find_if_not
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+struct test_find_if
+{
+#if defined(_PSTL_ICC_17_VC141_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) ||                                                             \
+    defined(_PSTL_ICC_16_VC14_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) //dummy specialization by policy type, in case of broken configuration
+    template <typename Iterator, typename Predicate, typename NotPredicate>
+    void
+    operator()(pstl::execution::unsequenced_policy, Iterator first, Iterator last, Predicate pred,
+               NotPredicate not_pred)
+    {
+    }
+    template <typename Iterator, typename Predicate, typename NotPredicate>
+    void
+    operator()(pstl::execution::parallel_unsequenced_policy, Iterator first, Iterator last, Predicate pred,
+               NotPredicate not_pred)
+    {
+    }
+#endif
+
+    template <typename Policy, typename Iterator, typename Predicate, typename NotPredicate>
+    void
+    operator()(Policy&& exec, Iterator first, Iterator last, Predicate pred, NotPredicate not_pred)
+    {
+        auto i = std::find_if(first, last, pred);
+        auto j = find_if(exec, first, last, pred);
+        EXPECT_TRUE(i == j, "wrong return value from find_if");
+        auto i_not = find_if_not(exec, first, last, not_pred);
+        EXPECT_TRUE(i_not == i, "wrong return value from find_if_not");
+    }
+};
+
+template <typename T, typename Predicate, typename Hit, typename Miss>
+void
+test(Predicate pred, Hit hit, Miss miss)
+{
+    auto not_pred = [pred](T x) { return !pred(x); };
+    // Try sequences of various lengths.
+    for (size_t n = 0; n <= 100000; n = n <= 16 ? n + 1 : size_t(3.1415 * n))
+    {
+        Sequence<T> in(n, [&](size_t k) -> T { return miss(n ^ k); });
+        // Try different find positions, including not found.
+        // By going backwards, we can add extra matches that are *not* supposed to be found.
+        // The decreasing exponential gives us O(n) total work for the loop since each find takes O(m) time.
+        for (size_t m = n; m > 0; m *= 0.6)
+        {
+            if (m < n)
+                in[m] = hit(n ^ m);
+            invoke_on_all_policies(test_find_if(), in.begin(), in.end(), pred, not_pred);
+            invoke_on_all_policies(test_find_if(), in.cbegin(), in.cend(), pred, not_pred);
+        }
+    }
+}
+
+struct test_non_const
+{
+    template <typename Policy, typename Iterator>
+    void
+    operator()(Policy&& exec, Iterator iter)
+    {
+        auto is_even = [&](float64_t v) {
+            uint32_t i = (uint32_t)v;
+            return i % 2 == 0;
+        };
+
+        invoke_if(exec, [&]() {
+            find_if(exec, iter, iter, non_const(is_even));
+            find_if_not(exec, iter, iter, non_const(is_even));
+        });
+    }
+};
+
+int
+main()
+{
+#if !defined(_PSTL_ICC_17_TEST_MAC_RELEASE_32_BROKEN)
+    // Note that the "hit" and "miss" functions here avoid overflow issues.
+    test<Number>(IsMultiple(5, OddTag()), [](int32_t j) { return Number(j - j % 5, OddTag()); }, // hit
+                 [](int32_t j) { return Number(j % 5 == 0 ? j ^ 1 : j, OddTag()); });            // miss
+#endif
+
+    // Try type for which algorithm can really be vectorized.
+    test<float32_t>([](float32_t x) { return x >= 0; }, [](float32_t j) { return j * j; },
+                    [](float32_t j) { return -1 - j * j; });
+
+    test_algo_basic_single<int32_t>(run_for_rnd_fw<test_non_const>());
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.nonmodifying/for_each.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.nonmodifying/for_each.pass.cpp
new file mode 100644
index 00000000000000..14a0ad4f32b124
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.nonmodifying/for_each.pass.cpp
@@ -0,0 +1,102 @@
+// -*- C++ -*-
+//===-- for_each.pass.cpp -------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+template <typename Type>
+struct Gen // element generator: index k -> Type(3 * k - 7), or Type(0) when k % 5 == 1
+{
+    Type
+    operator()(std::size_t k)
+    {
+        return Type(k % 5 != 1 ? 3 * k - 7 : 0); // computed in std::size_t, so it wraps around when 3 * k < 7, before the conversion to Type
+    };
+};
+
+template <typename T>
+struct Flip // function object that replaces an element x with val - x
+{
+    int32_t val; // minuend supplied at construction
+    Flip(int32_t y) : val(y) {}
+    T
+    operator()(T& x) const
+    {
+        return x = val - x; // updates x in place and also returns the new value
+    }
+};
+
+struct test_one_policy // checks for_each and for_each_n under one execution policy against a reference range
+{
+    template <typename Policy, typename Iterator, typename Size>
+    void
+    operator()(Policy&& exec, Iterator first, Iterator last, Iterator expected_first, Iterator expected_last, Size n)
+    {
+        typedef typename std::iterator_traits<Iterator>::value_type T;
+
+        // Try for_each: apply the same Flip to both ranges, then compare the first n elements.
+        std::for_each(expected_first, expected_last, Flip<T>(1)); // reference: overload without a policy
+        for_each(exec, first, last, Flip<T>(1));
+        EXPECT_EQ_N(expected_first, first, n, "wrong effect from for_each");
+
+        // Try for_each_n: both ranges are flipped a second time here.
+        std::for_each_n(std::execution::seq, expected_first, n, Flip<T>(1)); // reference: std::execution::seq
+        for_each_n(exec, first, n, Flip<T>(1));
+        EXPECT_EQ_N(expected_first, first, n, "wrong effect from for_each_n");
+    }
+};
+
+template <typename T>
+void
+test() // runs test_one_policy on pairs of identically generated sequences of growing length
+{
+    for (size_t n = 0; n <= 100000; n = n <= 16 ? n + 1 : size_t(3.1415 * n)) // n steps by 1 up to 17, then is multiplied by 3.1415 (truncated)
+    {
+        Sequence<T> inout(n, Gen<T>()); // range handed to the policy-based calls
+        Sequence<T> expected(n, Gen<T>()); // range handed to the reference calls
+        invoke_on_all_policies(test_one_policy(), inout.begin(), inout.end(), expected.begin(), expected.end(),
+                               inout.size());
+    }
+}
+
+struct test_non_const // smoke check: for_each / for_each_n called with a non_const-wrapped function object
+{
+    template <typename Policy, typename Iterator>
+    void
+    operator()(Policy&& exec, Iterator iter)
+    {
+        invoke_if(exec, [&]() { // invoke_if is a test-support helper; presumably it skips policies known to be broken -- confirm
+            auto f = [](typename std::iterator_traits<Iterator>::reference x) { x = x + 1; };
+
+            for_each(exec, iter, iter, non_const(f)); // [iter, iter) is an empty range
+            for_each_n(exec, iter, 0, non_const(f)); // element count of zero
+        });
+    }
+};
+
+int
+main() // driver: runs test<T>() for three element types, then the non_const smoke check
+{
+    test<int32_t>(); // signed integer elements
+    test<uint16_t>(); // unsigned integer elements
+    test<float64_t>(); // floating-point elements
+
+    test_algo_basic_single<int32_t>(run_for_rnd_fw<test_non_const>());
+
+    std::cout << done() << std::endl; // prints whatever done() (test support code) returns
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.nonmodifying/mismatch.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.nonmodifying/mismatch.pass.cpp
new file mode 100644
index 00000000000000..4d83ad6de7824c
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.nonmodifying/mismatch.pass.cpp
@@ -0,0 +1,132 @@
+// -*- C++ -*-
+//===-- mismatch.pass.cpp -------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+struct test_mismatch // compares policy-based mismatch with a reference result, with and without a predicate
+{
+    template <typename Policy, typename Iterator1, typename Iterator2>
+    void
+    operator()(Policy&& exec, Iterator1 first1, Iterator1 last1, Iterator2 first2) // form without an end iterator for the second range
+    {
+        using namespace std;
+        typedef typename iterator_traits<Iterator1>::value_type T;
+        {
+            const auto expected = std::mismatch(first1, last1, first2, std::equal_to<T>()); // reference: overload without a policy
+            const auto res3 = mismatch(exec, first1, last1, first2, std::equal_to<T>());
+            EXPECT_TRUE(expected == res3, "wrong return result from mismatch");
+            const auto res4 = mismatch(exec, first1, last1, first2); // no predicate passed
+            EXPECT_TRUE(expected == res4, "wrong return result from mismatch");
+        }
+    }
+    template <typename Policy, typename Iterator1, typename Iterator2>
+    void
+    operator()(Policy&& exec, Iterator1 first1, Iterator1 last1, Iterator2 first2, Iterator2 last2) // form with both ranges bounded
+    {
+        using namespace std;
+        typedef typename iterator_traits<Iterator1>::value_type T;
+        {
+            const auto expected = mismatch(std::execution::seq, first1, last1, first2, last2, std::equal_to<T>()); // reference: std::execution::seq
+            const auto res1 = mismatch(exec, first1, last1, first2, last2, std::equal_to<T>());
+            EXPECT_TRUE(expected == res1, "wrong return result from mismatch");
+            const auto res2 = mismatch(exec, first1, last1, first2, last2); // no predicate passed
+            EXPECT_TRUE(expected == res2, "wrong return result from mismatch");
+        }
+    }
+};
+
+template <typename T>
+void
+test_mismatch_by_type() // runs test_mismatch on equal, locally modified, fixed-length and empty second sequences
+{
+    using namespace std;
+    for (size_t size = 0; size <= 100000; size = size <= 16 ? size + 1 : size_t(3.1415 * size)) // size steps by 1 up to 17, then is multiplied by 3.1415 (truncated)
+    {
+        const T val = T(-1); // value written over selected elements below
+        Sequence<T> in(size, [](size_t v) -> T { return T(v % 100); });
+        {
+            Sequence<T> in2(in); // starts as a copy of in
+            invoke_on_all_policies(test_mismatch(), in.begin(), in.end(), in2.begin(), in2.end());
+            invoke_on_all_policies(test_mismatch(), in.begin(), in.end(), in2.begin());
+
+            const size_t min_size = 3;
+            if (size > min_size)
+            {
+                const size_t idx_for_1 = size / min_size;
+                in[idx_for_1] = val, in[idx_for_1 + 1] = val, in[idx_for_1 + 2] = val; // overwrite three consecutive elements of in
+                invoke_on_all_policies(test_mismatch(), in.begin(), in.end(), in2.begin(), in2.end());
+                invoke_on_all_policies(test_mismatch(), in.begin(), in.end(), in2.begin());
+            }
+
+            const size_t idx_for_2 = 500;
+            if (size >= idx_for_2 - 1)
+            {
+                in2[size / idx_for_2] = val; // overwrite one element near the front of in2
+                invoke_on_all_policies(test_mismatch(), in.cbegin(), in.cend(), in2.cbegin(), in2.cend());
+                invoke_on_all_policies(test_mismatch(), in.cbegin(), in.cend(), in2.cbegin());
+            }
+        }
+        {
+            Sequence<T> in2(100, [](size_t v) -> T { return T(v); }); // 100 elements regardless of size
+            invoke_on_all_policies(test_mismatch(), in2.begin(), in2.end(), in.begin(), in.end());
+            // The form of mismatch without an end for the second range cannot be used when the second sequence is shorter than the first.
+            if (in2.size() <= in.size())
+                invoke_on_all_policies(test_mismatch(), in2.begin(), in2.end(), in.begin());
+
+            const size_t idx = 97;
+            in2[idx] = val; // overwrite in2[97] and in2[98]
+            in2[idx + 1] = val;
+            invoke_on_all_policies(test_mismatch(), in.cbegin(), in.cend(), in2.cbegin(), in2.cend());
+            if (in.size() <= in2.size()) // same length restriction, with the roles of in and in2 swapped
+                invoke_on_all_policies(test_mismatch(), in.cbegin(), in.cend(), in2.cbegin());
+        }
+        {
+            Sequence<T> in2({}); // presumably an empty sequence -- confirm against the Sequence constructors
+            invoke_on_all_policies(test_mismatch(), in2.begin(), in2.end(), in.begin(), in.end());
+
+            invoke_on_all_policies(test_mismatch(), in.cbegin(), in.cend(), in2.cbegin(), in2.cend());
+            if (in.size() == 0) // the unbounded form is only exercised when in has no elements
+                invoke_on_all_policies(test_mismatch(), in.cbegin(), in.cend(), in2.cbegin());
+        }
+    }
+}
+
+template <typename T>
+struct test_non_const // smoke check: mismatch called with a non_const-wrapped std::less<T>
+{
+    template <typename Policy, typename FirstIterator, typename SecondInterator>
+    void
+    operator()(Policy&& exec, FirstIterator first_iter, SecondInterator second_iter)
+    {
+        mismatch(exec, first_iter, first_iter, second_iter, second_iter, non_const(std::less<T>())); // both ranges are empty
+    }
+};
+
+int
+main() // driver: runs test_mismatch_by_type for three element types, then the non_const smoke check
+{
+
+    test_mismatch_by_type<int32_t>(); // integer elements
+    test_mismatch_by_type<float64_t>(); // floating-point elements
+    test_mismatch_by_type<Wrapper<int32_t>>(); // Wrapper is declared in the test support code
+
+    test_algo_basic_double<int32_t>(run_for_rnd_fw<test_non_const<int32_t>>());
+
+    std::cout << done() << std::endl; // prints whatever done() (test support code) returns
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.nonmodifying/none_of.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.nonmodifying/none_of.pass.cpp
new file mode 100644
index 00000000000000..dbdcd54438c439
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.nonmodifying/none_of.pass.cpp
@@ -0,0 +1,101 @@
+// -*- C++ -*-
+//===-- none_of.pass.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+/*
+  TODO: consider implementing the following tests for a better code coverage
+  - correctness
+  - bad input argument (if applicable)
+  - data corruption around/of input and output
+  - correctly work with nested parallelism
+  - check that algorithm does not require anything more than is described in its requirements section
+*/
+
+using namespace TestUtils;
+
+struct test_none_of // checks that std::none_of under one execution policy returns the expected boolean
+{
+    template <typename ExecutionPolicy, typename Iterator, typename Predicate>
+    void
+    operator()(ExecutionPolicy&& exec, Iterator begin, Iterator end, Predicate pred, bool expected)
+    {
+
+        auto actualr = std::none_of(exec, begin, end, pred); // result under test
+        EXPECT_EQ(expected, actualr, "result for none_of");
+    }
+};
+
+template <typename T>
+void
+test(size_t bits) // runs test_none_of on sequences without and with elements equal to a "spike" value
+{
+    for (size_t n = 0; n <= 100000; n = n <= 16 ? n + 1 : size_t(3.1415 * n)) // n steps by 1 up to 17, then is multiplied by 3.1415 (truncated)
+    {
+
+        // Sequence of odd values; the generator ignores its index, so all n elements are equal.
+        Sequence<T> in(n, [n, bits](size_t) { return T(2 * HashBits(n, bits - 1) ^ 1); });
+
+        // Even value, or false when T is bool.
+        T spike(2 * HashBits(n, bits - 1));
+
+        invoke_on_all_policies(test_none_of(), in.begin(), in.end(), is_equal_to<T>(spike), true); // no spike stored yet
+        invoke_on_all_policies(test_none_of(), in.cbegin(), in.cend(), is_equal_to<T>(spike), true);
+        if (n > 0)
+        {
+            // Sprinkle in a hit
+            in[2 * n / 3] = spike;
+            invoke_on_all_policies(test_none_of(), in.begin(), in.end(), is_equal_to<T>(spike), false);
+            invoke_on_all_policies(test_none_of(), in.cbegin(), in.cend(), is_equal_to<T>(spike), false);
+
+            // Sprinkle in a few more hits
+            in[n / 3] = spike;
+            in[n / 2] = spike;
+            invoke_on_all_policies(test_none_of(), in.begin(), in.end(), is_equal_to<T>(spike), false);
+            invoke_on_all_policies(test_none_of(), in.cbegin(), in.cend(), is_equal_to<T>(spike), false);
+        }
+    }
+}
+
+struct test_non_const // smoke check: none_of called with a non_const-wrapped predicate
+{
+    template <typename Policy, typename Iterator>
+    void
+    operator()(Policy&& exec, Iterator iter)
+    {
+        auto is_even = [&](float64_t v) { // converts v to uint32_t, then tests parity
+            uint32_t i = (uint32_t)v;
+            return i % 2 == 0;
+        };
+        none_of(exec, iter, iter, non_const(is_even)); // [iter, iter) is an empty range
+    }
+};
+
+int
+main() // driver: runs test<T>(bits) for several element types, then the non_const smoke check
+{
+    test<int32_t>(8 * sizeof(int32_t)); // bits = 8 * sizeof(T)
+    test<uint16_t>(8 * sizeof(uint16_t));
+    test<float64_t>(53); // NOTE(review): 53 presumably matches the significand width of a double -- confirm
+#if !defined(_PSTL_ICC_16_17_TEST_REDUCTION_BOOL_TYPE_RELEASE_64_BROKEN)
+    test<bool>(1);
+#endif
+
+    test_algo_basic_single<int32_t>(run_for_rnd_fw<test_non_const>());
+
+    std::cout << done() << std::endl; // prints whatever done() (test support code) returns
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.nonmodifying/nth_element.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.nonmodifying/nth_element.pass.cpp
new file mode 100644
index 00000000000000..f3e43da39a3a91
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.nonmodifying/nth_element.pass.cpp
@@ -0,0 +1,175 @@
+// -*- C++ -*-
+//===-- nth_element.pass.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <iostream>
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+// User defined type with minimal requirements
+template <typename T>
+struct DataType // wrapper around one T that declares only move construction and move assignment
+{
+    explicit DataType(int32_t k) : my_val(k) {}
+    DataType(DataType&& input) // takes input's value and resets input to T(0)
+    {
+        my_val = std::move(input.my_val);
+        input.my_val = T(0);
+    }
+    DataType&
+    operator=(DataType&& input) // takes input's value and resets input to T(0)
+    {
+        my_val = std::move(input.my_val);
+        input.my_val = T(0);
+        return *this;
+    }
+    T
+    get_val() const // returns a copy of the stored value
+    {
+        return my_val;
+    }
+
+    friend std::ostream&
+    operator<<(std::ostream& stream, const DataType<T>& input) // streams the stored value
+    {
+        return stream << input.my_val;
+    }
+
+  private:
+    T my_val; // the wrapped value
+};
+
+template <typename T>
+bool
+is_equal(const DataType<T>& x, const DataType<T>& y) // DataType overload: compares the wrapped values
+{
+    return x.get_val() == y.get_val();
+}
+
+template <typename T>
+bool
+is_equal(const T& x, const T& y) // generic overload: compares with operator==
+{
+    return x == y;
+}
+
+struct test_one_policy // checks policy-based nth_element against the overload without a policy
+{
+#if defined(_PSTL_ICC_17_VC141_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) ||                                                             \
+    defined(_PSTL_ICC_16_VC14_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) // dummy specialization by policy type, in case of broken configuration
+    template <typename Iterator1, typename Size, typename Generator1, typename Generator2, typename Compare>
+    typename std::enable_if<is_same_iterator_category<Iterator1, std::random_access_iterator_tag>::value, void>::type
+    operator()(pstl::execution::unsequenced_policy, Iterator1 first1, Iterator1 last1, Iterator1 first2,
+               Iterator1 last2, Size n, Size m, Generator1 generator1, Generator2 generator2, Compare comp)
+    {
+    }
+    template <typename Iterator1, typename Size, typename Generator1, typename Generator2, typename Compare>
+    typename std::enable_if<is_same_iterator_category<Iterator1, std::random_access_iterator_tag>::value, void>::type
+    operator()(pstl::execution::parallel_unsequenced_policy, Iterator1 first1, Iterator1 last1, Iterator1 first2,
+               Iterator1 last2, Size n, Size m, Generator1 generator1, Generator2 generator2, Compare comp)
+    {
+    }
+#endif
+
+    // nth_element works only with random access iterators
+    template <typename Policy, typename Iterator1, typename Size, typename Generator1, typename Generator2,
+              typename Compare>
+    typename std::enable_if<is_same_iterator_category<Iterator1, std::random_access_iterator_tag>::value, void>::type
+    operator()(Policy&& exec, Iterator1 first1, Iterator1 last1, Iterator1 first2, Iterator1 last2, Size n, Size m,
+               Generator1 generator1, Generator2 generator2, Compare comp)
+    {
+
+        using T = typename std::iterator_traits<Iterator1>::value_type;
+        const Iterator1 mid1 = std::next(first1, m); // nth position in the reference range
+        const Iterator1 mid2 = std::next(first2, m); // nth position in the range under test
+
+        fill_data(first1, mid1, generator1); // fill_data is a test-support helper; presumably it fills the range from the generator -- confirm
+        fill_data(mid1, last1, generator2);
+        fill_data(first2, mid2, generator1);
+        fill_data(mid2, last2, generator2);
+        std::nth_element(first1, mid1, last1, comp); // reference: overload without a policy
+        std::nth_element(exec, first2, mid2, last2, comp);
+        if (m > 0 && m < n) // the nth elements are compared only when m lies strictly between 0 and n
+        {
+            EXPECT_TRUE(is_equal(*mid1, *mid2), "wrong result from nth_element with predicate");
+        }
+        EXPECT_TRUE(std::find_first_of(first2, mid2, mid2, last2, [comp](T& x, T& y) { return comp(y, x); }) == mid2, // no x in [first2, mid2) may have a y in [mid2, last2) with comp(y, x)
+                    "wrong effect from nth_element with predicate");
+    }
+
+    template <typename Policy, typename Iterator1, typename Size, typename Generator1, typename Generator2,
+              typename Compare>
+    typename std::enable_if<!is_same_iterator_category<Iterator1, std::random_access_iterator_tag>::value, void>::type
+    operator()(Policy&&, Iterator1, Iterator1, Iterator1, Iterator1, Size, Size, Generator1, Generator2, Compare) // no-op for iterators that are not random access
+    {
+    }
+};
+
+template <typename T, typename Generator1, typename Generator2, typename Compare>
+void
+test_by_type(Generator1 generator1, Generator2 generator2, Compare comp) // runs test_one_policy for growing prefix lengths n and several nth positions m
+{
+    using namespace std;
+    size_t max_size = 10000;
+    Sequence<T> in1(max_size, [](size_t v) { return T(v); }); // passed as first2/last2 (the range under test)
+    Sequence<T> exp(max_size, [](size_t v) { return T(v); }); // passed as first1/last1 (the reference range)
+    size_t m;
+
+    for (size_t n = 0; n <= max_size; n = n <= 16 ? n + 1 : size_t(3.1415 * n)) // n steps by 1 up to 17, then is multiplied by 3.1415 (truncated)
+    {
+        m = 0; // nth position at the start of the range
+        invoke_on_all_policies(test_one_policy(), exp.begin(), exp.begin() + n, in1.begin(), in1.begin() + n, n, m,
+                               generator1, generator2, comp);
+        m = n / 7;
+        invoke_on_all_policies(test_one_policy(), exp.begin(), exp.begin() + n, in1.begin(), in1.begin() + n, n, m,
+                               generator1, generator2, comp);
+        m = 3 * n / 5;
+        invoke_on_all_policies(test_one_policy(), exp.begin(), exp.begin() + n, in1.begin(), in1.begin() + n, n, m,
+                               generator1, generator2, comp);
+    }
+    invoke_on_all_policies(test_one_policy(), exp.begin(), exp.begin() + max_size, in1.begin(), in1.begin() + max_size, // full length with m == n, so nth is the end iterator
+                           max_size, max_size, generator1, generator2, comp);
+}
+
+template <typename T>
+struct test_non_const // smoke check: nth_element called with a non_const-wrapped std::less<T>
+{
+    template <typename Policy, typename Iterator>
+    void
+    operator()(Policy&& exec, Iterator iter)
+    {
+        invoke_if(exec, [&]() { nth_element(exec, iter, iter, iter, non_const(std::less<T>())); }); // first, nth and last are all iter, i.e. an empty range
+    }
+};
+
+int
+main() // driver: runs test_by_type with several element types, generator pairs and comparators
+{
+    test_by_type<int32_t>([](int32_t i) { return 10 * i; }, [](int32_t i) { return i + 1; }, std::less<int32_t>());
+    test_by_type<int32_t>([](int32_t) { return 0; }, [](int32_t) { return 0; }, std::less<int32_t>()); // both generators return the constant 0
+
+    test_by_type<float64_t>([](int32_t i) { return -2 * i; }, [](int32_t i) { return -(2 * i + 1); },
+                            [](const float64_t x, const float64_t y) { return x > y; }); // comparator is "greater than"
+
+    test_by_type<DataType<float32_t>>( // user-defined element type declared earlier in this file
+        [](int32_t i) { return DataType<float32_t>(2 * i + 1); }, [](int32_t i) { return DataType<float32_t>(2 * i); },
+        [](const DataType<float32_t>& x, const DataType<float32_t>& y) { return x.get_val() < y.get_val(); });
+
+    test_algo_basic_single<int32_t>(run_for_rnd<test_non_const<int32_t>>());
+
+    std::cout << done() << std::endl; // prints whatever done() (test support code) returns
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.nonmodifying/search_n.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.nonmodifying/search_n.pass.cpp
new file mode 100644
index 00000000000000..573f364edef21b
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.nonmodifying/search_n.pass.cpp
@@ -0,0 +1,109 @@
+// -*- C++ -*-
+//===-- search_n.pass.cpp -------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+struct test_one_policy // checks policy-based search_n, with and without a predicate, against the overload without a policy
+{
+#if defined(_PSTL_ICC_17_VC141_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) ||                                                             \
+    defined(_PSTL_ICC_16_VC14_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) //dummy specialization by policy type, in case of broken configuration
+    template <typename Iterator, typename Size, typename T, typename Predicate>
+    void
+    operator()(pstl::execution::unsequenced_policy, Iterator b, Iterator e, Size count, const T& value, Predicate pred)
+    {
+    }
+    template <typename Iterator, typename Size, typename T, typename Predicate>
+    void
+    operator()(pstl::execution::parallel_unsequenced_policy, Iterator b, Iterator e, Size count, const T& value,
+               Predicate pred)
+    {
+    }
+#endif
+
+    template <typename ExecutionPolicy, typename Iterator, typename Size, typename T, typename Predicate>
+    void
+    operator()(ExecutionPolicy&& exec, Iterator b, Iterator e, Size count, const T& value, Predicate pred)
+    {
+        using namespace std;
+        auto expected = search_n(b, e, count, value, pred); // reference: overload without a policy, with the predicate
+        auto actual = search_n(exec, b, e, count, value); // policy overload without a predicate
+        EXPECT_TRUE(actual == expected, "wrong return result from search_n");
+
+        actual = search_n(exec, b, e, count, value, pred); // policy overload with the predicate
+        EXPECT_TRUE(actual == expected, "wrong return result from search_n with a predicate");
+    }
+};
+
+template <typename T>
+void
+test() // runs test_one_policy on zero-filled sequences that contain one run of T(1)
+{
+
+    const std::size_t max_n1 = 100000;
+    const T value = T(1); // the value being searched for
+    for (std::size_t n1 = 0; n1 <= max_n1; n1 = n1 <= 16 ? n1 + 1 : size_t(3.1415 * n1)) // n1 steps by 1 up to 17, then is multiplied by 3.1415 (truncated)
+    {
+        std::size_t sub_n[] = {0, 1, 3, n1, (n1 * 10) / 8}; // run lengths to search for; the last one exceeds n1 once n1 >= 4
+        std::size_t res[] = {0, 1, n1 / 2, n1}; // start positions of the run
+        for (auto n2 : sub_n)
+        {
+            // Some of standard libraries return "first" in this case. We return "last" according to the standard
+            if (n2 == 0)
+            {
+                continue;
+            }
+            for (auto r : res)
+            {
+                Sequence<T> in(n1, [](std::size_t) { return T(0); });
+                std::size_t i = r, isub = 0;
+                for (; i < n1 && isub < n2; ++i, ++isub) // writes up to n2 copies of value from index r, stopping at the end of in
+                    in[i] = value;
+
+                invoke_on_all_policies(test_one_policy(), in.begin(), in.begin() + n1, n2, value, std::equal_to<T>());
+                invoke_on_all_policies(test_one_policy(), in.cbegin(), in.cbegin() + n1, n2, value, std::equal_to<T>());
+            }
+        }
+    }
+}
+
+template <typename T>
+struct test_non_const // smoke check: search_n called with a non_const-wrapped std::equal_to<T>
+{
+    template <typename Policy, typename Iterator>
+    void
+    operator()(Policy&& exec, Iterator iter)
+    {
+        invoke_if(exec, [&]() { search_n(exec, iter, iter, 0, T(0), non_const(std::equal_to<T>())); }); // empty range and a count of zero
+    }
+};
+
+int
+main() // driver: runs test<T>() for several element types, then the non_const smoke check
+{
+    test<int32_t>(); // signed integer elements
+    test<uint16_t>(); // unsigned integer elements
+    test<float64_t>(); // floating-point elements
+#if !defined(_PSTL_ICC_16_17_TEST_REDUCTION_BOOL_TYPE_RELEASE_64_BROKEN)
+    test<bool>();
+#endif
+
+    test_algo_basic_single<int32_t>(run_for_rnd_fw<test_non_const<int32_t>>());
+
+    std::cout << done() << std::endl; // prints whatever done() (test support code) returns
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.sorting/alg.heap.operations/is_heap.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.sorting/alg.heap.operations/is_heap.pass.cpp
new file mode 100644
index 00000000000000..08eca8eed1c527
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.sorting/alg.heap.operations/is_heap.pass.cpp
@@ -0,0 +1,146 @@
+// -*- C++ -*-
+//===-- is_heap.pass.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+// Tests for is_heap, is_heap_until
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+#include <iostream>
+
+using namespace TestUtils;
+
+struct WithCmpOp // element type with two fields whose operator< looks at _first only
+{
+    int32_t _first; // the field operator< compares
+    int32_t _second; // not read by operator<
+    WithCmpOp() : _first(0), _second(0){};
+    explicit WithCmpOp(int32_t x) : _first(x), _second(x){}; // both fields start equal to x
+    bool
+    operator<(const WithCmpOp& rhs) const
+    {
+        return this->_first < rhs._first;
+    }
+};
+
+struct test_is_heap // checks policy-based is_heap / is_heap_until against the overloads without a policy
+{
+#if defined(_PSTL_ICC_17_VC141_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) ||                                                             \
+    defined(_PSTL_ICC_16_VC14_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) //dummy specialization by policy type, in case of broken configuration
+    template <typename Iterator, typename Predicate>
+    typename std::enable_if<is_same_iterator_category<Iterator, std::random_access_iterator_tag>::value, void>::type
+    operator()(pstl::execution::unsequenced_policy, Iterator first, Iterator last, Predicate pred)
+    {
+    }
+    template <typename Iterator, typename Predicate>
+    typename std::enable_if<is_same_iterator_category<Iterator, std::random_access_iterator_tag>::value, void>::type
+    operator()(pstl::execution::parallel_unsequenced_policy, Iterator first, Iterator last, Predicate pred)
+    {
+    }
+#endif
+
+    template <typename Policy, typename Iterator, typename Predicate>
+    typename std::enable_if<is_same_iterator_category<Iterator, std::random_access_iterator_tag>::value, void>::type
+    operator()(Policy&& exec, Iterator first, Iterator last, Predicate pred)
+    {
+        using namespace std;
+        // is_heap
+        {
+            bool expected = is_heap(first, last); // reference: overload without a policy
+            bool actual = is_heap(exec, first, last);
+            EXPECT_TRUE(expected == actual, "wrong return value from is_heap");
+        }
+        // is_heap with predicate
+        {
+            bool expected = is_heap(first, last, pred);
+            bool actual = is_heap(exec, first, last, pred);
+            EXPECT_TRUE(expected == actual, "wrong return value from is_heap with predicate");
+        }
+        // is_heap_until
+        {
+            Iterator expected = is_heap_until(first, last);
+            Iterator actual = is_heap_until(exec, first, last);
+            EXPECT_TRUE(expected == actual, "wrong return value from is_heap_until");
+        }
+        // is_heap_until with predicate
+        {
+            const Iterator expected = is_heap_until(first, last, pred);
+            const auto y = std::distance(first, expected); // offset of the reference result
+            const Iterator actual = is_heap_until(exec, first, last, pred);
+            const auto x = std::distance(first, actual); // offset of the result under test
+            EXPECT_TRUE(expected == actual, "wrong return value from is_heap_until with predicate");
+            EXPECT_EQ(x, y, "both iterators should be the same distance away from 'first'");
+        }
+    }
+
+    // is_heap, is_heap_until works only with random access iterators
+    template <typename Policy, typename Iterator, typename Predicate>
+    typename std::enable_if<!is_same_iterator_category<Iterator, std::random_access_iterator_tag>::value, void>::type
+    operator()(Policy&&, Iterator, Iterator, Predicate) // no-op for iterators that are not random access
+    {
+    }
+};
+
+template <typename T, typename Comp>
+void
+test_is_heap_by_type(Comp comp) // runs test_is_heap on sequences in which a growing prefix has been heapified
+{
+    using namespace std;
+
+    const size_t max_size = 100000;
+    for (size_t n = 0; n <= max_size; n = n <= 16 ? n + 1 : size_t(3.1415 * n)) // n steps by 1 up to 17, then is multiplied by 3.1415 (truncated)
+    {
+        Sequence<T> in(n, [](size_t v) -> T { return T(v); }); // element i starts as T(i)
+
+        invoke_on_all_policies(test_is_heap(), in.begin(), in.end(), comp);
+
+        std::make_heap(in.begin(), in.begin() + n / 4, comp); // heapify the first n / 4 elements
+        invoke_on_all_policies(test_is_heap(), in.cbegin(), in.cend(), comp);
+
+        std::make_heap(in.begin(), in.begin() + n / 3, comp); // heapify the first n / 3 elements
+        invoke_on_all_policies(test_is_heap(), in.begin(), in.end(), comp);
+
+        std::make_heap(in.begin(), in.end(), comp); // heapify the whole sequence
+        invoke_on_all_policies(test_is_heap(), in.cbegin(), in.cend(), comp);
+    }
+
+    Sequence<T> in(max_size / 10, [](size_t) -> T { return T(1); }); // every element is T(1)
+    invoke_on_all_policies(test_is_heap(), in.begin(), in.end(), comp);
+}
+
+template <typename T>
+struct test_non_const // smoke check: is_heap / is_heap_until called with a non_const-wrapped std::less<T>
+{
+    template <typename Policy, typename Iterator>
+    void
+    operator()(Policy&& exec, Iterator iter)
+    {
+        invoke_if(exec, [&]() { // invoke_if is a test-support helper; presumably it skips policies known to be broken -- confirm
+            is_heap(exec, iter, iter, non_const(std::less<T>())); // [iter, iter) is an empty range
+            is_heap_until(exec, iter, iter, non_const(std::less<T>()));
+        });
+    }
+};
+
+int
+main() // driver: runs test_is_heap_by_type with three element type / comparator pairs, then the non_const smoke check
+{
+    test_is_heap_by_type<float32_t>(std::greater<float32_t>());
+    test_is_heap_by_type<WithCmpOp>(std::less<WithCmpOp>()); // std::less uses WithCmpOp::operator<
+    test_is_heap_by_type<uint64_t>([](uint64_t x, uint64_t y) { return x % 100 < y % 100; }); // orders values by their remainder modulo 100
+
+    test_algo_basic_single<int32_t>(run_for_rnd<test_non_const<int32_t>>());
+
+    std::cout << done() << std::endl; // prints whatever done() (test support code) returns
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.sorting/alg.lex.comparison/lexicographical_compare.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.sorting/alg.lex.comparison/lexicographical_compare.pass.cpp
new file mode 100644
index 00000000000000..cb920574ba353f
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.sorting/alg.lex.comparison/lexicographical_compare.pass.cpp
@@ -0,0 +1,175 @@
+// -*- C++ -*-
+//===-- lexicographical_compare.pass.cpp ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <iostream>
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+struct test_one_policy // checks policy-based lexicographical_compare against the serial overload
+{
+
+    template <typename ExecutionPolicy, typename Iterator1, typename Iterator2, typename Predicate>
+    void
+    operator()(ExecutionPolicy&& exec, Iterator1 begin1, Iterator1 end1, Iterator2 begin2, Iterator2 end2,
+               Predicate pred)
+    {
+        const bool expected = std::lexicographical_compare(begin1, end1, begin2, end2, pred); // serial reference
+        const bool actual = std::lexicographical_compare(exec, begin1, end1, begin2, end2, pred);
+        EXPECT_TRUE(actual == expected, "wrong return result from lexicographical compare with predicate");
+    }
+
+    template <typename ExecutionPolicy, typename Iterator1, typename Iterator2>
+    void
+    operator()(ExecutionPolicy&& exec, Iterator1 begin1, Iterator1 end1, Iterator2 begin2, Iterator2 end2)
+    {
+        const bool expected = std::lexicographical_compare(begin1, end1, begin2, end2); // serial reference, no predicate
+        const bool actual = std::lexicographical_compare(exec, begin1, end1, begin2, end2);
+        EXPECT_TRUE(actual == expected, "wrong return result from lexicographical compare without predicate");
+    }
+};
+
+template <typename T1, typename T2, typename Predicate>
+void
+test(Predicate pred) // drives test_one_policy over a Sequence<T1> and a Sequence<T2> with ranges of varying length
+{
+
+    const std::size_t max_n = 1000000;
+    Sequence<T1> in1(max_n, [](std::size_t k) { return T1(k); });
+    Sequence<T2> in2(2 * max_n, [](std::size_t k) { return T2(k); }); // constructed with twice the size of in1
+
+    std::size_t n2;
+
+    // Test case: Call algorithm's version without predicate.
+    invoke_on_all_policies(test_one_policy(), in1.cbegin(), in1.cbegin() + max_n, in2.cbegin() + 3 * max_n / 10,
+                           in2.cbegin() + 5 * max_n / 10);
+
+    // Test case: the second range is shorter than the first (max_n / 10, then 2 * max_n / 10 elements at an offset).
+    std::size_t max_n2 = max_n / 10;
+    invoke_on_all_policies(test_one_policy(), in1.begin(), in1.begin() + max_n, in2.cbegin(), in2.cbegin() + max_n2,
+                           pred);
+    invoke_on_all_policies(test_one_policy(), in1.begin(), in1.begin() + max_n, in2.begin() + max_n2,
+                           in2.begin() + 3 * max_n2, pred);
+
+    // Test case: the second range (2 * max_n elements) is longer than the first (max_n elements).
+    max_n2 = 2 * max_n;
+    invoke_on_all_policies(test_one_policy(), in1.cbegin(), in1.cbegin() + max_n, in2.begin(), in2.begin() + max_n2,
+                           pred);
+
+    for (std::size_t n1 = 0; n1 <= max_n; n1 = n1 <= 16 ? n1 + 1 : std::size_t(3.1415 * n1)) // +1 steps, then x3.1415
+    {
+        // Test case: If two ranges have equivalent elements and are of the same length, then the ranges are lexicographically equal.
+        n2 = n1;
+        invoke_on_all_policies(test_one_policy(), in1.begin(), in1.begin() + n1, in2.begin(), in2.begin() + n2, pred);
+
+        n2 = n1; // n2 already equals n1 from the assignment above
+        // Test case: two ranges have different elements and are of the same length (second sequence less than first)
+        std::size_t ind = n1 / 2;
+        in2[ind] = T2(-1);
+        invoke_on_all_policies(test_one_policy(), in1.begin(), in1.begin() + n1, in2.begin(), in2.begin() + n2, pred);
+        in2[ind] = T2(ind); // put back the value the generator lambda produced for this index
+
+        // Test case: two ranges have different elements and are of the same length (first sequence less than second)
+        ind = n1 / 5;
+        in1[ind] = T1(-1);
+        invoke_on_all_policies(test_one_policy(), in1.begin(), in1.begin() + n1, in2.cbegin(), in2.cbegin() + n2, pred);
+        in1[ind] = T1(ind); // put back the value the generator lambda produced for this index
+    }
+}
+
+template <typename Predicate>
+void
+test_string(Predicate pred) // same scheme as test(), but the inputs are std::string objects
+{
+
+    const std::size_t max_n = 1000000;
+    std::string in1 = "";
+    std::string in2 = "";
+    for (std::size_t n1 = 0; n1 <= max_n; ++n1)
+    {
+        in1 += n1; // appends one character: n1 is implicitly converted from std::size_t to char
+    }
+
+    for (std::size_t n1 = 0; n1 <= 2 * max_n; ++n1)
+    {
+        in2 += n1; // same implicit conversion; in2 ends up with 2 * max_n + 1 characters
+    }
+
+    std::size_t n2;
+
+    for (std::size_t n1 = 0; n1 < in1.size(); n1 = n1 <= 16 ? n1 + 1 : std::size_t(3.1415 * n1))
+    {
+        // Test case: If two ranges have equivalent elements and are of the same length, then the ranges are lexicographically equal.
+        n2 = n1;
+        invoke_on_all_policies(test_one_policy(), in1.begin(), in1.begin() + n1, in2.begin(), in2.begin() + n2, pred);
+
+        n2 = n1; // n2 already equals n1 from the assignment above
+        // Test case: two ranges have different elements and are of the same length (second sequence less than first)
+        in2[n1 / 2] = 'a'; // NOTE: never restored, so these overwrites accumulate across iterations
+        invoke_on_all_policies(test_one_policy(), in1.begin(), in1.begin() + n1, in2.begin(), in2.begin() + n2, pred);
+
+        // Test case: two ranges have different elements and are of the same length (first sequence less than second)
+        in1[n1 / 5] = 'a'; // NOTE: never restored either
+        invoke_on_all_policies(test_one_policy(), in1.begin(), in1.begin() + n1, in2.cbegin(), in2.cbegin() + n2, pred);
+    }
+    invoke_on_all_policies(test_one_policy(), in1.cbegin(), in1.cbegin() + max_n, in2.cbegin() + 3 * max_n / 10,
+                           in2.cbegin() + 5 * max_n / 10); // predicate-less overload, run on the modified strings
+}
+
+template <typename T>
+struct LocalWrapper // user-defined element type: holds a T and offers only operator<
+{
+    explicit LocalWrapper(std::size_t k) : my_val(k) {} // k is converted to T on initialization
+    bool
+    operator<(const LocalWrapper<T>& w) const
+    {
+        return my_val < w.my_val;
+    }
+
+  private:
+    T my_val;
+};
+
+template <typename T>
+struct test_non_const // calls lexicographical_compare with a non_const-wrapped std::less<T>
+{
+    template <typename Policy, typename FirstIterator, typename SecondInterator>
+    void
+    operator()(Policy&& exec, FirstIterator first_iter, SecondInterator second_iter)
+    {
+        invoke_if(exec, [&]() { // both ranges passed below are empty ([it, it)); the result is discarded
+            lexicographical_compare(exec, first_iter, first_iter, second_iter, second_iter, non_const(std::less<T>()));
+        });
+    }
+};
+
+int
+main()
+{
+    test<uint16_t, float64_t>(std::less<float64_t>());
+    test<float32_t, int32_t>(std::greater<float32_t>());
+#if !defined(_PSTL_ICC_18_TEST_EARLY_EXIT_AVX_RELEASE_BROKEN)
+    test<float64_t, int32_t>([](const float64_t x, const int32_t y) { return x * x < y * y; }); // compares squares
+#endif // !_PSTL_ICC_18_TEST_EARLY_EXIT_AVX_RELEASE_BROKEN
+    test<LocalWrapper<int32_t>, LocalWrapper<int32_t>>(
+        [](const LocalWrapper<int32_t>& x, const LocalWrapper<int32_t>& y) { return x < y; });
+    test_string([](const char x, const char y) { return x < y; });
+
+    test_algo_basic_double<int32_t>(run_for_rnd_fw<test_non_const<int32_t>>()); // non_const comparator functor
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.sorting/alg.min.max/minmax_element.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.sorting/alg.min.max/minmax_element.pass.cpp
new file mode 100644
index 00000000000000..715b25014992da
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.sorting/alg.min.max/minmax_element.pass.cpp
@@ -0,0 +1,192 @@
+// -*- C++ -*-
+//===-- minmax_element.pass.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <algorithm>
+#include <set>
+#include <cmath>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+struct check_minelement // checks policy-based min_element (with and without comparator) against the serial call
+{
+    template <typename Policy, typename Iterator>
+    void
+    operator()(Policy&& exec, Iterator begin, Iterator end)
+    {
+        typedef typename std::iterator_traits<Iterator>::value_type T;
+        const Iterator expect = std::min_element(begin, end); // serial reference
+        const Iterator result = std::min_element(exec, begin, end);
+        const Iterator result_pred = std::min_element(exec, begin, end, std::less<T>());
+        EXPECT_TRUE(expect == result, "wrong return result from min_element");
+        EXPECT_TRUE(expect == result_pred, "wrong return result from min_element");
+    }
+};
+
+struct check_maxelement // checks policy-based max_element (with and without comparator) against the serial call
+{
+    template <typename Policy, typename Iterator>
+    void
+    operator()(Policy&& exec, Iterator begin, Iterator end)
+    {
+        typedef typename std::iterator_traits<Iterator>::value_type T;
+        const Iterator expect = std::max_element(begin, end); // serial reference
+        const Iterator result = std::max_element(exec, begin, end);
+        const Iterator result_pred = std::max_element(exec, begin, end, std::less<T>());
+        EXPECT_TRUE(expect == result, "wrong return result from max_element");
+        EXPECT_TRUE(expect == result_pred, "wrong return result from max_element");
+    }
+};
+
+struct check_minmaxelement // checks policy-based minmax_element (with and without comparator) against the serial call
+{
+    template <typename Policy, typename Iterator>
+    void
+    operator()(Policy&& exec, Iterator begin, Iterator end)
+    {
+        typedef typename std::iterator_traits<Iterator>::value_type T;
+        const std::pair<Iterator, Iterator> expect = std::minmax_element(begin, end); // serial reference
+        const std::pair<Iterator, Iterator> got = std::minmax_element(exec, begin, end);
+        const std::pair<Iterator, Iterator> got_pred = std::minmax_element(exec, begin, end, std::less<T>());
+        EXPECT_TRUE(expect.first == got.first, "wrong return result from minmax_element (min part)");
+        EXPECT_TRUE(expect.second == got.second, "wrong return result from minmax_element (max part)");
+        EXPECT_TRUE(expect == got_pred, "wrong return result from minmax_element"); // compares both iterators at once
+    }
+};
+
+template <typename T>
+struct sequence_wrapper // a Sequence<T> plus sentinel min/max values that lie outside the pattern_fill() range
+{
+    TestUtils::Sequence<T> seq;
+    const T min_value;
+    const T max_value;
+    static const std::size_t bits = 30; // We assume that T can handle signed 2^bits+1 value
+
+    // TestUtils::HashBits returns value between 0 and (1<<bits)-1,
+    // therefore we can treat 1<<bits as the maximum and -(1<<bits) as the minimum
+    sequence_wrapper(std::size_t n) : seq(n), min_value(-(1 << bits)), max_value(1 << bits) {}
+
+    void
+    pattern_fill() // overwrites every element with T(HashBits(index, bits))
+    {
+        seq.fill([](std::size_t i) -> T { return T(TestUtils::HashBits(i, bits)); });
+    }
+
+    // stores `value` at position `at` and at a scattering of later positions (no-op for an empty sequence)
+    void
+    set_desired_value(std::size_t at, T value)
+    {
+        if (seq.size() == 0)
+            return;
+        seq[at] = value;
+
+        // Producing several red herrings: later copies of the same value, at steps of 1 + HashBits(i, 5)
+        for (std::size_t i = at + 1; i < seq.size(); i += 1 + TestUtils::HashBits(i, 5))
+            seq[i] = value;
+    }
+};
+
+template <typename T>
+void
+test_by_type(std::size_t n) // runs the min/max/minmax checks on an n-element sequence for several target positions
+{
+    sequence_wrapper<T> wseq(n);
+
+    // to avoid overtesting we use std::set to keep only unique indexes
+    std::set<std::size_t> targets{0};
+    if (n > 1)
+    {
+        targets.insert(1);
+        targets.insert(2.718282 * n / 3); // floating-point result is converted to std::size_t on insertion
+        targets.insert(n / 2);
+        targets.insert(n / 7.389056); // floating-point result is converted to std::size_t on insertion
+        targets.insert(n - 1); // last
+    }
+
+    for (std::set<std::size_t>::iterator it = targets.begin(); it != targets.end(); ++it)
+    {
+        wseq.pattern_fill();
+        wseq.set_desired_value(*it, wseq.min_value);
+        TestUtils::invoke_on_all_policies(check_minelement(), wseq.seq.cbegin(), wseq.seq.cend());
+        TestUtils::invoke_on_all_policies(check_minelement(), wseq.seq.begin(), wseq.seq.end());
+
+        wseq.set_desired_value(*it, wseq.max_value); // overwrites the min_value entries written just above
+        TestUtils::invoke_on_all_policies(check_maxelement(), wseq.seq.cbegin(), wseq.seq.cend());
+        TestUtils::invoke_on_all_policies(check_maxelement(), wseq.seq.begin(), wseq.seq.end());
+
+        if (targets.size() > 1)
+        {
+            for (std::set<std::size_t>::reverse_iterator rit = targets.rbegin(); rit != targets.rend(); ++rit)
+            {
+                if (*rit == *it) // we require at least 2 unique indexes in targets; stop once rit reaches it
+                    break;
+                wseq.pattern_fill();
+                wseq.set_desired_value(*it, wseq.min_value);  // setting minimum element
+                wseq.set_desired_value(*rit, wseq.max_value); // setting maximum element
+                TestUtils::invoke_on_all_policies(check_minmaxelement(), wseq.seq.cbegin(), wseq.seq.cend());
+                TestUtils::invoke_on_all_policies(check_minmaxelement(), wseq.seq.begin(), wseq.seq.end());
+            }
+        }
+        else
+        { // we must check this corner case; it can not be tested in loop above
+            TestUtils::invoke_on_all_policies(check_minmaxelement(), wseq.seq.cbegin(), wseq.seq.cend());
+            TestUtils::invoke_on_all_policies(check_minmaxelement(), wseq.seq.begin(), wseq.seq.end());
+        }
+    }
+}
+
+// Element type that should provide the minimal requirements only: the sole comparison it defines is operator<
+struct OnlyLessCompare
+{
+    int32_t val;
+    OnlyLessCompare() : val(0) {}
+    OnlyLessCompare(int32_t val_) : val(val_) {} // non-explicit: allows implicit conversion from int32_t
+    bool
+    operator<(const OnlyLessCompare& other) const
+    {
+        return val < other.val;
+    }
+};
+
+template <typename T>
+struct test_non_const // calls max_element / min_element / minmax_element with a non_const-wrapped std::less<T>
+{
+    template <typename Policy, typename Iterator>
+    void
+    operator()(Policy&& exec, Iterator iter)
+    {
+        max_element(exec, iter, iter, non_const(std::less<T>())); // [iter, iter) is an empty range; result discarded
+        min_element(exec, iter, iter, non_const(std::less<T>()));
+        minmax_element(exec, iter, iter, non_const(std::less<T>()));
+    }
+};
+
+int
+main()
+{
+    using TestUtils::float64_t;
+    const std::size_t N = 100000;
+
+    for (std::size_t n = 0; n < N; n = n < 16 ? n + 1 : size_t(3.14159 * n)) // +1 steps below 16, then x3.14159
+    {
+        test_by_type<float64_t>(n);
+        test_by_type<OnlyLessCompare>(n);
+    }
+
+    test_algo_basic_single<int32_t>(run_for_rnd_fw<test_non_const<int32_t>>()); // non_const comparator functor
+
+    std::cout << TestUtils::done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.sorting/alg.set.operations/includes.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.sorting/alg.set.operations/includes.pass.cpp
new file mode 100644
index 00000000000000..778d265f9ca7f9
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.sorting/alg.set.operations/includes.pass.cpp
@@ -0,0 +1,106 @@
+// -*- C++ -*-
+//===-- includes.pass.cpp -------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <algorithm>
+#include <cmath>
+#include <execution>
+#include <functional>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+template <typename T>
+struct Num // wrapper around a T that is comparable with, and convertible to, Num of another value type
+{
+    T val;
+    explicit Num(const T& v) : val(v) {}
+
+    //for "includes" checks
+    template <typename T1>
+    bool
+    operator<(const Num<T1>& v1) const
+    {
+        return val < v1.val; // compares the raw values of the two (possibly different) wrapped types
+    }
+
+    //The types Type1 and Type2 must be such that an object of type InputIt can be dereferenced and then implicitly converted to both of them
+    template <typename T1>
+    operator Num<T1>() const
+    {
+        return Num<T1>((T1)val); // val is cast to the target value type
+    }
+};
+
+struct test_one_policy // checks policy-based std::includes against the serial overload
+{
+    template <typename Policy, typename InputIterator1, typename InputIterator2, typename Compare>
+    typename std::enable_if<!TestUtils::isReverse<InputIterator1>::value, void>::type
+    operator()(Policy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2,
+               Compare comp)
+    {
+
+        auto expect_res = std::includes(first1, last1, first2, last2, comp); // serial reference
+        auto res = std::includes(exec, first1, last1, first2, last2, comp);
+
+        EXPECT_TRUE(expect_res == res, "wrong result for includes");
+    }
+
+    template <typename Policy, typename InputIterator1, typename InputIterator2, typename Compare>
+    typename std::enable_if<TestUtils::isReverse<InputIterator1>::value, void>::type
+    operator()(Policy&&, InputIterator1, InputIterator1, InputIterator2, InputIterator2, Compare)
+    { // selected when isReverse<InputIterator1>::value is true: intentionally does nothing
+    }
+};
+
+template <typename T1, typename T2, typename Compare>
+void
+test_includes(Compare compare) // runs test_one_policy on sorted pseudo-random ranges of sizes n and m
+{
+
+    const std::size_t n_max = 1000000;
+
+    // The rand()%(2*n+1) encourages generation of some duplicates.
+    std::srand(42); // fixed seed for the rand() calls below
+
+    for (std::size_t n = 0; n < n_max; n = n <= 16 ? n + 1 : size_t(3.1415 * n))
+    {
+        for (std::size_t m = 0; m < n_max; m = m <= 16 ? m + 1 : size_t(2.71828 * m))
+        {
+            //prepare the input ranges
+            Sequence<T1> in1(n, [](std::size_t k) { return rand() % (2 * k + 1); });
+            Sequence<T2> in2(m, [](std::size_t k) { return rand() % (k + 1); });
+
+            std::sort(in1.begin(), in1.end(), compare); // both inputs are sorted with the comparator under test
+            std::sort(in2.begin(), in2.end(), compare);
+
+            invoke_on_all_policies(test_one_policy(), in1.begin(), in1.end(), in2.cbegin(), in2.cend(), compare);
+
+            //test w/ non constant predicate (only when both sizes are below 5)
+            if (n < 5 && m < 5)
+                invoke_on_all_policies(test_one_policy(), in1.begin(), in1.end(), in2.cbegin(), in2.cend(),
+                                       non_const(compare));
+        }
+    }
+}
+
+int
+main()
+{
+
+    test_includes<float64_t, float64_t>(std::less<>()); // transparent comparator
+    test_includes<Num<int64_t>, Num<int32_t>>([](const Num<int64_t>& x, const Num<int32_t>& y) { return x < y; });
+    std::cout << done() << std::endl;
+
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.sorting/alg.set.operations/set.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.sorting/alg.set.operations/set.pass.cpp
new file mode 100644
index 00000000000000..40ae3d92c8a03e
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.sorting/alg.set.operations/set.pass.cpp
@@ -0,0 +1,280 @@
+// -*- C++ -*-
+//===-- set.pass.cpp ------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <algorithm>
+#include <chrono>
+#include <cmath>
+#include <execution>
+#include <functional>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+template <typename T>
+struct Num // wrapper around a T: comparable with / convertible to Num of another value type, plus operator==
+{
+    T val;
+
+    Num() : val{} {} // value-initializes val
+    Num(const T& v) : val(v) {} // non-explicit: allows implicit conversion from T
+
+    //for "includes" checks
+    template <typename T1>
+    bool
+    operator<(const Num<T1>& v1) const
+    {
+        return val < v1.val; // compares the raw values of the two (possibly different) wrapped types
+    }
+
+    //The types Type1 and Type2 must be such that an object of type InputIt can be dereferenced and then implicitly converted to both of them
+    template <typename T1>
+    operator Num<T1>() const
+    {
+        return Num<T1>((T1)val); // val is cast to the target value type
+    }
+
+    friend bool
+    operator==(const Num& v1, const Num& v2)
+    {
+        return v1.val == v2.val;
+    }
+};
+
+template <typename Type> // Type is not referenced inside the struct
+struct test_set_union // checks policy-based std::set_union against the serial overload
+{
+    template <typename Policy, typename InputIterator1, typename InputIterator2, typename Compare>
+    typename std::enable_if<!TestUtils::isReverse<InputIterator1>::value, void>::type
+    operator()(Policy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2,
+               Compare comp)
+    {
+        using T1 = typename std::iterator_traits<InputIterator1>::value_type;
+
+        auto n1 = std::distance(first1, last1);
+        auto n2 = std::distance(first2, last2);
+        auto n = n1 + n2; // both output buffers are sized to the sum of the two input lengths
+        Sequence<T1> expect(n);
+        Sequence<T1> out(n);
+
+        auto expect_res = std::set_union(first1, last1, first2, last2, expect.begin(), comp); // serial reference
+        auto res = std::set_union(exec, first1, last1, first2, last2, out.begin(), comp);
+
+        EXPECT_TRUE(expect_res - expect.begin() == res - out.begin(), "wrong result for set_union");
+        EXPECT_EQ_N(expect.begin(), out.begin(), std::distance(out.begin(), res), "wrong set_union effect");
+    }
+
+    template <typename Policy, typename InputIterator1, typename InputIterator2, typename Compare>
+    typename std::enable_if<TestUtils::isReverse<InputIterator1>::value, void>::type
+    operator()(Policy&&, InputIterator1, InputIterator1, InputIterator2, InputIterator2, Compare)
+    { // selected when isReverse<InputIterator1>::value is true: intentionally does nothing
+    }
+};
+
+template <typename Type> // Type is not referenced inside the struct
+struct test_set_intersection // checks policy-based std::set_intersection against the serial overload
+{
+    template <typename Policy, typename InputIterator1, typename InputIterator2, typename Compare>
+    typename std::enable_if<!TestUtils::isReverse<InputIterator1>::value, void>::type
+    operator()(Policy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2,
+               Compare comp)
+    {
+        using T1 = typename std::iterator_traits<InputIterator1>::value_type;
+
+        auto n1 = std::distance(first1, last1);
+        auto n2 = std::distance(first2, last2);
+        auto n = n1 + n2; // both output buffers are sized to the sum of the two input lengths
+        Sequence<T1> expect(n);
+        Sequence<T1> out(n);
+
+        auto expect_res = std::set_intersection(first1, last1, first2, last2, expect.begin(), comp); // serial reference
+        auto res = std::set_intersection(exec, first1, last1, first2, last2, out.begin(), comp);
+
+        EXPECT_TRUE(expect_res - expect.begin() == res - out.begin(), "wrong result for set_intersection");
+        EXPECT_EQ_N(expect.begin(), out.begin(), std::distance(out.begin(), res), "wrong set_intersection effect");
+    }
+
+    template <typename Policy, typename InputIterator1, typename InputIterator2, typename Compare>
+    typename std::enable_if<TestUtils::isReverse<InputIterator1>::value, void>::type
+    operator()(Policy&&, InputIterator1, InputIterator1, InputIterator2, InputIterator2, Compare)
+    { // selected when isReverse<InputIterator1>::value is true: intentionally does nothing
+    }
+};
+
+template <typename Type> // Type is not referenced inside the struct
+struct test_set_difference // checks policy-based std::set_difference against the serial overload
+{
+    template <typename Policy, typename InputIterator1, typename InputIterator2, typename Compare>
+    typename std::enable_if<!TestUtils::isReverse<InputIterator1>::value, void>::type
+    operator()(Policy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2,
+               Compare comp)
+    {
+        using T1 = typename std::iterator_traits<InputIterator1>::value_type;
+
+        auto n1 = std::distance(first1, last1);
+        auto n2 = std::distance(first2, last2);
+        auto n = n1 + n2; // both output buffers are sized to the sum of the two input lengths
+        Sequence<T1> expect(n);
+        Sequence<T1> out(n);
+
+        auto expect_res = std::set_difference(first1, last1, first2, last2, expect.begin(), comp); // serial reference
+        auto res = std::set_difference(exec, first1, last1, first2, last2, out.begin(), comp);
+
+        EXPECT_TRUE(expect_res - expect.begin() == res - out.begin(), "wrong result for set_difference");
+        EXPECT_EQ_N(expect.begin(), out.begin(), std::distance(out.begin(), res), "wrong set_difference effect");
+    }
+
+    template <typename Policy, typename InputIterator1, typename InputIterator2, typename Compare>
+    typename std::enable_if<TestUtils::isReverse<InputIterator1>::value, void>::type
+    operator()(Policy&&, InputIterator1, InputIterator1, InputIterator2, InputIterator2, Compare)
+    { // selected when isReverse<InputIterator1>::value is true: intentionally does nothing
+    }
+};
+
+template <typename Type> // Type is not referenced inside the struct
+struct test_set_symmetric_difference // checks policy-based std::set_symmetric_difference against the serial overload
+{
+    template <typename Policy, typename InputIterator1, typename InputIterator2, typename Compare>
+    typename std::enable_if<!TestUtils::isReverse<InputIterator1>::value, void>::type
+    operator()(Policy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2,
+               Compare comp)
+    {
+        using T1 = typename std::iterator_traits<InputIterator1>::value_type;
+
+        auto n1 = std::distance(first1, last1);
+        auto n2 = std::distance(first2, last2);
+        auto n = n1 + n2; // both output buffers are sized to the sum of the two input lengths
+        Sequence<T1> expect(n);
+        Sequence<T1> out(n);
+
+        auto expect_res = std::set_symmetric_difference(first1, last1, first2, last2, expect.begin(), comp); // serial
+        auto res = std::set_symmetric_difference(exec, first1, last1, first2, last2, out.begin(), comp);
+
+        EXPECT_TRUE(expect_res - expect.begin() == res - out.begin(), "wrong result for set_symmetric_difference");
+        EXPECT_EQ_N(expect.begin(), out.begin(), std::distance(out.begin(), res),
+                    "wrong set_symmetric_difference effect");
+    }
+
+    template <typename Policy, typename InputIterator1, typename InputIterator2, typename Compare>
+    typename std::enable_if<TestUtils::isReverse<InputIterator1>::value, void>::type
+    operator()(Policy&&, InputIterator1, InputIterator1, InputIterator2, InputIterator2, Compare)
+    { // selected when isReverse<InputIterator1>::value is true: intentionally does nothing
+    }
+};
+
+template <typename T1, typename T2, typename Compare>
+void
+test_set(Compare compare) // runs the four set-operation checks on sorted pseudo-random ranges of sizes n and m
+{
+
+    const std::size_t n_max = 100000;
+
+    // The rand()%(2*n+1) encourages generation of some duplicates.
+    std::srand(4200); // fixed seed for the rand() calls below
+
+    for (std::size_t n = 0; n < n_max; n = n <= 16 ? n + 1 : size_t(3.1415 * n))
+    {
+        for (std::size_t m = 0; m < n_max; m = m <= 16 ? m + 1 : size_t(2.71828 * m))
+        {
+            //prepare the input ranges (for odd m, every in2 element gets an extra full rand() term)
+            Sequence<T1> in1(n, [](std::size_t k) { return rand() % (2 * k + 1); });
+            Sequence<T2> in2(m, [m](std::size_t k) { return (m % 2) * rand() + rand() % (k + 1); });
+
+            std::sort(in1.begin(), in1.end(), compare); // both inputs are sorted with the comparator under test
+            std::sort(in2.begin(), in2.end(), compare);
+
+            invoke_on_all_policies(test_set_union<T1>(), in1.begin(), in1.end(), in2.cbegin(), in2.cend(),
+                                        compare);
+
+            invoke_on_all_policies(test_set_intersection<T1>(), in1.begin(), in1.end(), in2.cbegin(), in2.cend(),
+                                        compare);
+
+            invoke_on_all_policies(test_set_difference<T1>(), in1.begin(), in1.end(), in2.cbegin(), in2.cend(),
+                                        compare);
+
+            invoke_on_all_policies(test_set_symmetric_difference<T1>(), in1.begin(), in1.end(), in2.cbegin(),
+                                        in2.cend(), compare);
+        }
+    }
+}
+
+template <typename T>
+struct test_non_const_set_difference // calls set_difference with a non_const-wrapped std::less<T>
+{
+    template <typename Policy, typename InputIterator, typename OutputInterator>
+    void
+    operator()(Policy&& exec, InputIterator input_iter, OutputInterator out_iter)
+    { // both input ranges are the empty range [input_iter, input_iter); the result is discarded
+        set_difference(exec, input_iter, input_iter, input_iter, input_iter, out_iter, non_const(std::less<T>()));
+    }
+};
+
+template <typename T>
+struct test_non_const_set_intersection // calls set_intersection with a non_const-wrapped std::less<T>
+{
+    template <typename Policy, typename InputIterator, typename OutputInterator>
+    void
+    operator()(Policy&& exec, InputIterator input_iter, OutputInterator out_iter)
+    { // both input ranges are the empty range [input_iter, input_iter); the result is discarded
+        set_intersection(exec, input_iter, input_iter, input_iter, input_iter, out_iter, non_const(std::less<T>()));
+    }
+};
+
+template <typename T>
+struct test_non_const_set_symmetric_difference // calls set_symmetric_difference with a non_const-wrapped std::less<T>
+{
+    template <typename Policy, typename InputIterator, typename OutputInterator>
+    void
+    operator()(Policy&& exec, InputIterator input_iter, OutputInterator out_iter)
+    { // both input ranges are the empty range [input_iter, input_iter); the result is discarded
+        set_symmetric_difference(exec, input_iter, input_iter, input_iter, input_iter, out_iter,
+                                 non_const(std::less<T>()));
+    }
+};
+
+template <typename T>
+struct test_non_const_set_union // calls set_union with a non_const-wrapped std::less<T>
+{
+    template <typename Policy, typename InputIterator, typename OutputInterator>
+    void
+    operator()(Policy&& exec, InputIterator input_iter, OutputInterator out_iter)
+    { // both input ranges are the empty range [input_iter, input_iter); the result is discarded
+        set_union(exec, input_iter, input_iter, input_iter, input_iter, out_iter, non_const(std::less<T>()));
+    }
+};
+
+int
+main()
+{
+
+    test_set<float64_t, float64_t>(std::less<>()); // transparent comparator
+    test_set<Num<int64_t>, Num<int32_t>>([](const Num<int64_t>& x, const Num<int32_t>& y) { return x < y; });
+
+    test_set<MemoryChecker, MemoryChecker>([](const MemoryChecker& val1, const MemoryChecker& val2) -> bool {
+        return val1.value() < val2.value();
+    });
+    EXPECT_FALSE(MemoryChecker::alive_objects() < 0, "wrong effect from set algorithms: number of ctors calls < num of dtors calls");
+    EXPECT_FALSE(MemoryChecker::alive_objects() > 0, "wrong effect from set algorithms: number of ctors calls > num of dtors calls");
+
+    test_algo_basic_double<int32_t>(run_for_rnd_fw<test_non_const_set_difference<int32_t>>()); // non_const comparator checks
+
+    test_algo_basic_double<int32_t>(run_for_rnd_fw<test_non_const_set_intersection<int32_t>>());
+
+    test_algo_basic_double<int32_t>(run_for_rnd_fw<test_non_const_set_symmetric_difference<int32_t>>());
+
+    test_algo_basic_double<int32_t>(run_for_rnd_fw<test_non_const_set_union<int32_t>>());
+
+    std::cout << done() << std::endl;
+
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.sorting/is_sorted.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.sorting/is_sorted.pass.cpp
new file mode 100644
index 00000000000000..d4c6fe291ed849
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.sorting/is_sorted.pass.cpp
@@ -0,0 +1,100 @@
+// -*- C++ -*-
+//===-- is_sorted.pass.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+struct test_is_sorted // checks policy-based is_sorted / is_sorted_until against an expected flag and the serial call
+{
+    template <typename Policy, typename Iterator>
+    void
+    operator()(Policy&& exec, Iterator first, Iterator last, bool exam) // exam: expected is_sorted result
+    {
+        using namespace std;
+        typedef typename std::iterator_traits<Iterator>::value_type T;
+
+        //try random-access iterator
+        bool res = is_sorted(exec, first, last);
+        EXPECT_TRUE(exam == res, "is_sorted wrong result for random-access iterator");
+        auto iexam = is_sorted_until(first, last); // serial reference
+        auto ires = is_sorted_until(exec, first, last);
+        EXPECT_TRUE(iexam == ires, "is_sorted_until wrong result for random-access iterator");
+
+        //try random-access iterator with a predicate
+        res = is_sorted(exec, first, last, std::less<T>());
+        EXPECT_TRUE(exam == res, "is_sorted wrong result for random-access iterator");
+        iexam = is_sorted_until(first, last, std::less<T>()); // serial reference with the same predicate
+        ires = is_sorted_until(exec, first, last, std::less<T>());
+        EXPECT_TRUE(iexam == ires, "is_sorted_until wrong result for random-access iterator");
+    }
+};
+
+template <typename T>
+void
+test_is_sorted_by_type() // runs test_is_sorted on several Sequence<T> inputs; the expected flag comes from serial std::is_sorted
+{
+
+    Sequence<T> in(99999, [](size_t v) -> T { return T(v); }); //fill 0..n
+
+    invoke_on_all_policies(test_is_sorted(), in.begin(), in.end(), std::is_sorted(in.begin(), in.end()));
+    invoke_on_all_policies(test_is_sorted(), in.cbegin(), in.cend(), std::is_sorted(in.begin(), in.end()));
+
+    in[in.size() / 2] = -1; // overwrite the middle element with -1
+    invoke_on_all_policies(test_is_sorted(), in.begin(), in.end(), std::is_sorted(in.begin(), in.end()));
+    invoke_on_all_policies(test_is_sorted(), in.cbegin(), in.cend(), std::is_sorted(in.begin(), in.end()));
+
+    in[1] = -1; // additionally overwrite the second element with -1
+    invoke_on_all_policies(test_is_sorted(), in.begin(), in.end(), std::is_sorted(in.begin(), in.end()));
+    invoke_on_all_policies(test_is_sorted(), in.cbegin(), in.cend(), std::is_sorted(in.begin(), in.end()));
+
+    //an empty container
+    Sequence<T> in0(0);
+    invoke_on_all_policies(test_is_sorted(), in0.begin(), in0.end(), std::is_sorted(in0.begin(), in0.end()));
+    invoke_on_all_policies(test_is_sorted(), in0.cbegin(), in0.cend(), std::is_sorted(in0.begin(), in0.end()));
+
+    //non-descending order (nine elements, all generated as T(0))
+    Sequence<T> in1(9, [](size_t) -> T { return T(0); });
+    invoke_on_all_policies(test_is_sorted(), in1.begin(), in1.end(), std::is_sorted(in1.begin(), in1.end()));
+    invoke_on_all_policies(test_is_sorted(), in1.cbegin(), in1.cend(), std::is_sorted(in1.begin(), in1.end()));
+}
+
+template <typename T>
+struct test_non_const // calls is_sorted / is_sorted_until with a non_const-wrapped std::less<T>
+{
+    template <typename Policy, typename Iterator>
+    void
+    operator()(Policy&& exec, Iterator iter)
+    {
+        is_sorted(exec, iter, iter, non_const(std::less<T>())); // wrap the comparator, as the sibling tests do
+        is_sorted_until(exec, iter, iter, non_const(std::less<T>())); // [iter, iter) is empty; result discarded
+    }
+};
+
+int
+main()
+{
+
+    test_is_sorted_by_type<int32_t>();
+    test_is_sorted_by_type<float64_t>();
+
+    test_is_sorted_by_type<Wrapper<int32_t>>(); // Wrapper is declared outside this file's hunk
+
+    test_algo_basic_single<int32_t>(run_for_rnd_fw<test_non_const<int32_t>>());
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.sorting/partial_sort.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.sorting/partial_sort.pass.cpp
new file mode 100644
index 00000000000000..bf6e68be8c0790
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.sorting/partial_sort.pass.cpp
@@ -0,0 +1,149 @@
+// -*- C++ -*-
+//===-- partial_sort.pass.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <cmath>
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+static std::atomic<int32_t> count_val;
+static std::atomic<int32_t> count_comp;
+
+template <typename T>
+struct Num
+{
+    T val;
+    // Every constructor increments count_val and the destructor decrements it, so it tracks live Num objects.
+    Num() : val() { ++count_val; } // value-initialize val so a default-constructed Num never holds garbage
+    Num(T v) : val(v) { ++count_val; }
+    Num(const Num<T>& v) : val(v.val) { ++count_val; }
+    Num(Num<T>&& v) : val(v.val) { ++count_val; }
+    ~Num() { --count_val; }
+    Num<T>&
+    operator=(const Num<T>& v)
+    {
+        val = v.val;
+        return *this;
+    }
+    operator T() const { return val; }
+    bool
+    operator<(const Num<T>& v) const
+    {
+        ++count_comp; // count every comparison made through operator<
+        return val < v.val;
+    }
+};
+
+struct test_brick_partial_sort
+{
+    template <typename Policy, typename InputIterator, typename Compare>
+    typename std::enable_if<is_same_iterator_category<InputIterator, std::random_access_iterator_tag>::value,
+                            void>::type
+    operator()(Policy&& exec, InputIterator first, InputIterator last, InputIterator exp_first, InputIterator exp_last,
+               Compare compare)
+    {
+        // Random-access case: compare the policy-based partial_sort on [first, last) with the serial one on the copy.
+        typedef typename std::iterator_traits<InputIterator>::value_type T;
+
+        // The rand()%(2*n+1) encourages generation of some duplicates.
+        std::srand(42);
+        const std::size_t n = last - first;
+        for (std::size_t k = 0; k < n; ++k)
+        {
+            first[k] = T(std::rand() % (2 * n + 1));
+        }
+        std::copy(first, last, exp_first);
+
+        for (std::size_t p = 0; p < n; p = p <= 16 ? p + 1 : std::size_t(31.415 * p))
+        {
+            auto m1 = first + p;
+            auto m2 = exp_first + p;
+
+            std::partial_sort(exp_first, m2, exp_last, compare);
+            count_comp = 0;
+            std::partial_sort(exec, first, m1, last, compare);
+            EXPECT_EQ_N(exp_first, first, p, "wrong effect from partial_sort");
+
+            // Check the upper bound on comparisons: O(t * (last - first) * log(middle - first)), t = number of threads.
+            if (m1 - first > 1)
+            {
+#ifdef _DEBUG
+#    if defined(_PSTL_PAR_BACKEND_TBB)
+                auto threads = tbb::this_task_arena::max_concurrency();
+#    else
+                auto threads = 1;
+#    endif
+                auto bound = std::ceil(n * std::log(float32_t(m1 - first)));
+                if (count_comp > bound * threads)
+                {
+                    std::cout << "complexity exceeded" << std::endl;
+                }
+#endif // _DEBUG
+            }
+        }
+    }
+    // Other iterator categories: the random-access overload above is disabled, so this one does nothing.
+    template <typename Policy, typename InputIterator, typename Compare>
+    typename std::enable_if<!is_same_iterator_category<InputIterator, std::random_access_iterator_tag>::value,
+                            void>::type
+    operator()(Policy&&, InputIterator, InputIterator, InputIterator, InputIterator, Compare)
+    {
+    }
+};
+
+template <typename T, typename Compare>
+void
+test_partial_sort(Compare compare)
+{
+    // Both buffers are sized for the largest case; each run below hands only their first n elements to the test.
+    const std::size_t n_max = 100000;
+    Sequence<T> in(n_max);
+    Sequence<T> exp(n_max);
+    for (std::size_t n = 0; n < n_max; n = n <= 16 ? n + 1 : size_t(3.1415 * n))
+    {
+        invoke_on_all_policies(test_brick_partial_sort(), in.begin(), in.begin() + n, exp.begin(), exp.begin() + n,
+                               compare);
+    }
+}
+
+template <typename T>
+struct test_non_const
+{
+    template <typename Policy, typename Iterator>
+    void
+    operator()(Policy&& exec, Iterator iter)
+    {
+        partial_sort(exec, iter, iter, iter, non_const(std::less<T>()));
+    }
+};
+
+int
+main()
+{
+    count_val = 0;
+
+    test_partial_sort<Num<float32_t>>([](Num<float32_t> x, Num<float32_t> y) { return x < y; });
+
+    EXPECT_TRUE(count_val == 0, "cleanup error");
+
+    test_partial_sort<int32_t>(
+        [](int32_t x, int32_t y) { return x > y; }); // Reversed so accidental use of < will be detected.
+
+    test_algo_basic_single<int32_t>(run_for_rnd<test_non_const<int32_t>>());
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.sorting/partial_sort_copy.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.sorting/partial_sort_copy.pass.cpp
new file mode 100644
index 00000000000000..9090f89e43616c
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.sorting/partial_sort_copy.pass.cpp
@@ -0,0 +1,196 @@
+// -*- C++ -*-
+//===-- partial_sort_copy.pass.cpp ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+// Tests for partial_sort_copy
+#include "support/pstl_test_config.h"
+
+#include <cmath>
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+template <typename T>
+struct Num
+{
+    T val;
+    // Plain value holder: constructible from T, copyable, movable, convertible back to T, ordered by val.
+    Num() : val(0) {}
+    Num(T init) : val(init) {}
+    Num(const Num& other) : val(other.val) {}
+    Num(Num&& other) : val(other.val) {}
+    Num&
+    operator=(const Num& other)
+    {
+        this->val = other.val;
+        return *this;
+    }
+    operator T() const { return val; }
+    bool
+    operator<(const Num& rhs) const
+    {
+        return val < rhs.val;
+    }
+};
+
+template <typename RandomAccessIterator>
+struct test_one_policy
+{
+    RandomAccessIterator d_first;
+    RandomAccessIterator d_last;
+    RandomAccessIterator exp_first;
+    RandomAccessIterator exp_last;
+    // This constructor is needed because the output range must not be converted to other iterator types (only random access iterators are allowed)
+    test_one_policy(RandomAccessIterator b1, RandomAccessIterator e1, RandomAccessIterator b2, RandomAccessIterator e2)
+        : d_first(b1), d_last(e1), exp_first(b2), exp_last(e2)
+    {
+    }
+#if defined(_PSTL_ICC_17_VC141_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) ||                                                             \
+    defined(_PSTL_ICC_16_VC14_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) // dummy specialization by policy type, in case of broken configuration
+    template <typename InputIterator, typename Size, typename T, typename Compare>
+    void
+    operator()(pstl::execution::unsequenced_policy, InputIterator first, InputIterator last, Size n1, Size n2,
+               const T& trash, Compare compare)
+    {
+    }
+
+    template <typename InputIterator, typename Size, typename T, typename Compare>
+    void
+    operator()(pstl::execution::parallel_unsequenced_policy, InputIterator first, InputIterator last, Size n1, Size n2,
+               const T& trash, Compare compare)
+    {
+    }
+
+    template <typename InputIterator, typename Size, typename T>
+    void
+    operator()(pstl::execution::unsequenced_policy, InputIterator first, InputIterator last, Size n1, Size n2,
+               const T& trash)
+    {
+    }
+
+    template <typename InputIterator, typename Size, typename T>
+    void
+    operator()(pstl::execution::parallel_unsequenced_policy, InputIterator first, InputIterator last, Size n1, Size n2,
+               const T& trash)
+    {
+    }
+#endif
+
+    template <typename Policy, typename InputIterator, typename Size, typename T, typename Compare>
+    void
+    operator()(Policy&& exec, InputIterator first, InputIterator last, Size n1, Size n2, const T& trash,
+               Compare compare)
+    {
+        prepare_data(first, last, n1, trash);
+        RandomAccessIterator exp = std::partial_sort_copy(first, last, exp_first, exp_last, compare);
+        RandomAccessIterator res = std::partial_sort_copy(exec, first, last, d_first, d_last, compare);
+
+        EXPECT_TRUE((exp - exp_first) == (res - d_first), "wrong result from partial_sort_copy with predicate");
+        EXPECT_EQ_N(exp_first, d_first, n2, "wrong effect from partial_sort_copy with predicate");
+    }
+
+    template <typename Policy, typename InputIterator, typename Size, typename T>
+    void
+    operator()(Policy&& exec, InputIterator first, InputIterator last, Size n1, Size n2, const T& trash)
+    {
+        prepare_data(first, last, n1, trash);
+        RandomAccessIterator exp = std::partial_sort_copy(first, last, exp_first, exp_last);
+        RandomAccessIterator res = std::partial_sort_copy(exec, first, last, d_first, d_last);
+
+        EXPECT_TRUE((exp - exp_first) == (res - d_first), "wrong result from partial_sort_copy without predicate");
+        EXPECT_EQ_N(exp_first, d_first, n2, "wrong effect from partial_sort_copy without predicate");
+    }
+
+  private:
+    template <typename InputIterator, typename Size, typename T>
+    void
+    prepare_data(InputIterator first, InputIterator last, Size n1, const T& trash)
+    {
+        // The rand()%(2*n+1) encourages generation of some duplicates.
+        std::srand(42);
+        std::generate(first, last, [n1]() { return T(rand() % (2 * n1 + 1)); });
+
+        std::fill(exp_first, exp_last, trash);
+        std::fill(d_first, d_last, trash);
+    }
+};
+
+template <typename T, typename Compare>
+void
+test_partial_sort_copy(Compare compare)
+{
+
+    typedef typename Sequence<T>::iterator iterator_type;
+    const std::size_t n_max = 100000;
+    Sequence<T> in(n_max);
+    Sequence<T> out(2 * n_max);
+    Sequence<T> exp(2 * n_max);
+    std::size_t n1 = 0;
+    std::size_t n2;
+    T trash = T(-666);
+    for (; n1 < n_max; n1 = n1 <= 16 ? n1 + 1 : size_t(3.1415 * n1))
+    {
+        // If both sequences are equal
+        n2 = n1;
+        invoke_on_all_policies(
+            test_one_policy<iterator_type>(out.begin(), out.begin() + n2, exp.begin(), exp.begin() + n2), in.begin(),
+            in.begin() + n1, n1, n2, trash, compare);
+
+        // If first sequence is greater than second
+        n2 = n1 / 3;
+        invoke_on_all_policies(
+            test_one_policy<iterator_type>(out.begin(), out.begin() + n2, exp.begin(), exp.begin() + n2), in.begin(),
+            in.begin() + n1, n1, n2, trash, compare);
+
+        // If first sequence is less than second
+        n2 = 2 * n1;
+        invoke_on_all_policies(
+            test_one_policy<iterator_type>(out.begin(), out.begin() + n2, exp.begin(), exp.begin() + n2), in.begin(),
+            in.begin() + n1, n1, n2, trash, compare);
+    }
+    // Test partial_sort_copy without predicate
+    n1 = n_max;
+    n2 = 2 * n1;
+    invoke_on_all_policies(test_one_policy<iterator_type>(out.begin(), out.begin() + n2, exp.begin(), exp.begin() + n2),
+                           in.begin(), in.begin() + n1, n1, n2, trash);
+}
+
+template <typename T>
+struct test_non_const
+{
+    template <typename Policy, typename InputIterator, typename OutputInterator>
+    void
+    operator()(Policy&& exec, InputIterator input_iter, OutputInterator out_iter)
+    {
+        invoke_if(exec, [&]() {
+            partial_sort_copy(exec, input_iter, input_iter, out_iter, out_iter, non_const(std::less<T>()));
+        });
+    }
+};
+
+int
+main()
+{
+    test_partial_sort_copy<Num<float32_t>>([](Num<float32_t> x, Num<float32_t> y) { return x < y; });
+    test_partial_sort_copy<int32_t>([](int32_t x, int32_t y) { return x > y; });
+
+    test_algo_basic_double<int32_t>(run_for_rnd<test_non_const<int32_t>>());
+
+    test_partial_sort_copy<MemoryChecker>(
+        [](const MemoryChecker& val1, const MemoryChecker& val2){ return val1.value() < val2.value(); });
+    EXPECT_FALSE(MemoryChecker::alive_objects() < 0, "wrong effect from partial_sort_copy: number of ctors calls < num of dtors calls");
+    EXPECT_FALSE(MemoryChecker::alive_objects() > 0, "wrong effect from partial_sort_copy: number of ctors calls > num of dtors calls");
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/algorithms/alg.sorting/sort.pass.cpp b/libcxx/test/std/pstl/algorithms/alg.sorting/sort.pass.cpp
new file mode 100644
index 00000000000000..5617a6019f801f
--- /dev/null
+++ b/libcxx/test/std/pstl/algorithms/alg.sorting/sort.pass.cpp
@@ -0,0 +1,247 @@
+// -*- C++ -*-
+//===-- sort.pass.cpp -----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <algorithm>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+#define _CRT_SECURE_NO_WARNINGS
+
+#include <atomic>
+
+static bool Stable;
+
+//! Number of extant keys
+static std::atomic<int32_t> KeyCount;
+
+//! One more than highest index in array to be sorted.
+static uint32_t LastIndex;
+
+//! Keeping Equal() static and a friend of ParanoidKey class (C++, paragraphs 3.5/7.1.1)
+class ParanoidKey;
+static bool
+Equal(const ParanoidKey& x, const ParanoidKey& y);
+
+//! A key to be sorted, with lots of checking. Every constructor increments KeyCount; the destructor decrements it.
+class ParanoidKey
+{
+    //! Value used by comparator
+    int32_t value;
+    //! Original position or special value (Empty or Dead)
+    int32_t index;
+    //! Special value used to mark object without a comparable value, e.g. after being moved from.
+    static const int32_t Empty = -1;
+    //! Special value used to mark destroyed objects.
+    static const int32_t Dead = -2;
+    // True if key object has comparable value. Empty and Dead are negative, so the unsigned cast makes them huge.
+    bool
+    isLive() const
+    {
+        return static_cast<uint32_t>(index) < LastIndex;
+    }
+    // True if key object has been constructed.
+    bool
+    isConstructed() const
+    {
+        return isLive() || index == Empty;
+    }
+
+  public:
+    ParanoidKey()
+    {
+        ++KeyCount;
+        index = Empty;
+        value = Empty;
+    }
+    ParanoidKey(const ParanoidKey& k) : value(k.value), index(k.index)
+    {
+        EXPECT_TRUE(k.isLive(), "source for copy-constructor is dead");
+        ++KeyCount;
+    }
+    ~ParanoidKey()
+    {
+        EXPECT_TRUE(isConstructed(), "double destruction");
+        index = Dead;
+        --KeyCount;
+    }
+    ParanoidKey&
+    operator=(const ParanoidKey& k)
+    {
+        EXPECT_TRUE(k.isLive(), "source for copy-assignment is dead");
+        EXPECT_TRUE(isConstructed(), "destination for copy-assignment is dead");
+        value = k.value;
+        index = k.index;
+        return *this;
+    }
+    ParanoidKey(int32_t index, int32_t value, OddTag) : value(value), index(index) { ++KeyCount; }
+    ParanoidKey(ParanoidKey&& k) : value(k.value), index(k.index)
+    {
+        EXPECT_TRUE(k.isConstructed(), "source for move-construction is dead");
+// std::stable_sort() fails in move semantics on paranoid test before VS2015
+#if !defined(_MSC_VER) || _MSC_VER >= 1900
+        k.index = Empty;
+#endif
+        ++KeyCount;
+    }
+    ParanoidKey&
+    operator=(ParanoidKey&& k)
+    {
+        EXPECT_TRUE(k.isConstructed(), "source for move-assignment is dead");
+        EXPECT_TRUE(isConstructed(), "destination for move-assignment is dead");
+        value = k.value;
+        index = k.index;
+// std::stable_sort() fails in move semantics on paranoid test before VS2015
+#if !defined(_MSC_VER) || _MSC_VER >= 1900
+        k.index = Empty;
+#endif
+        return *this;
+    }
+    friend class KeyCompare;
+    friend bool
+    Equal(const ParanoidKey& x, const ParanoidKey& y);
+};
+
+class KeyCompare
+{
+    enum statusType
+    {
+        //! Special value used to mark defined object.
+        Live = 0xabcd,
+        //! Special value used to mark destroyed objects.
+        Dead = -1
+    } status;
+
+  public:
+    KeyCompare(OddTag) : status(Live) {}
+    ~KeyCompare() { status = Dead; }
+    bool
+    operator()(const ParanoidKey& j, const ParanoidKey& k) const
+    {
+        EXPECT_TRUE(status == Live, "key comparison object not defined");
+        EXPECT_TRUE(j.isLive(), "first key to operator() is not live");
+        EXPECT_TRUE(k.isLive(), "second key to operator() is not live");
+        return j.value < k.value;
+    }
+};
+
+// Equal is equality comparison used for checking result of sort against expected result.
+static bool
+Equal(const ParanoidKey& x, const ParanoidKey& y)
+{
+    return (x.value == y.value && !Stable) || (x.index == y.index);
+}
+
+static bool
+Equal(float32_t x, float32_t y)
+{
+    return x == y;
+}
+
+static bool
+Equal(int32_t x, int32_t y)
+{
+    return x == y;
+}
+
+struct test_sort_with_compare
+{
+    template <typename Policy, typename InputIterator, typename OutputIterator, typename OutputIterator2, typename Size,
+              typename Compare>
+    typename std::enable_if<is_same_iterator_category<InputIterator, std::random_access_iterator_tag>::value,
+                            void>::type
+    operator()(Policy&& exec, OutputIterator tmp_first, OutputIterator tmp_last, OutputIterator2 expected_first,
+               OutputIterator2 expected_last, InputIterator first, InputIterator, Size n, Compare compare)
+    {
+        using namespace std;
+        copy_n(first, n, expected_first);
+        copy_n(first, n, tmp_first);
+        if (Stable)
+            std::stable_sort(expected_first + 1, expected_last - 1, compare);
+        else
+            std::sort(expected_first + 1, expected_last - 1, compare);
+        int32_t count0 = KeyCount;
+        if (Stable)
+            stable_sort(exec, tmp_first + 1, tmp_last - 1, compare);
+        else
+            sort(exec, tmp_first + 1, tmp_last - 1, compare);
+
+        for (size_t i = 0; i < n; ++i, ++expected_first, ++tmp_first)
+        {
+            // Check that expected[i] is equal to tmp[i]
+            EXPECT_TRUE(Equal(*expected_first, *tmp_first), "bad sort");
+        }
+        int32_t count1 = KeyCount;
+        EXPECT_EQ(count0, count1, "key cleanup error");
+    }
+    template <typename Policy, typename InputIterator, typename OutputIterator, typename OutputIterator2, typename Size,
+              typename Compare>
+    typename std::enable_if<!is_same_iterator_category<InputIterator, std::random_access_iterator_tag>::value,
+                            void>::type
+    operator()(Policy&&, OutputIterator, OutputIterator, OutputIterator2, OutputIterator2, InputIterator, InputIterator,
+               Size, Compare)
+    {
+    }
+};
+
+template <typename T, typename Compare, typename Convert>
+void
+test_sort(Compare compare, Convert convert)
+{
+    for (size_t n = 0; n < 100000; n = n <= 16 ? n + 1 : size_t(3.1415 * n))
+    {
+        LastIndex = n + 2;
+        // The rand()%(2*n+1) encourages generation of some duplicates.
+        // Sequence is padded with an extra element at front and back, to detect overwrite bugs.
+        Sequence<T> in(n + 2, [=](size_t k) { return convert(k, rand() % (2 * n + 1)); });
+        Sequence<T> expected(in);
+        Sequence<T> tmp(in);
+        invoke_on_all_policies(test_sort_with_compare(), tmp.begin(), tmp.end(), expected.begin(), expected.end(),
+                               in.begin(), in.end(), in.size(), compare);
+    }
+}
+
+template <typename T>
+struct test_non_const
+{
+    template <typename Policy, typename Iterator>
+    void
+    operator()(Policy&& exec, Iterator iter)
+    {
+        sort(exec, iter, iter, non_const(std::less<T>()));
+        stable_sort(exec, iter, iter, non_const(std::less<T>()));
+    }
+};
+
+int
+main()
+{
+    std::srand(42);
+    for (int32_t kind = 0; kind < 2; ++kind)
+    {
+        Stable = kind != 0;
+        test_sort<ParanoidKey>(KeyCompare(OddTag()),
+                               [](size_t k, size_t val) { return ParanoidKey(k, val, OddTag()); });
+        test_sort<float32_t>([](float32_t x, float32_t y) { return x < y; },
+                             [](size_t, size_t val) { return float32_t(val); });
+        test_sort<int32_t>(
+            [](int32_t x, int32_t y) { return x > y; }, // Reversed so accidental use of < will be detected.
+            [](size_t, size_t val) { return int32_t(val); });
+    }
+
+    test_algo_basic_single<int32_t>(run_for_rnd<test_non_const<int32_t>>());
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/lit.local.cfg b/libcxx/test/std/pstl/lit.local.cfg
new file mode 100644
index 00000000000000..6b1e2c65790626
--- /dev/null
+++ b/libcxx/test/std/pstl/lit.local.cfg
@@ -0,0 +1,2 @@
+if 'parallel-algorithms' not in config.available_features:
+    config.unsupported = True
diff --git a/libcxx/test/std/pstl/numerics/numeric.ops/adjacent_difference.pass.cpp b/libcxx/test/std/pstl/numerics/numeric.ops/adjacent_difference.pass.cpp
new file mode 100644
index 00000000000000..8e56f61c0ccba2
--- /dev/null
+++ b/libcxx/test/std/pstl/numerics/numeric.ops/adjacent_difference.pass.cpp
@@ -0,0 +1,170 @@
+// -*- C++ -*-
+//===-- adjacent_difference.pass.cpp --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <iterator>
+#include <execution>
+#include <numeric>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+template <typename T>
+struct wrapper
+{
+    T t;
+    constexpr explicit wrapper(T t_) : t(t_) {}
+    // Converting constructor from a wrapper of another value type. The member is set in the
+    // mem-initializer list: before C++20 a constexpr constructor must initialize every member there.
+    template <typename T2>
+    constexpr wrapper(const wrapper<T2>& a) : t(a.t) {}
+    // Converting assignment from a wrapper of another value type (returns nothing).
+    template <typename T2>
+    constexpr void
+    operator=(const wrapper<T2>& a)
+    {
+        t = a.t;
+    }
+    constexpr wrapper<T>
+    operator-(const wrapper<T>& a) const
+    {
+        return wrapper<T>(t - a.t);
+    }
+};
+
+template <typename T>
+bool
+compare(const T& a, const T& b)
+{
+    return a == b;
+}
+
+template <typename T>
+bool
+compare(const wrapper<T>& a, const wrapper<T>& b)
+{
+    return a.t == b.t;
+}
+
+template <typename Iterator1, typename Iterator2, typename T, typename Function>
+typename std::enable_if<!std::is_floating_point<T>::value, bool>::type
+compute_and_check(Iterator1 first, Iterator1 last, Iterator2 d_first, T, Function f)
+{
+    using T2 = typename std::iterator_traits<Iterator2>::value_type;
+
+    if (first == last)
+        return true;
+
+    {
+        T2 temp(*first);
+        if (!compare(temp, *d_first))
+            return false;
+    }
+    Iterator1 second = std::next(first);
+
+    ++d_first;
+    for (; second != last; ++first, ++second, ++d_first)
+    {
+        T2 temp(f(*second, *first));
+        if (!compare(temp, *d_first))
+            return false;
+    }
+
+    return true;
+}
+
+// we don't want to check equality here
+// because we can't be sure it will be strictly equal for floating point types
+template <typename Iterator1, typename Iterator2, typename T, typename Function>
+typename std::enable_if<std::is_floating_point<T>::value, bool>::type compute_and_check(Iterator1, Iterator1, Iterator2,
+                                                                                        T, Function)
+{
+    return true;
+}
+
+struct test_one_policy
+{
+#if defined(_PSTL_ICC_17_VC141_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) ||                                                             \
+    defined(_PSTL_ICC_16_VC14_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) // dummy specialization by policy type, in case of broken configuration
+    template <typename Iterator1, typename Iterator2, typename T, typename Function>
+    typename std::enable_if<is_same_iterator_category<Iterator1, std::random_access_iterator_tag>::value, void>::type
+    operator()(pstl::execution::unsequenced_policy, Iterator1 data_b, Iterator1 data_e, Iterator2 actual_b,
+               Iterator2 actual_e, T trash, Function f)
+    {
+    }
+    template <typename Iterator1, typename Iterator2, typename T, typename Function>
+    typename std::enable_if<is_same_iterator_category<Iterator1, std::random_access_iterator_tag>::value, void>::type
+    operator()(pstl::execution::parallel_unsequenced_policy, Iterator1 data_b, Iterator1 data_e, Iterator2 actual_b,
+               Iterator2 actual_e, T trash, Function f)
+    {
+    }
+#endif
+
+    template <typename ExecutionPolicy, typename Iterator1, typename Iterator2, typename T, typename Function>
+    void
+    operator()(ExecutionPolicy&& exec, Iterator1 data_b, Iterator1 data_e, Iterator2 actual_b, Iterator2 actual_e,
+               T trash, Function f)
+    {
+        using namespace std;
+        using T2 = typename std::iterator_traits<Iterator1>::value_type;
+
+        fill(actual_b, actual_e, trash);
+
+        Iterator2 actual_return = adjacent_difference(exec, data_b, data_e, actual_b);
+        EXPECT_TRUE(compute_and_check(data_b, data_e, actual_b, T2(0), std::minus<T2>()),
+                    "wrong effect of adjacent_difference");
+        EXPECT_TRUE(actual_return == actual_e, "wrong result of adjacent_difference");
+
+        fill(actual_b, actual_e, trash);
+
+        actual_return = adjacent_difference(exec, data_b, data_e, actual_b, f);
+        EXPECT_TRUE(compute_and_check(data_b, data_e, actual_b, T2(0), f),
+                    "wrong effect of adjacent_difference with functor");
+        EXPECT_TRUE(actual_return == actual_e, "wrong result of adjacent_difference with functor");
+    }
+};
+
+template <typename T1, typename T2, typename Pred>
+void
+test(Pred pred)
+{
+    const std::size_t max_len = 100000;
+
+    static constexpr T2 value = T2(77);
+    static constexpr T1 trash = T1(31);
+
+    Sequence<T1> actual(max_len, [](std::size_t i) { return T1(i); });
+
+    Sequence<T2> data(max_len, [](std::size_t i) { return i % 3 == 2 ? T2(i * i) : value; });
+
+    for (std::size_t len = 0; len < max_len; len = len <= 16 ? len + 1 : std::size_t(3.1415 * len))
+    {
+        invoke_on_all_policies(test_one_policy(), data.begin(), data.begin() + len, actual.begin(),
+                               actual.begin() + len, trash, pred);
+        invoke_on_all_policies(test_one_policy(), data.cbegin(), data.cbegin() + len, actual.begin(),
+                               actual.begin() + len, trash, pred);
+    }
+}
+
+int
+main()
+{
+    test<uint8_t, uint32_t>([](uint32_t a, uint32_t b) { return a - b; });
+    test<int32_t, int64_t>([](int64_t a, int64_t b) { return a / (b + 1); });
+    test<int64_t, float32_t>([](float32_t a, float32_t b) { return (a + b) / 2; });
+    test<wrapper<int32_t>, wrapper<int64_t>>(
+        [](const wrapper<int64_t>& a, const wrapper<int64_t>& b) { return a - b; });
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/numerics/numeric.ops/reduce.pass.cpp b/libcxx/test/std/pstl/numerics/numeric.ops/reduce.pass.cpp
new file mode 100644
index 00000000000000..b2144b96f5ddbb
--- /dev/null
+++ b/libcxx/test/std/pstl/numerics/numeric.ops/reduce.pass.cpp
@@ -0,0 +1,114 @@
+// -*- C++ -*-
+//===-- reduce.pass.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <numeric>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+struct test_long_forms_for_one_policy
+{
+    template <typename Policy, typename Iterator, typename T, typename BinaryOp>
+    void
+    operator()(Policy&& exec, Iterator first, Iterator last, T init, BinaryOp binary, T expected)
+    {
+        T result_r = std::reduce(exec, first, last, init, binary);
+        EXPECT_EQ(expected, result_r, "bad result from reduce(exec, first, last, init, binary_op)");
+    }
+};
+
+template <typename T, typename BinaryOp, typename F>
+void
+test_long_form(T init, BinaryOp binary_op, F f)
+{
+    // Try sequences of various lengths: n grows by 1 while n <= 16, then by a factor of about 3.1415.
+    for (size_t n = 0; n <= 100000; n = n <= 16 ? n + 1 : size_t(3.1415 * n))
+    {
+        T expected(init);
+        Sequence<T> in(n, [n, f](size_t k) { return f((int32_t(k ^ n) % 1000 - 500)); });
+        for (size_t k = 0; k < n; ++k)
+            expected = binary_op(expected, in[k]);
+        // expected now holds the left-to-right fold of the input, starting from init.
+        using namespace std;
+
+        T result = transform_reduce_serial(in.cfbegin(), in.cfend(), init, binary_op, [](const T& t) { return t; });
+        EXPECT_EQ(expected, result, "bad result from reduce(first, last, init, binary_op)");
+
+        invoke_on_all_policies(test_long_forms_for_one_policy(), in.begin(), in.end(), init, binary_op, expected);
+        invoke_on_all_policies(test_long_forms_for_one_policy(), in.cbegin(), in.cend(), init, binary_op, expected);
+    }
+}
+
+struct test_two_short_forms
+{
+
+#if defined(_PSTL_ICC_16_VC14_TEST_PAR_TBB_RT_RELEASE_64_BROKEN) //dummy specialization by policy type, in case of broken configuration
+    template <typename Iterator>
+    void
+    operator()(pstl::execution::parallel_policy, Iterator first, Iterator last, Sum init, Sum expected)
+    {
+    }
+    template <typename Iterator>
+    void
+    operator()(pstl::execution::parallel_unsequenced_policy, Iterator first, Iterator last, Sum init, Sum expected)
+    {
+    }
+#endif
+
+    template <typename Policy, typename Iterator>
+    void
+    operator()(Policy&& exec, Iterator first, Iterator last, Sum init, Sum expected)
+    {
+        using namespace std;
+
+        Sum r0 = init + reduce(exec, first, last);
+        EXPECT_EQ(expected, r0, "bad result from reduce(exec, first, last)");
+
+        Sum r1 = reduce(exec, first, last, init);
+        EXPECT_EQ(expected, r1, "bad result from reduce(exec, first, last, init)");
+    }
+};
+
+// Test forms of reduce(...) that omit the binary_op or init operands.
+void
+test_short_forms()
+{
+    for (size_t n = 0; n <= 100000; n = n <= 16 ? n + 1 : size_t(3.1415 * n))
+    {
+        Sum init(42, OddTag());
+        Sum expected(init);
+        Sequence<Sum> in(n, [n](size_t k) { return Sum((int32_t(k ^ n) % 1000 - 500), OddTag()); });
+        for (size_t k = 0; k < n; ++k)
+            expected = expected + in[k];
+        invoke_on_all_policies(test_two_short_forms(), in.begin(), in.end(), init, expected);
+        invoke_on_all_policies(test_two_short_forms(), in.cbegin(), in.cend(), init, expected);
+    }
+}
+
+int
+main()
+{
+    // Test for popular types
+    test_long_form(42, std::plus<int32_t>(), [](int32_t x) { return x; });
+    test_long_form(42.0, std::plus<float64_t>(), [](float64_t x) { return x; });
+
+    // Test for strict types
+    test_long_form<Number>(Number(42, OddTag()), Add(OddTag()), [](int32_t x) { return Number(x, OddTag()); });
+
+    // Short forms are just facade for long forms, so just test with a single type.
+    test_short_forms();
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/numerics/numeric.ops/scan.fail.cpp b/libcxx/test/std/pstl/numerics/numeric.ops/scan.fail.cpp
new file mode 100644
index 00000000000000..b17944ae084644
--- /dev/null
+++ b/libcxx/test/std/pstl/numerics/numeric.ops/scan.fail.cpp
@@ -0,0 +1,28 @@
+// -*- C++ -*-
+//===-- scan.fail.cpp -----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include <execution>
+#include <numeric>
+
+struct CustomPolicy
+{
+} policy;
+
+int
+main()
+{
+    int *first = nullptr, *last = nullptr, *result = nullptr; // null pointers suffice: the calls below must be rejected at compile time
+
+    std::exclusive_scan(policy, first, last, result, 0); // expected-error {{no matching function for call to 'exclusive_scan'}}
+    std::exclusive_scan(policy, first, last, result, 0, std::plus<int>()); // expected-error {{no matching function for call to 'exclusive_scan'}}
+
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/numerics/numeric.ops/scan.pass.cpp b/libcxx/test/std/pstl/numerics/numeric.ops/scan.pass.cpp
new file mode 100644
index 00000000000000..e89edc71a2dd0f
--- /dev/null
+++ b/libcxx/test/std/pstl/numerics/numeric.ops/scan.pass.cpp
@@ -0,0 +1,201 @@
+// -*- C++ -*-
+//===-- scan.pass.cpp -----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <numeric>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+// We provide serial (no execution policy) versions of exclusive_scan and inclusive_scan in order to check the correctness of the results of the versions with execution policies.
+//TODO: to add a macro for availability of ver implementations
+template <class InputIterator, class OutputIterator, class T>
+OutputIterator
+exclusive_scan_serial(InputIterator first, InputIterator last, OutputIterator result, T init)
+{
+    for (; first != last; ++first, ++result) // serial reference scan using operator+; input and output advance in lockstep
+    {
+        *result = init;       // exclusive: store the running sum before the current element is added
+        init = init + *first; // fold the current element into the running sum
+    }
+    return result; // one past the last element written
+}
+
+template <class InputIterator, class OutputIterator, class T, class BinaryOperation>
+OutputIterator
+exclusive_scan_serial(InputIterator first, InputIterator last, OutputIterator result, T init, BinaryOperation binary_op)
+{
+    for (; first != last; ++first, ++result) // serial reference scan using binary_op; input and output advance in lockstep
+    {
+        *result = init;                 // exclusive: store the running value before the current element is combined
+        init = binary_op(init, *first); // running value is always the left operand, so the input order is preserved
+    }
+    return result; // one past the last element written
+}
+
+// Note: N4582 is missing the ", class T".  Issue was reported 2016-Apr-11 to cxxeditor at gmail.com
+template <class InputIterator, class OutputIterator, class BinaryOperation, class T>
+OutputIterator
+inclusive_scan_serial(InputIterator first, InputIterator last, OutputIterator result, BinaryOperation binary_op, T init)
+{
+    for (; first != last; ++first, ++result) // serial reference scan seeded with init
+    {
+        init = binary_op(init, *first); // inclusive: combine the current element first ...
+        *result = init;                 // ... then store, so each output includes its own input element
+    }
+    return result; // one past the last element written
+}
+
+template <class InputIterator, class OutputIterator, class BinaryOperation>
+OutputIterator
+inclusive_scan_serial(InputIterator first, InputIterator last, OutputIterator result, BinaryOperation binary_op)
+{
+    if (first != last) // without an init value, the first input element seeds the scan
+    {
+        auto tmp = *first;
+        *result = tmp; // the first output is the first input, unchanged
+        return inclusive_scan_serial(++first, last, ++result, binary_op, tmp); // scan the remainder with tmp as init
+    }
+    else
+    {
+        return result; // empty input: nothing is written
+    }
+}
+
+template <class InputIterator, class OutputIterator>
+OutputIterator
+inclusive_scan_serial(InputIterator first, InputIterator last, OutputIterator result)
+{
+    typedef typename std::iterator_traits<InputIterator>::value_type input_type; // element type of the input range
+    return inclusive_scan_serial(first, last, result, std::plus<input_type>()); // delegate to the binary_op form with addition
+}
+
+// Most of the framework required for testing inclusive and exclusive scan is identical,
+// so the tests for both are in this file.  Which is being tested is controlled by the global
+// flag inclusive, which is set to each alternative by main().
+static bool inclusive;
+
+template <typename Iterator, typename Size, typename T>
+void
+check_and_reset(Iterator expected_first, Iterator out_first, Size n, T trash)
+{
+    EXPECT_EQ_N(expected_first, out_first, n,
+                inclusive ? "wrong result from inclusive_scan" : "wrong result from exclusive_scan");
+    std::fill_n(out_first, n, trash);
+}
+
+struct test_scan_with_plus
+{
+    template <typename Policy, typename Iterator1, typename Iterator2, typename Iterator3, typename Size, typename T>
+    void
+    operator()(Policy&& exec, Iterator1 in_first, Iterator1 in_last, Iterator2 out_first, Iterator2 out_last,
+               Iterator3 expected_first, Iterator3, Size n, T init, T trash)
+    {
+        using namespace std;
+
+        auto orr1 = inclusive ? inclusive_scan_serial(in_first, in_last, expected_first)
+                              : exclusive_scan_serial(in_first, in_last, expected_first, init);
+        (void)orr1;
+        auto orr = inclusive ? inclusive_scan(exec, in_first, in_last, out_first)
+                             : exclusive_scan(exec, in_first, in_last, out_first, init);
+        EXPECT_TRUE(out_last == orr,
+                    inclusive ? "inclusive_scan returned wrong iterator" : "exclusive_scan returned wrong iterator");
+
+        check_and_reset(expected_first, out_first, n, trash);
+        fill(out_first, out_last, trash);
+    }
+};
+
+template <typename T, typename Convert>
+void
+test_with_plus(T init, T trash, Convert convert)
+{
+    for (size_t n = 0; n <= 100000; n = n <= 16 ? n + 1 : size_t(3.1415 * n))
+    {
+        Sequence<T> in(n, convert);
+        Sequence<T> expected(in);
+        Sequence<T> out(n, [&](int32_t) { return trash; });
+
+        invoke_on_all_policies(test_scan_with_plus(), in.begin(), in.end(), out.begin(), out.end(), expected.begin(),
+                               expected.end(), in.size(), init, trash);
+        invoke_on_all_policies(test_scan_with_plus(), in.cbegin(), in.cend(), out.begin(), out.end(), expected.begin(),
+                               expected.end(), in.size(), init, trash);
+    }
+}
+struct test_scan_with_binary_op
+{
+    template <typename Policy, typename Iterator1, typename Iterator2, typename Iterator3, typename Size, typename T,
+              typename BinaryOp>
+    typename std::enable_if<!TestUtils::isReverse<Iterator1>::value, void>::type
+    operator()(Policy&& exec, Iterator1 in_first, Iterator1 in_last, Iterator2 out_first, Iterator2 out_last,
+               Iterator3 expected_first, Iterator3, Size n, T init, BinaryOp binary_op, T trash)
+    {
+        using namespace std;
+
+        auto orr1 = inclusive ? inclusive_scan_serial(in_first, in_last, expected_first, binary_op, init)
+                              : exclusive_scan_serial(in_first, in_last, expected_first, init, binary_op);
+        (void)orr1;
+        auto orr = inclusive ? inclusive_scan(exec, in_first, in_last, out_first, binary_op, init)
+                             : exclusive_scan(exec, in_first, in_last, out_first, init, binary_op);
+
+        EXPECT_TRUE(out_last == orr, "scan returned wrong iterator");
+        check_and_reset(expected_first, out_first, n, trash);
+    }
+
+    template <typename Policy, typename Iterator1, typename Iterator2, typename Iterator3, typename Size, typename T,
+              typename BinaryOp>
+    typename std::enable_if<TestUtils::isReverse<Iterator1>::value, void>::type
+    operator()(Policy&&, Iterator1, Iterator1, Iterator2, Iterator2, Iterator3, Iterator3, Size, T, BinaryOp, T)
+    {
+    }
+};
+
+template <typename In, typename Out, typename BinaryOp>
+void
+test_matrix(Out init, BinaryOp binary_op, Out trash)
+{
+    for (size_t n = 0; n <= 100000; n = n <= 16 ? n + 1 : size_t(3.1415 * n))
+    {
+        Sequence<In> in(n, [](size_t k) { return In(k, k + 1); });
+
+        Sequence<Out> out(n, [&](size_t) { return trash; });
+        Sequence<Out> expected(n, [&](size_t) { return trash; });
+
+        invoke_on_all_policies(test_scan_with_binary_op(), in.begin(), in.end(), out.begin(), out.end(),
+                               expected.begin(), expected.end(), in.size(), init, binary_op, trash);
+        invoke_on_all_policies(test_scan_with_binary_op(), in.cbegin(), in.cend(), out.begin(), out.end(),
+                               expected.begin(), expected.end(), in.size(), init, binary_op, trash);
+    }
+}
+
+int
+main()
+{
+    for (int32_t mode = 0; mode < 2; ++mode) // run the whole suite twice, once per value of the global flag `inclusive`
+    {
+        inclusive = mode != 0; // mode 0: inclusive == false, mode 1: inclusive == true
+#if !defined(_PSTL_ICC_19_TEST_SIMD_UDS_WINDOWS_RELEASE_BROKEN)
+        // Test with a highly restricted type and an associative but not commutative operation
+        test_matrix<Matrix2x2<int32_t>, Matrix2x2<int32_t>>(Matrix2x2<int32_t>(), multiply_matrix<int32_t>,
+                                                            Matrix2x2<int32_t>(-666, 666));
+#endif
+
+        // Since the implicit "+" forms of the scans delegate to the generic forms,
+        // there's little point in using a highly restricted type, so just use double.
+        test_with_plus<float64_t>(inclusive ? 0.0 : -1.0, -666.0,
+                                  [](uint32_t k) { return float64_t((k % 991 + 1) ^ (k % 997 + 2)); });
+    }
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/numerics/numeric.ops/transform_reduce.pass.cpp b/libcxx/test/std/pstl/numerics/numeric.ops/transform_reduce.pass.cpp
new file mode 100644
index 00000000000000..f9c4538bf79316
--- /dev/null
+++ b/libcxx/test/std/pstl/numerics/numeric.ops/transform_reduce.pass.cpp
@@ -0,0 +1,129 @@
+// -*- C++ -*-
+//===-- transform_reduce.pass.cpp -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <numeric>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+// Functor for xor-operation for modeling binary operations in inner_product
+class XOR
+{
+  public:
+    template <typename T>
+    T
+    operator()(const T& left, const T& right) const
+    {
+        return left ^ right;
+    }
+};
+
+// Model of User-defined class
+class MyClass
+{
+  public:
+    int32_t my_field;
+    MyClass() { my_field = 0; }
+    MyClass(int32_t in) { my_field = in; }
+    MyClass(const MyClass& in) = default;
+
+    friend MyClass
+    operator+(const MyClass& x, const MyClass& y)
+    {
+        return MyClass(x.my_field + y.my_field);
+    }
+    friend MyClass
+    operator-(const MyClass& x)
+    {
+        return MyClass(-x.my_field);
+    }
+    friend MyClass operator*(const MyClass& x, const MyClass& y)
+    {
+        return MyClass(x.my_field * y.my_field);
+    }
+    friend bool operator==(const MyClass& x, const MyClass& y)
+    {
+        return x.my_field == y.my_field;
+    }
+};
+
+template <typename T>
+void
+CheckResults(const T& expected, const T& in)
+{
+    EXPECT_TRUE(expected == in, "wrong result of transform_reduce");
+}
+
+// Results are compared for exact equality (see the generic CheckResults above);
+// the float32_t overload below performs no check at all.
+void
+CheckResults(const float32_t&, const float32_t&)
+{
+}
+
+// Test for different types and operations with different iterators
+struct test_transform_reduce
+{
+    template <typename Policy, typename InputIterator1, typename InputIterator2, typename T, typename BinaryOperation1,
+              typename BinaryOperation2, typename UnaryOp>
+    void
+    operator()(Policy&& exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2,
+               T init, BinaryOperation1 opB1, BinaryOperation2 opB2, UnaryOp opU)
+    {
+
+        auto expectedB = std::inner_product(first1, last1, first2, init, opB1, opB2);
+        auto expectedU = transform_reduce_serial(first1, last1, init, opB1, opU);
+        T resRA = std::transform_reduce(exec, first1, last1, first2, init, opB1, opB2);
+        CheckResults(expectedB, resRA);
+        resRA = std::transform_reduce(exec, first1, last1, init, opB1, opU);
+        CheckResults(expectedU, resRA);
+    }
+};
+
+template <typename T, typename BinaryOperation1, typename BinaryOperation2, typename UnaryOp, typename Initializer>
+void
+test_by_type(T init, BinaryOperation1 opB1, BinaryOperation2 opB2, UnaryOp opU, Initializer initObj)
+{
+
+    std::size_t maxSize = 100000;
+    Sequence<T> in1(maxSize, initObj);
+    Sequence<T> in2(maxSize, initObj);
+
+    for (std::size_t n = 0; n < maxSize; n = n < 16 ? n + 1 : size_t(3.1415 * n))
+    {
+        invoke_on_all_policies(test_transform_reduce(), in1.begin(), in1.begin() + n, in2.begin(), in2.begin() + n,
+                               init, opB1, opB2, opU);
+        invoke_on_all_policies(test_transform_reduce(), in1.cbegin(), in1.cbegin() + n, in2.cbegin(), in2.cbegin() + n,
+                               init, opB1, opB2, opU);
+    }
+}
+
+int
+main()
+{
+    test_by_type<int32_t>(42, std::plus<int32_t>(), std::multiplies<int32_t>(), std::negate<int32_t>(),
+                          [](std::size_t) -> int32_t { return int32_t(rand() % 1000); });
+    test_by_type<int64_t>(0, [](const int64_t& a, const int64_t& b) -> int64_t { return a | b; }, XOR(),
+                          [](const int64_t& x) -> int64_t { return x * 2; },
+                          [](std::size_t) -> int64_t { return int64_t(rand() % 1000); });
+    test_by_type<float32_t>(
+        1.0f, std::multiplies<float32_t>(), [](const float32_t& a, const float32_t& b) -> float32_t { return a + b; },
+        [](const float32_t& x) -> float32_t { return x + 2; }, [](std::size_t) -> float32_t { return rand() % 1000; });
+    test_by_type<MyClass>(MyClass(), std::plus<MyClass>(), std::multiplies<MyClass>(), std::negate<MyClass>(),
+                          [](std::size_t) -> MyClass { return MyClass(rand() % 1000); });
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/numerics/numeric.ops/transform_scan.pass.cpp b/libcxx/test/std/pstl/numerics/numeric.ops/transform_scan.pass.cpp
new file mode 100644
index 00000000000000..95294e43c4c1d2
--- /dev/null
+++ b/libcxx/test/std/pstl/numerics/numeric.ops/transform_scan.pass.cpp
@@ -0,0 +1,177 @@
+// -*- C++ -*-
+//===-- transform_scan.pass.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <numeric>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+// Most of the framework required for testing inclusive and exclusive transform-scans is identical,
+// so the tests for both are in this file.  Which is being tested is controlled by the global
+// flag inclusive, which is set to each alternative by main().
+static bool inclusive;
+
+template <typename Iterator, typename Size, typename T>
+void
+check_and_reset(Iterator expected_first, Iterator out_first, Size n, T trash)
+{
+    EXPECT_EQ_N(expected_first, out_first, n,
+                inclusive ? "wrong result from transform_inclusive_scan"
+                          : "wrong result from transform_exclusive_scan");
+    std::fill_n(out_first, n, trash);
+}
+
+struct test_transform_scan
+{
+    template <typename Policy, typename InputIterator, typename OutputIterator, typename Size, typename UnaryOp,
+              typename T, typename BinaryOp>
+    typename std::enable_if<!TestUtils::isReverse<InputIterator>::value, void>::type
+    operator()(Policy&& exec, InputIterator first, InputIterator last, OutputIterator out_first,
+               OutputIterator out_last, OutputIterator expected_first, OutputIterator, Size n, UnaryOp unary_op, T init,
+               BinaryOp binary_op, T trash)
+    {
+        using namespace std;
+
+        auto orr1 =
+            inclusive
+                ? transform_inclusive_scan(std::execution::seq, first, last, expected_first, binary_op, unary_op, init)
+                : transform_exclusive_scan(std::execution::seq, first, last, expected_first, init, binary_op, unary_op);
+        auto orr2 = inclusive ? transform_inclusive_scan(exec, first, last, out_first, binary_op, unary_op, init)
+                              : transform_exclusive_scan(exec, first, last, out_first, init, binary_op, unary_op);
+        EXPECT_TRUE(out_last == orr2, "transform...scan returned wrong iterator");
+        check_and_reset(expected_first, out_first, n, trash);
+
+        // Checks inclusive scan if init is not provided
+        if (inclusive && n > 0)
+        {
+            orr1 = transform_inclusive_scan(std::execution::seq, first, last, expected_first, binary_op, unary_op);
+            orr2 = transform_inclusive_scan(exec, first, last, out_first, binary_op, unary_op);
+            EXPECT_TRUE(out_last == orr2, "transform...scan returned wrong iterator");
+            check_and_reset(expected_first, out_first, n, trash);
+        }
+    }
+
+    template <typename Policy, typename InputIterator, typename OutputIterator, typename Size, typename UnaryOp,
+              typename T, typename BinaryOp>
+    typename std::enable_if<TestUtils::isReverse<InputIterator>::value, void>::type
+    operator()(Policy&&, InputIterator, InputIterator, OutputIterator, OutputIterator, OutputIterator, OutputIterator,
+               Size, UnaryOp, T, BinaryOp, T)
+    {
+    }
+};
+
+const uint32_t encryption_mask = 0x314;
+
+template <typename InputIterator, typename OutputIterator, typename UnaryOperation, typename T,
+          typename BinaryOperation>
+std::pair<OutputIterator, T>
+transform_inclusive_scan_serial(InputIterator first, InputIterator last, OutputIterator result, UnaryOperation unary_op,
+                                T init, BinaryOperation binary_op) noexcept
+{
+    for (; first != last; ++first, ++result) // serial reference implementation
+    {
+        init = binary_op(init, unary_op(*first)); // inclusive: combine the transformed element first ...
+        *result = init;                           // ... then store the running value
+    }
+    return std::make_pair(result, init); // (end of the written output, final running value)
+}
+
+template <typename InputIterator, typename OutputIterator, typename UnaryOperation, typename T,
+          typename BinaryOperation>
+std::pair<OutputIterator, T>
+transform_exclusive_scan_serial(InputIterator first, InputIterator last, OutputIterator result, UnaryOperation unary_op,
+                                T init, BinaryOperation binary_op) noexcept
+{
+    for (; first != last; ++first, ++result) // serial reference implementation
+    {
+        *result = init;                           // exclusive: store the running value before combining
+        init = binary_op(init, unary_op(*first)); // fold the transformed element into the running value
+    }
+    return std::make_pair(result, init); // (end of the written output, final running value)
+}
+
+template <typename In, typename Out, typename UnaryOp, typename BinaryOp>
+void
+test(UnaryOp unary_op, Out init, BinaryOp binary_op, Out trash)
+{
+    for (size_t n = 0; n <= 100000; n = n <= 16 ? n + 1 : size_t(3.1415 * n))
+    {
+        Sequence<In> in(n, [](size_t k) { return In(k ^ encryption_mask); });
+
+        Out tmp = init;
+        Sequence<Out> expected(n, [&](size_t k) -> Out {
+            if (inclusive)
+            {
+                tmp = binary_op(tmp, unary_op(in[k]));
+                return tmp;
+            }
+            else
+            {
+                Out val = tmp;
+                tmp = binary_op(tmp, unary_op(in[k]));
+                return val;
+            }
+        });
+
+        Sequence<Out> out(n, [&](size_t) { return trash; });
+
+        auto result =
+            inclusive
+                ? transform_inclusive_scan_serial(in.cbegin(), in.cend(), out.fbegin(), unary_op, init, binary_op)
+                : transform_exclusive_scan_serial(in.cbegin(), in.cend(), out.fbegin(), unary_op, init, binary_op);
+        (void)result;
+        check_and_reset(expected.begin(), out.begin(), out.size(), trash);
+
+        invoke_on_all_policies(test_transform_scan(), in.begin(), in.end(), out.begin(), out.end(), expected.begin(),
+                               expected.end(), in.size(), unary_op, init, binary_op, trash);
+        invoke_on_all_policies(test_transform_scan(), in.cbegin(), in.cend(), out.begin(), out.end(), expected.begin(),
+                               expected.end(), in.size(), unary_op, init, binary_op, trash);
+    }
+}
+
+template <typename In, typename Out, typename UnaryOp, typename BinaryOp>
+void
+test_matrix(UnaryOp unary_op, Out init, BinaryOp binary_op, Out trash)
+{
+    for (size_t n = 0; n <= 100000; n = n <= 16 ? n + 1 : size_t(3.1415 * n))
+    {
+        Sequence<In> in(n, [](size_t k) { return In(k, k + 1); });
+
+        Sequence<Out> out(n, [&](size_t) { return trash; });
+        Sequence<Out> expected(n, [&](size_t) { return trash; });
+
+        invoke_on_all_policies(test_transform_scan(), in.begin(), in.end(), out.begin(), out.end(), expected.begin(),
+                               expected.end(), in.size(), unary_op, init, binary_op, trash);
+        invoke_on_all_policies(test_transform_scan(), in.cbegin(), in.cend(), out.begin(), out.end(), expected.begin(),
+                               expected.end(), in.size(), unary_op, init, binary_op, trash);
+    }
+}
+
+int
+main()
+{
+    for (int32_t mode = 0; mode < 2; ++mode)
+    {
+        inclusive = mode != 0;
+#if !defined(_PSTL_ICC_19_TEST_SIMD_UDS_WINDOWS_RELEASE_BROKEN)
+        test_matrix<Matrix2x2<int32_t>, Matrix2x2<int32_t>>([](const Matrix2x2<int32_t> x) { return x; },
+                                                            Matrix2x2<int32_t>(), multiply_matrix<int32_t>,
+                                                            Matrix2x2<int32_t>(-666, 666));
+#endif
+        test<int32_t, uint32_t>([](int32_t x) { return x++; }, -123, [](int32_t x, int32_t y) { return x + y; }, 666);
+    }
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/utilities/memory/specialized.algorithms/uninitialized_construct.pass.cpp b/libcxx/test/std/pstl/utilities/memory/specialized.algorithms/uninitialized_construct.pass.cpp
new file mode 100644
index 00000000000000..a5bdbb3a9c7e1a
--- /dev/null
+++ b/libcxx/test/std/pstl/utilities/memory/specialized.algorithms/uninitialized_construct.pass.cpp
@@ -0,0 +1,123 @@
+// -*- C++ -*-
+//===-- uninitialized_construct.pass.cpp ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+// Tests for uninitialized_default_construct, uninitialized_default_construct_n,
+//           uninitialized_value_construct,   uninitialized_value_construct_n
+
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <memory>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+// Returns true if every element of [begin, end) equals a value-initialized T (used to verify uninitialized_value_construct*)
+template <typename T, typename Iterator>
+bool
+IsCheckValueCorrectness(Iterator begin, Iterator end)
+{
+    for (; begin != end; ++begin)
+    {
+        if (*begin != T())
+        {
+            return false;
+        }
+    }
+    return true;
+}
+
+struct test_uninit_construct
+{
+    template <typename Policy, typename Iterator>
+    void
+    operator()(Policy&& exec, Iterator begin, Iterator end, size_t n, /*is_trivial<T>=*/std::false_type)
+    {
+        typedef typename std::iterator_traits<Iterator>::value_type T;
+        // Destroy the objects that are alive in the buffer on entry: the caller creates it with new T[n],
+        // and this function itself leaves the objects built by its final uninitialized_value_construct_n alive.
+        // This matters for non-trivial types.
+        std::destroy_n(exec, begin, n);
+
+        // reset the counter that T maintains
+        T::SetCount(0);
+        // run the algorithm under test
+        std::uninitialized_default_construct(exec, begin, end);
+        // the counter must now equal the length of the range
+        EXPECT_TRUE(T::Count() == n, "wrong uninitialized_default_construct");
+        // destroy the objects so that the next algorithm can be tested on the same memory
+        std::destroy(exec, begin, end);
+
+        std::uninitialized_default_construct_n(exec, begin, n);
+        EXPECT_TRUE(T::Count() == n, "wrong uninitialized_default_construct_n");
+        std::destroy_n(exec, begin, n);
+
+        std::uninitialized_value_construct(exec, begin, end);
+        EXPECT_TRUE(T::Count() == n, "wrong uninitialized_value_construct");
+        std::destroy(exec, begin, end);
+
+        std::uninitialized_value_construct_n(exec, begin, n); // deliberately not destroyed: the buffer is left holding live objects
+        EXPECT_TRUE(T::Count() == n, "wrong uninitialized_value_construct_n");
+    }
+
+    template <typename Policy, typename Iterator>
+    void
+    operator()(Policy&& exec, Iterator begin, Iterator end, size_t n, /*is_trivial<T>=*/std::true_type)
+    {
+        typedef typename std::iterator_traits<Iterator>::value_type T;
+
+        std::uninitialized_default_construct(exec, begin, end); // no value check after the default-construct calls
+        std::destroy(exec, begin, end);
+
+        std::uninitialized_default_construct_n(exec, begin, n);
+        std::destroy_n(exec, begin, n);
+
+        std::uninitialized_value_construct(exec, begin, end);
+        // every element must compare equal to T() after value construction
+        EXPECT_TRUE(IsCheckValueCorrectness<T>(begin, end), "wrong uninitialized_value_construct");
+        std::destroy(exec, begin, end);
+
+        std::uninitialized_value_construct_n(exec, begin, n);
+        EXPECT_TRUE(IsCheckValueCorrectness<T>(begin, end), "wrong uninitialized_value_construct_n");
+        std::destroy_n(exec, begin, n);
+    }
+};
+
+template <typename T>
+void
+test_uninit_construct_by_type()
+{
+    std::size_t N = 100000;
+    for (size_t n = 0; n <= N; n = n <= 16 ? n + 1 : size_t(3.1415 * n))
+    {
+        std::unique_ptr<T[]> p(new T[n]);
+        invoke_on_all_policies(test_uninit_construct(), p.get(), std::next(p.get(), n), n, std::is_trivial<T>());
+    }
+}
+
+int
+main()
+{
+
+    // for user-defined types
+#if !defined(_PSTL_ICC_16_VC14_TEST_PAR_TBB_RT_RELEASE_64_BROKEN)
+    test_uninit_construct_by_type<Wrapper<int32_t>>();
+    test_uninit_construct_by_type<Wrapper<std::vector<std::string>>>();
+#endif
+
+    // for trivial types
+    test_uninit_construct_by_type<int8_t>();
+    test_uninit_construct_by_type<float64_t>();
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/utilities/memory/specialized.algorithms/uninitialized_copy_move.pass.cpp b/libcxx/test/std/pstl/utilities/memory/specialized.algorithms/uninitialized_copy_move.pass.cpp
new file mode 100644
index 00000000000000..21186b41ee139b
--- /dev/null
+++ b/libcxx/test/std/pstl/utilities/memory/specialized.algorithms/uninitialized_copy_move.pass.cpp
@@ -0,0 +1,143 @@
+// -*- C++ -*-
+//===-- uninitialized_copy_move.pass.cpp ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+// Tests for uninitialized_copy, uninitialized_copy_n, uninitialized_move, uninitialized_move_n
+
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <memory>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+// Returns true if the first n elements starting at first1 and at first2 compare equal pairwise
+template <typename InputIterator, typename OutputIterator, typename Size>
+bool
+IsCheckValueCorrectness(InputIterator first1, OutputIterator first2, Size n)
+{
+    for (Size i = 0; i < n; ++i, ++first1, ++first2)
+    {
+        if (*first1 != *first2)
+        {
+            return false;
+        }
+    }
+    return true;
+}
+
+struct test_uninitialized_copy_move // exercises uninitialized_copy(_n) and uninitialized_move(_n) into a caller-owned buffer
+{
+    template <typename Policy, typename InputIterator, typename OutputIterator>
+    void
+    operator()(Policy&& exec, InputIterator first, InputIterator last, OutputIterator out_first, size_t n,
+               /*is_trivial<T>=*/std::false_type)
+    {
+        typedef typename std::iterator_traits<InputIterator>::value_type T;
+        // Destroy the objects that are alive in the output buffer on entry: the caller creates it with new T[n],
+        // and this function itself leaves the objects built by its final uninitialized_move_n alive.
+        // This matters for non-trivial types.
+        std::destroy_n(exec, out_first, n);
+
+        // reset the counter that T maintains
+        T::SetCount(0);
+        // run the algorithm under test
+        std::uninitialized_copy(exec, first, last, out_first);
+        // the counter must now equal the length of the range
+        EXPECT_TRUE(T::Count() == n, "wrong uninitialized_copy");
+        // destroy the objects so that the next algorithm can be tested on the same memory
+        std::destroy_n(exec, out_first, n);
+
+        std::uninitialized_copy_n(exec, first, n, out_first);
+        EXPECT_TRUE(T::Count() == n, "wrong uninitialized_copy_n");
+        std::destroy_n(exec, out_first, n);
+
+        // For move
+        std::uninitialized_move(exec, first, last, out_first);
+        // the move counter must equal the length of the range
+        EXPECT_TRUE(T::MoveCount() == n, "wrong uninitialized_move");
+        // destroy the objects so that the next algorithm can be tested on the same memory
+        std::destroy_n(exec, out_first, n);
+
+        std::uninitialized_move_n(exec, first, n, out_first);
+        EXPECT_TRUE(T::MoveCount() == n, "wrong uninitialized_move_n");
+        // No destroy_n here: the buffer is owned by the caller's unique_ptr<T[]>, whose delete[] destroys these objects.
+    }
+
+#if defined(_PSTL_ICC_17_VC141_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) || defined(_PSTL_ICC_16_VC14_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN)
+    template <typename InputIterator, typename OutputIterator>
+    void
+    operator()(pstl::execution::unsequenced_policy, InputIterator first, InputIterator last, OutputIterator out_first,
+               size_t n, /*is_trivial<T>=*/std::true_type)
+    {
+    }
+    template <typename InputIterator, typename OutputIterator>
+    void
+    operator()(pstl::execution::parallel_unsequenced_policy, InputIterator first, InputIterator last,
+               OutputIterator out_first, size_t n, /*is_trivial<T>=*/std::true_type)
+    {
+    }
+#endif
+
+    template <typename Policy, typename InputIterator, typename OutputIterator>
+    void
+    operator()(Policy&& exec, InputIterator first, InputIterator last, OutputIterator out_first, size_t n,
+               /*is_trivial<T>=*/std::true_type)
+    {
+        std::uninitialized_copy(exec, first, last, out_first); // trivial types: verify by comparing output with input
+        EXPECT_TRUE(IsCheckValueCorrectness(first, out_first, n), "wrong uninitialized_copy");
+        std::destroy_n(exec, out_first, n);
+
+        std::uninitialized_copy_n(exec, first, n, out_first);
+        EXPECT_TRUE(IsCheckValueCorrectness(first, out_first, n), "wrong uninitialized_copy_n");
+        std::destroy_n(exec, out_first, n);
+
+        std::uninitialized_move(exec, first, last, out_first);
+        EXPECT_TRUE(IsCheckValueCorrectness(first, out_first, n), "wrong uninitialized_move");
+        std::destroy_n(exec, out_first, n);
+
+        std::uninitialized_move_n(exec, first, n, out_first);
+        EXPECT_TRUE(IsCheckValueCorrectness(first, out_first, n), "wrong uninitialized_move_n");
+        std::destroy_n(exec, out_first, n);
+    }
+};
+
+template <typename T>
+void
+test_uninitialized_copy_move_by_type()
+{
+    std::size_t N = 100000;
+    for (size_t n = 0; n <= N; n = n <= 16 ? n + 1 : size_t(3.1415 * n))
+    {
+        Sequence<T> in(n, [=](size_t k) -> T { return T(k); });
+        std::unique_ptr<T[]> p(new T[n]);
+        invoke_on_all_policies(test_uninitialized_copy_move(), in.begin(), in.end(), p.get(), n, std::is_trivial<T>());
+    }
+}
+
+int
+main()
+{
+
+    // for trivial types
+    test_uninitialized_copy_move_by_type<int16_t>();
+    test_uninitialized_copy_move_by_type<float64_t>();
+
+    // for user-defined types
+#if !defined(_PSTL_ICC_17_VC141_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) && !defined(_PSTL_ICC_16_VC14_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN) &&     \
+    !defined(_PSTL_ICC_16_VC14_TEST_PAR_TBB_RT_RELEASE_64_BROKEN)
+    test_uninitialized_copy_move_by_type<Wrapper<int8_t>>();
+#endif
+
+    std::cout << done() << std::endl;
+    return 0;
+}
diff --git a/libcxx/test/std/pstl/utilities/memory/specialized.algorithms/uninitialized_fill_destroy.pass.cpp b/libcxx/test/std/pstl/utilities/memory/specialized.algorithms/uninitialized_fill_destroy.pass.cpp
new file mode 100644
index 00000000000000..d579ea558839d0
--- /dev/null
+++ b/libcxx/test/std/pstl/utilities/memory/specialized.algorithms/uninitialized_fill_destroy.pass.cpp
@@ -0,0 +1,93 @@
+// -*- C++ -*-
+//===-- uninitialized_fill_destroy.pass.cpp -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+#include "support/pstl_test_config.h"
+
+#include <execution>
+#include <memory>
+
+#include "support/utils.h"
+
+using namespace TestUtils;
+
+struct test_uninitialized_fill_destroy
+{ // Functor passed to invoke_on_all_policies(); the trailing std::is_trivial<T> argument selects the overload.
+    template <typename Policy, typename Iterator, typename T>
+    void
+    operator()(Policy&& exec, Iterator first, Iterator last, const T& in, std::size_t n, std::false_type)
+    { // Non-trivial T: also checks that T::Count() is 0 after destroy/destroy_n.
+        using namespace std;
+        { // uninitialized_fill, then destroy
+            T::SetCount(0);
+            uninitialized_fill(exec, first, last, in);
+            size_t count = count_if(first, last, [&in](T& x) -> bool { return x == in; });
+            EXPECT_TRUE(n == count, "wrong work of uninitialized_fill");
+            destroy(exec, first, last);
+            EXPECT_TRUE(T::Count() == 0, "wrong work of destroy");
+        }
+        // uninitialized_fill_n, then destroy_n; the returned iterator must equal last
+        {
+            auto res = uninitialized_fill_n(exec, first, n, in);
+            EXPECT_TRUE(res == last, "wrong result of uninitialized_fill_n");
+            size_t count = count_if(first, last, [&in](T& x) -> bool { return x == in; });
+            EXPECT_TRUE(n == count, "wrong work of uninitialized_fill_n");
+            destroy_n(exec, first, n);
+            EXPECT_TRUE(T::Count() == 0, "wrong work of destroy_n");
+        }
+    }
+    template <typename Policy, typename Iterator, typename T>
+    void
+    operator()(Policy&& exec, Iterator first, Iterator last, const T& in, std::size_t n, std::true_type)
+    { // Trivial T: only the filled values and the returned iterator are checked.
+        using namespace std;
+        { // destroy, then uninitialized_fill
+            destroy(exec, first, last);
+            uninitialized_fill(exec, first, last, in);
+            size_t count = count_if(first, last, [&in](T& x) -> bool { return x == in; });
+            EXPECT_EQ(n, count, "wrong work of uninitialized_fill");
+        }
+        { // destroy_n, then uninitialized_fill_n
+            destroy_n(exec, first, n);
+            auto res = uninitialized_fill_n(exec, first, n, in);
+            size_t count = count_if(first, last, [&in](T& x) -> bool { return x == in; });
+            EXPECT_EQ(n, count, "wrong work of uninitialized_fill_n");
+            EXPECT_TRUE(res == last, "wrong result of uninitialized_fill_n");
+        }
+    }
+};
+
+template <typename T>
+void
+test_uninitialized_fill_destroy_by_type()
+{
+    // Lengths 0..17 are visited one by one; larger lengths grow by a factor of 3.1415 up to 100000.
+    for (std::size_t len = 0, limit = 100000; len <= limit; len = (len > 16) ? size_t(3.1415 * len) : len + 1)
+    {
+        std::unique_ptr<T[]> storage(new T[len]);
+        invoke_on_all_policies(test_uninitialized_fill_destroy(), storage.get(), std::next(storage.get(), len), T(),
+                               len, std::is_trivial<T>());
+    }
+}
+
+int
+main()
+{
+    // Element types for which std::is_trivial<T> selects the trivial overload
+    test_uninitialized_fill_destroy_by_type<int32_t>();
+    test_uninitialized_fill_destroy_by_type<float64_t>();
+
+    // User-defined element types wrapped in Wrapper<>
+    test_uninitialized_fill_destroy_by_type<Wrapper<std::string>>();
+    test_uninitialized_fill_destroy_by_type<Wrapper<int8_t*>>();
+    std::cout << done() << std::endl;
+
+    return 0;
+}
diff --git a/llvm/lib/Target/ARM/ARMEncodeDecode.cpp b/llvm/lib/Target/ARM/ARMEncodeDecode.cpp
new file mode 100755
index 00000000000000..ddf73811254f23
--- /dev/null
+++ b/llvm/lib/Target/ARM/ARMEncodeDecode.cpp
@@ -0,0 +1,628 @@
+//===- ARMEncodeDecode.cpp - ARM return-address encode/decode pass --------===//
+//
+// Copyright (c) 2021-2022, University of Rochester
+//
+// Part of the Randezvous Project, under the Apache License v2.0 with
+// LLVM Exceptions.  See LICENSE.txt in the llvm directory for license
+// information.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of a pass that instruments ARM machine
+// code to XOR-encode the return address with a key register (R9) before it is
+// pushed to the stack and to decode it again after it is popped.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-randezvous-shadow-stack"
+
+#include "ARMEncodeDecode.h"
+#include "ARMRandezvousCLR.h"
+#include "ARMRandezvousOptions.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/RandomNumberGenerator.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+STATISTIC(NumPrologues, "Number of prologues transformed to use shadow stack");
+STATISTIC(NumEpilogues, "Number of epilogues transformed to use shadow stack");
+STATISTIC(NumNullified, "Number of return addresses nullified");
+
+char ARMEncodeDecode::ID = 0;
+
+ARMEncodeDecode::ARMEncodeDecode() : ModulePass(ID) {}
+// Returns the human-readable name of this pass.
+StringRef ARMEncodeDecode::getPassName() const {
+  return "ARM Encode and Decode Pass";
+}
+// Declares the analyses this pass requires and what it preserves.
+void ARMEncodeDecode::getAnalysisUsage(AnalysisUsage &AU) const {
+  // MachineModuleInfo is needed to look up the MachineFunction of a Function
+  AU.addRequired<MachineModuleInfoWrapperPass>();
+  // Mark the CFG as preserved, then defer to the ModulePass defaults
+  AU.setPreservesCFG();
+  ModulePass::getAnalysisUsage(AU);
+}
+
+// Initialize R9 (the XOR key register)
+
+// Code instrumentation, as follows:
+//  mov r3 [r4]
+//  // Decode the function pointer
+//  xor r3 r9
+//  // Jump or not jump to the other shadow
+//  add r3 #offset (add r3 #0)
+//  // Jump to the target function
+//  blx r3
+
+// push {r4-r8}
+// // Encode the lr before push it to the stack
+// xor lr r9
+// push lr
+
+// pop {r4-r8} lr
+// // Decode the value of lr after fetch it from the stack
+// xor lr r9
+
+// pop {r7,lr}
+// xor pc lr r9
+
+//
+// Method: createInitFunction()
+//
+// Description:
+//   This method creates a function (both Function and MachineFunction) that
+//   loads a constant key (currently fixed to 1) into the XOR register XorReg.
+//
+// Input:
+//   M - A reference to the Module in which to create the function; the
+//       function is also appended to @llvm.used of M.
+//
+// Return value:
+//   A pointer to the created (or already existing) Function.
+//
+Function *ARMEncodeDecode::createInitFunction(Module &M) {
+  // Create types for the init function
+  LLVMContext &Ctx = M.getContext();
+  FunctionType *FuncTy = FunctionType::get(Type::getVoidTy(Ctx), false);
+
+  // Create the init function
+  FunctionCallee FC =
+      M.getOrInsertFunction(ARMEncodeDecode::InitFuncName, FuncTy);
+  Function *F = dyn_cast<Function>(FC.getCallee());
+  assert(F != nullptr && "Init function has wrong type!");
+  MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+  MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
+
+  // Set necessary attributes and properties
+  F->setLinkage(GlobalVariable::LinkOnceAnyLinkage);
+  if (!F->hasFnAttribute(Attribute::Naked)) {
+    F->addFnAttr(Attribute::Naked);
+  }
+  if (!F->hasFnAttribute(Attribute::NoUnwind)) {
+    F->addFnAttr(Attribute::NoUnwind);
+  }
+  if (!F->hasFnAttribute(Attribute::WillReturn)) {
+    F->addFnAttr(Attribute::WillReturn);
+  }
+  using Property = MachineFunctionProperties::Property;
+  if (!MF.getProperties().hasProperty(Property::NoVRegs)) {
+    MF.getProperties().set(Property::NoVRegs);
+  }
+
+  // Only populate the function the first time it is created
+  if (F->empty()) {
+    assert(MF.empty() && "Machine IR basic block already there!");
+
+    // Build an IR basic block
+    BasicBlock *BB = BasicBlock::Create(Ctx, "", F);
+    IRBuilder<> IRB(BB);
+    IRB.CreateRetVoid(); // At this point, what the IR basic block contains
+                         // doesn't matter so just place a return there
+
+    // Build machine IR basic block(s)
+    const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+    MachineBasicBlock *MBB = MF.CreateMachineBasicBlock(BB);
+    // MachineBasicBlock * MBB2 = nullptr;
+    // MachineBasicBlock * MBB3 = nullptr;
+    MachineBasicBlock *RetMBB = MBB;
+    MF.push_back(MBB);
+
+    // // Generate a static random stride
+    // uint64_t Stride = (*RNG)();
+    // Stride &= (1ul << (EncodeDecodeNumberLength - 1)) - 1;
+    // Stride &= ~0x3ul;
+    // NOTE(review): the key is hard-coded to 1; the random variant is disabled
+    uint64_t Stride = 0x00000001;
+
+    if (ARM_AM::getT2SOImmVal(Stride) != -1) {
+      // Use MOVi if the stride can be encoded in Thumb modified constant
+      BuildMI(MBB, DebugLoc(), TII->get(ARM::t2MOVi), XorReg)
+          .addImm(Stride)
+          .add(predOps(ARMCC::AL))
+          .add(condCodeOp()); // No 'S' bit
+    } else {
+      // Otherwise use MOVi16/MOVTi16 to encode lower/upper 16 bits of the
+      // stride
+      BuildMI(MBB, DebugLoc(), TII->get(ARM::t2MOVi16), XorReg)
+          .addImm(Stride & 0xffff)
+          .add(predOps(ARMCC::AL));
+      BuildMI(MBB, DebugLoc(), TII->get(ARM::t2MOVTi16), XorReg)
+          .addReg(XorReg)
+          .addImm((Stride >> 16) & 0xffff)
+          .add(predOps(ARMCC::AL));
+    }
+
+    // Terminate the block with a return (tBX_RET)
+    BuildMI(RetMBB, DebugLoc(), TII->get(ARM::tBX_RET)).add(predOps(ARMCC::AL));
+  }
+  // Add the init function to @llvm.used
+  appendToUsed(M, {F});
+
+  return F;
+}
+
+//
+// Method: EncodeLR()
+//
+// Description:
+//   This method rewrites a PUSH-like instruction so that it no longer stores
+//   LR, and builds an "eor lr, lr, XorReg" (plus, for tPUSH, a "push {lr}")
+//   that is inserted before it, so that LR is XOR-encoded before being saved.
+//
+// Inputs:
+//   MI     - A reference to a PUSH instruction that saves LR to the stack.
+//   LR     - A reference to the LR operand of the PUSH.
+//   Stride - A static stride; not used by the current implementation.
+//
+// Return value:
+//   true - The machine code was modified.
+//
+// Intended instruction sequence:
+//   push {r4-r8}
+//   eor  lr, lr, r9   ; encode LR before it is pushed to the stack
+//   push {lr}
+bool ARMEncodeDecode::EncodeLR(MachineInstr &MI, MachineOperand &LR,
+                               uint32_t Stride) {
+  MachineFunction &MF = *MI.getMF();
+  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+  const DebugLoc &DL = MI.getDebugLoc();
+
+  Register PredReg;
+  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+
+  // 1. Replace the old PUSH with a new one that doesn't push LR to the
+  // stack
+  switch (MI.getOpcode()) {
+  case ARM::t2STMDB_UPD:
+    // STMDB_UPD should store at least two registers; if it happens to be two,
+    // we replace it with a STR_PRE
+    assert(MI.getNumExplicitOperands() >= 6 && "Buggy STMDB_UPD!");
+    if (MI.getNumExplicitOperands() > 6) {
+      MI.removeOperand(MI.getOperandNo(&LR));
+    } else {
+      unsigned Idx = MI.getOperandNo(&LR);
+      Idx = Idx == 4 ? 5 : 4;
+      insertInstBefore(MI, BuildMI(MF, DL, TII->get(ARM::t2STR_PRE), ARM::SP)
+                               .add(MI.getOperand(Idx))
+                               .addReg(ARM::SP)
+                               .addImm(-4)
+                               .add(predOps(Pred, PredReg))
+                               .setMIFlags(MI.getFlags()));
+      removeInst(MI);
+    }
+    break;
+
+  case ARM::tPUSH:
+    // PUSH should store at least one register; if it happens to be one, we
+    // just remove it
+    assert(MI.getNumExplicitOperands() >= 3 && "Buggy PUSH!");
+    if (MI.getNumExplicitOperands() > 3) {
+      MI.removeOperand(MI.getOperandNo(&LR));
+    } else {
+      removeInst(MI);
+    }
+    break;
+
+  // ARM::t2STR_PRE
+  default:
+    // STR_PRE only stores one register, so we just remove it
+    removeInst(MI);
+    break;
+  }
+
+  // 2. Build the XOR instruction that encodes LR with the key register:
+  // eor.w lr,lr,r9
+
+  std::vector<MachineInstr *> NewInsts;
+  NewInsts.push_back(BuildMI(MF, DL, TII->get(ARM::t2EORrr), ARM::LR)
+                         .addReg(ARM::LR)
+                         .addReg(XorReg)
+                         .add(predOps(Pred, PredReg))
+                         .add(condCodeOp()));
+
+  // 3. Build a new PUSH that only pushes LR to the stack (tPUSH case only)
+  switch (MI.getOpcode()) {
+  case ARM::tPUSH:
+    NewInsts.push_back(BuildMI(MF, DL, TII->get(ARM::tPUSH))
+                           .add(predOps(Pred, PredReg))
+                           .addReg(ARM::LR));
+    break;
+
+  // NOTE(review): also reached for t2STMDB_UPD, not only t2STR_PRE
+  default:
+    // NOTE(review): MI may already have been removed in step 1 -- confirm
+    removeInst(MI);
+    break;
+  }
+  // NOTE(review): MI is used below even on paths that called removeInst(MI)
+  MI.addOperand(
+      MachineOperand::CreateReg(storeReg, /*isDef=*/true, /*isImp=*/false));
+
+  // 4. Now insert these new instructions into the basic block
+  insertInstsBefore(MI, NewInsts);
+
+  ++NumPrologues;
+  return true;
+}
+
+bool ARMEncodeDecode::EncodeCallSite(MachineInstr &MI, MachineOperand &MO,
+                                     uint32_t Stride) {
+  // Inserts "eor MO, MO, XorReg" right before the indirect call MI so that the
+  // call target register is XORed with the key register.  Stride is unused
+  // here.  Always returns true because an instruction is always inserted.
+  MachineFunction &MF = *MI.getMF();
+  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+  const DebugLoc &DL = MI.getDebugLoc();
+
+  // Reuse the predicate of the call so the XOR is predicated identically
+  Register PredReg;
+  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+  const Register TargetReg = MO.getReg();
+
+  std::vector<MachineInstr *> NewInsts;
+  NewInsts.push_back(BuildMI(MF, DL, TII->get(ARM::t2EORrr), TargetReg)
+                         .addReg(TargetReg)
+                         .addReg(XorReg)
+                         .add(predOps(Pred, PredReg))
+                         .add(condCodeOp()));
+  insertInstsBefore(MI, NewInsts);
+  return true;
+}
+
+//
+// Method: DecodeLR()
+//
+// Description:
+//   This method replaces a POP instruction that restores the return address
+//   with a t2LDMIA_UPD that loads it into LR (instead of PC), followed by an
+//   "eor lr, lr, XorReg" that decodes it and, if the POP wrote PC, a "bx lr".
+//
+// Inputs:
+//   MI     - A reference to a POP instruction that writes to LR or PC.
+//   PCLR   - A reference to the PC or LR operand of the POP.
+//   Stride - A static stride; not used by the current implementation.
+//
+// Return value:
+//   true - The machine code was modified.
+//
+
+bool ARMEncodeDecode::DecodeLR(MachineInstr &MI, MachineOperand &PCLR,
+                               uint32_t Stride) {
+  MachineFunction &MF = *MI.getMF();
+  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+  const DebugLoc &DL = MI.getDebugLoc();
+  // Only a POP that wrote PC was a return; a POP into LR must fall through
+  const bool IsReturn = PCLR.getReg() == ARM::PC;
+  Register PredReg;
+  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+  std::vector<MachineInstr *> NewInsts;
+  // NOTE(review): this prepends SP/SP and then copies every explicit operand
+  // of MI, which presumably matches only tPOP/tPOP_RET -- TODO confirm
+  MachineInstrBuilder MIB =
+      BuildMI(MF, DL, TII->get(ARM::t2LDMIA_UPD), ARM::SP).addReg(ARM::SP);
+  for (MachineOperand &MO : MI.explicit_operands()) {
+    if (MO.isReg() && MO.getReg() == ARM::PC) {
+      MIB.addReg(ARM::LR, RegState::Define);
+    } else {
+      MIB.add(MO);
+    }
+  }
+  NewInsts.push_back(MIB);
+  // Decode the loaded return address: eor lr, lr, XorReg
+  NewInsts.push_back(BuildMI(MF, DL, TII->get(ARM::t2EORrr), ARM::LR)
+                         .addReg(ARM::LR)
+                         .addReg(XorReg)
+                         .add(predOps(Pred, PredReg))
+                         .add(condCodeOp()));
+  if (IsReturn) {
+    NewInsts.push_back(
+        BuildMI(MF, DL, TII->get(ARM::tBX_RET)).add(predOps(Pred, PredReg)));
+  }
+  // Insert the new instructions after the old POP, then drop the old POP
+  insertInstsAfter(MI, NewInsts);
+  removeInst(MI);
+  ++NumEpilogues;
+  return true;
+}
+
+//
+// Method: runOnModule()
+//
+// Description:
+//   This method is called when the PassManager wants this pass to transform
+//   the specified Module.  For each function except Reset_Handler, it
+//
+//   * collects the frame-setup pushes that store LR and the frame-destroy
+//     pops that load LR or PC,
+//
+//   * collects the indirect calls (tBLXr/tBLXr_noip) selected by a backward
+//     scan of the preceding instructions, and
+//
+//   * calls EncodeLR(), EncodeCallSite() and DecodeLR() on them, respectively.
+//
+// Input:
+//   M - A reference to the Module to transform.
+//
+// Output:
+//   M - The transformed Module.
+//
+// Return value:
+//   true  - The Module was transformed.
+//   false - The Module was not transformed.
+//
+bool ARMEncodeDecode::runOnModule(Module &M) {
+  // if (!EnableEncodeDecode) {
+  //   return false;
+  // }
+
+  // MachineModuleInfo gives access to the MachineFunction of each Function
+  MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+  // Materialize the name at once: a Twine must not outlive its operands
+  RNG = M.createRNG((getPassName() + "-" + Twine(EncodeDecodeSeed)).str());
+
+  if (EnableEncodeDecode) {
+    // createInitFunction() would create an init function that loads the key
+    // into XorReg.
+    // NOTE(review): the call below is commented out, so this block is
+    // currently empty and XorReg is not initialized by this pass -- confirm
+    // that it is set up elsewhere (e.g. by startup code).
+    //createInitFunction(M);
+  }
+
+  // Instrument pushes and pops in each function
+  bool changed = false;
+
+  for (Function &F : M) {
+
+    // if (F.getName() == "jzx_Sum" || F.getName() == "jzx_foo" ||
+    //     F.getName() == "getAddr" ||
+    //     F.getName() == "testThread") 
+    //     {
+    if (F.getName() != "Reset_Handler" ) 
+        {
+      MachineFunction *MF = MMI.getMachineFunction(F);
+      if (MF == nullptr) {
+        continue;
+      }
+
+      // Find out all pushes that write LR to the stack and all pops that read a
+      // return address from the stack to LR or PC
+      std::vector<std::pair<MachineInstr *, MachineOperand *>> Pushes;
+      std::vector<std::pair<MachineInstr *, MachineOperand *>> Pops;
+      std::vector<std::pair<MachineInstr *, MachineOperand *>> Blxs;
+      MachineOperand *callRegister = nullptr; // last register operand of the call
+      Register callRegister1 = ARM::R9; // corresponds to r0 in the patterns below
+      Register callRegister2 = ARM::R9; // not referenced again in this function
+      unsigned SPImm;
+
+      for (MachineBasicBlock &MBB : *MF) {
+        for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+             I != E;) {
+          MachineInstr &MI = *I;
+          MachineBasicBlock::iterator I_ = I; // remember the current position
+          int state = 1; // which instruction of the pattern to look for next
+          if (MI.getOpcode() == ARM::tBLXr ||
+              MI.getOpcode() == ARM::tBLXr_noip) {
+            for (MachineOperand &MO : MI.explicit_operands()) {
+              if (MO.isReg()) {
+                callRegister = &MO; // keep the last register operand seen
+              }
+            }
+            // Blxs.push_back(std::make_pair(&MI, callRegister));
+            while (1) {
+              if (I == MBB.begin()) {
+                break;
+              } else {
+                I--;                    // scan upwards
+                MachineInstr &MI1 = *I; // the instruction being examined
+                if (state == 1) {       // first look for: ldr r2, [sp, #16]
+                  int x = 0;
+                  switch (MI1.getOpcode()) {
+                  case ARM::LDRrs:
+                  case ARM::PICLDR:
+                  case ARM::LDRBrs:
+                  case ARM::t2LDRDi8:
+                  case ARM::LDRi12:
+                  case ARM::t2LDRi12:
+                  case ARM::tLDRi:
+                  case ARM::tLDRspi:
+                    for (MachineOperand &MO : MI1.explicit_operands()) {
+                      if (MO.isReg() && callRegister->getReg() == MO.getReg()) {
+                        unsigned Idx = MI1.getOperandNo(&MO);
+                        callRegister1 = MI1.getOperand(Idx + 1).getReg();
+                        SPImm = MI1.getOperand(Idx + 2).getImm();
+                        state = 2; // next look for: str r0, [sp, #16]
+                      }
+                    }
+                    break;
+                  case ARM::tLDRpci:
+                    x = 1;
+                    break;
+                  default:
+                    break;
+                  }
+                  if (x == 1) {
+                    break;
+                  }
+                } else if (state == 2) {
+                  switch (MI1.getOpcode()) {
+                  case ARM::STRrs:
+                  case ARM::STRBrs:
+                  case ARM::t2STRDi8:
+                  case ARM::STRi12:
+                  case ARM::t2STRi12:
+                  case ARM::tSTRi:
+                  case ARM::tSTRspi:
+                    for (MachineOperand &MO : MI1.explicit_operands()) {
+                      if (MO.isReg() && callRegister1 == MO.getReg()) {
+                        unsigned Idx = MI1.getOperandNo(&MO); //
+                        if (SPImm == MI1.getOperand(Idx + 1).getImm()) {
+                          callRegister1 = MI1.getOperand(Idx - 1).getReg();
+                          state = 3; // next look for: ldr r0, [r0, #0]
+                        }
+                      }
+                    }
+                    break;
+                  default:
+                    break;
+                  }
+                } else if (state ==
+                           3) { // look for ldr r0, [r0, #0]; walk back to add r0, pc
+                  int f = 0;
+                  switch (MI1.getOpcode()) {
+                  case ARM::tPICADD:
+                  case ARM::PICADD:
+                    for (MachineOperand &MO : MI1.explicit_operands()) {
+                      if (MO.isReg() && callRegister1 == MO.getReg()) {
+                        unsigned Idx = MI1.getOperandNo(&MO);
+                        Blxs.push_back(std::make_pair(&MI, callRegister));
+                        f = 1;
+                        break;
+                      }
+                    }
+                    break;
+                  case ARM::LDRrs:
+                  case ARM::PICLDR:
+                  case ARM::LDRBrs:
+                  case ARM::t2LDRDi8:
+                  case ARM::LDRi12:
+                  case ARM::t2LDRi12:
+                  case ARM::tLDRi:
+                  case ARM::tLDRspi:
+                  case ARM::tLDRpci:
+                    if (MI1.getOperand(0).isReg() &&
+                        callRegister1 == MI1.getOperand(0).getReg() &&
+                        callRegister1 != MI1.getOperand(1).getReg()) {
+                      state = 1; // go back to state 1
+                      callRegister1 = MI1.getOperand(1).getReg();
+                    }
+                    break;
+                  default:
+                    break;
+                  }
+                  if (f == 1) {
+                    break;
+                  }
+                }
+              }
+            }
+            I = I_;
+          }
+          I++;
+        }
+      }
+
+      for (MachineBasicBlock &MBB : *MF) {
+        for (MachineInstr &MI : MBB) {
+          switch (MI.getOpcode()) {
+          // Frame-setup instructions in function prologue
+          case ARM::t2STR_PRE:
+          case ARM::t2STMDB_UPD:
+            // STR_PRE and STMDB_UPD are considered as PUSH if they write to SP!
+            if (MI.getOperand(0).getReg() != ARM::SP) {
+              break;
+            }
+            LLVM_FALLTHROUGH;
+          case ARM::tPUSH:
+            if (MI.getFlag(MachineInstr::FrameSetup)) {
+              for (MachineOperand &MO : MI.explicit_operands()) {
+                if (MO.isReg() && MO.getReg() == ARM::LR) {
+                  Pushes.push_back(std::make_pair(&MI, &MO));
+                  break;
+                }
+              }
+            }
+            break;
+          // Frame-destroy instructions in function epilogue
+          case ARM::t2LDR_POST:
+          case ARM::t2LDMIA_UPD:
+          case ARM::t2LDMIA_RET:
+            // LDR_POST and LDMIA_(UPD|RET) are considered as POP if they read
+            // from SP!
+            if (MI.getOperand(1).getReg() != ARM::SP) {
+              break;
+            }
+            LLVM_FALLTHROUGH;
+          case ARM::tPOP:
+          case ARM::tPOP_RET:
+            if (MI.getFlag(MachineInstr::FrameDestroy)) {
+              // Handle 2 cases:
+              // (1) Pop writing to LR
+              // (2) Pop writing to PC
+              for (MachineOperand &MO : MI.explicit_operands()) {
+                if (MO.isReg()) {
+                  if (MO.getReg() == ARM::LR || MO.getReg() == ARM::PC) {
+                    Pops.push_back(std::make_pair(&MI, &MO));
+                    break;
+                  }
+                }
+              }
+            }
+            break;
+
+          default:
+            break;
+          }
+        }
+      }
+
+      // Instrument each push and pop
+      // if (EnableEncodeDecode) {
+      // Generate a per-function static stride (currently unused by the callees)
+      uint32_t Stride = (*RNG)();
+      Stride &= (1ul << (EncodeDecodeNumberLength - 1)) - 1;
+      Stride &= ~0x3ul;
+      // Limit the static stride to be within 8 bits, so that it can fit in
+      // STR_POST and LDR_PRE as an immediate
+      Stride &= 0xfful;
+      // Don't generate an empty stride; either the dynamic stride or the
+      // static stride needs to make sure of it, so just do it on the static to
+      // leave more room for the dynamic
+      if (Stride == 0u) {
+        Stride = 4u;
+      }
+
+      for (auto &MIMO : Pushes) {
+        changed |= EncodeLR(*MIMO.first, *MIMO.second, Stride);
+      }
+
+      for (auto &MIMO : Blxs) {
+        changed |= EncodeCallSite(*MIMO.first, *MIMO.second, Stride);
+      }
+
+      for (auto &MIMO : Pops) {
+        changed |= DecodeLR(*MIMO.first, *MIMO.second, Stride);
+      }
+    }
+  }
+
+  return changed;
+}
+// Factory used by the ARM pass pipeline: returns a newly allocated pass object
+ModulePass *llvm::createARMEncodeDecode() { return new ARMEncodeDecode(); }
\ No newline at end of file
diff --git a/llvm/lib/Target/ARM/ARMEncodeDecode.h b/llvm/lib/Target/ARM/ARMEncodeDecode.h
new file mode 100755
index 00000000000000..3b00de8bb71d24
--- /dev/null
+++ b/llvm/lib/Target/ARM/ARMEncodeDecode.h
@@ -0,0 +1,38 @@
+#ifndef ARM_ENCODE_DECODE
+#define ARM_ENCODE_DECODE
+
+#include "ARMRandezvousInstrumentor.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/RandomNumberGenerator.h"
+
+namespace llvm {
+  struct ARMEncodeDecode : public ModulePass, ARMRandezvousInstrumentor {
+    // Pass identifier handed to the ModulePass constructor
+    static char ID;
+
+    // storeReg: register appended to the rewritten PUSH in EncodeLR()
+    // XorReg: register holding the XOR key; InitFuncName: name of its init function
+    static constexpr Register storeReg = ARM::R8;
+    static constexpr Register XorReg = ARM::R9;
+    static constexpr StringRef InitFuncName = "__xor_register_init";
+
+    ARMEncodeDecode();
+    StringRef getPassName() const override;
+    void getAnalysisUsage(AnalysisUsage &AU) const override;
+    bool runOnModule(Module &M) override;
+
+  private:
+    // RNG created in runOnModule() and used to draw the per-function stride
+    std::unique_ptr<RandomNumberGenerator> RNG;
+    // Creates the function that loads the key into XorReg
+    Function *createInitFunction(Module &M);
+    // Instrumentation helpers for pushes of LR, indirect calls and pops of LR/PC
+    bool EncodeLR(MachineInstr &MI, MachineOperand &LR, uint32_t Stride);
+    bool EncodeCallSite(MachineInstr &MI, MachineOperand &MO, uint32_t Stride);
+    bool DecodeLR(MachineInstr &MI, MachineOperand &PCLR, uint32_t Stride);
+  };
+
+  ModulePass * createARMEncodeDecode(void);
+}
+
+#endif
\ No newline at end of file
diff --git a/llvm/lib/Target/ARM/ARMRandezvousOptions.cpp b/llvm/lib/Target/ARM/ARMRandezvousOptions.cpp
old mode 100644
new mode 100755
index 58dbfca265ad4a..72e3f5dfe9b2ab
--- a/llvm/lib/Target/ARM/ARMRandezvousOptions.cpp
+++ b/llvm/lib/Target/ARM/ARMRandezvousOptions.cpp
@@ -85,6 +85,15 @@ ShadowStack("arm-randezvous-shadow-stack",
             cl::location(EnableRandezvousShadowStack),
             cl::init(false));
 
+// Backing storage and flag for -arm-encode-decode (defaults to true)
+bool EnableEncodeDecode;
+static cl::opt<bool, true>
+EncodeDecode("arm-encode-decode",
+            cl::Hidden,
+            cl::desc("Enable ARM Encode and Decode"),
+            cl::location(EnableEncodeDecode),
+            cl::init(true));
+
 bool EnableRandezvousRAN;
 static cl::opt<bool, true>
 RAN("arm-randezvous-ran",
@@ -137,6 +146,15 @@ ShadowStackSeed("arm-randezvous-shadow-stack-seed",
                 cl::location(RandezvousShadowStackSeed),
                 cl::init(0));
 
+// Backing storage and flag for -encode-decode-seed, the RNG seed (default 0)
+uint64_t EncodeDecodeSeed;
+static cl::opt<uint64_t, true>
+XorSeed("encode-decode-seed",
+                cl::Hidden,
+                cl::desc("Seed for the RNG used in ARM Encode and Decode"),
+                cl::location(EncodeDecodeSeed),
+                cl::init(0));
+
 //===----------------------------------------------------------------------===//
 // Size options used by Randezvous passes
 //===----------------------------------------------------------------------===//
@@ -193,6 +211,15 @@ ShadowStackStrideLength("arm-randezvous-shadow-stack-stride-length",
                         cl::location(RandezvousShadowStackStrideLength),
                         cl::init(8));
 
+// Backing storage and flag for -arm-xor-number-length (default 8 bits)
+unsigned EncodeDecodeNumberLength;
+static cl::opt<unsigned, true>
+XorNumLength("arm-xor-number-length",
+                        cl::Hidden,
+                        cl::desc("Number of bits for ARM Xor Number"),
+                        cl::location(EncodeDecodeNumberLength),
+                        cl::init(8));
+
 unsigned RandezvousNumGlobalGuardCandidates;
 static cl::opt<unsigned, true>
 NumGlobalGuardCandidates("arm-randezvous-num-global-guard-candidates",
@@ -207,4 +234,13 @@ RNGAddress("arm-randezvous-rng-addr",
            cl::Hidden,
            cl::desc("Address of a dynamic RNG"),
            cl::location(RandezvousRNGAddress),
+           cl::init(0));
+
+// Backing storage and flag for -arm-encode-and-decode-rng-addr (default 0)
+uintptr_t EncodeDecodeRNGAddress;
+static cl::opt<uintptr_t, true>
+XorRNGAddress("arm-encode-and-decode-rng-addr",
+           cl::Hidden,
+           cl::desc("Address of a dynamic RNG"),
+           cl::location(EncodeDecodeRNGAddress),
            cl::init(0));
\ No newline at end of file
diff --git a/llvm/lib/Target/ARM/ARMRandezvousOptions.h b/llvm/lib/Target/ARM/ARMRandezvousOptions.h
old mode 100644
new mode 100755
index 3757fd09e07f42..7e754cdf9e0cf2
--- a/llvm/lib/Target/ARM/ARMRandezvousOptions.h
+++ b/llvm/lib/Target/ARM/ARMRandezvousOptions.h
@@ -30,6 +30,7 @@ extern bool EnableRandezvousGDLR;
 extern bool EnableRandezvousDecoyPointers;
 extern bool EnableRandezvousGlobalGuard;
 extern bool EnableRandezvousShadowStack;
+extern bool EnableEncodeDecode;//jzx
 extern bool EnableRandezvousRAN;
 extern bool EnableRandezvousLGPromote;
 extern bool EnableRandezvousICallLimiter;
@@ -41,6 +42,7 @@ extern bool EnableRandezvousICallLimiter;
 extern uint64_t RandezvousCLRSeed;
 extern uint64_t RandezvousGDLRSeed;
 extern uint64_t RandezvousShadowStackSeed;
+extern uint64_t EncodeDecodeSeed;//jzx
 
 //===----------------------------------------------------------------------===//
 // Size options used by Randezvous passes
@@ -57,7 +59,9 @@ extern size_t RandezvousShadowStackSize;
 //===----------------------------------------------------------------------===//
 
 extern unsigned RandezvousShadowStackStrideLength;
+extern unsigned EncodeDecodeNumberLength;
 extern unsigned RandezvousNumGlobalGuardCandidates;
+extern uintptr_t EncodeDecodeRNGAddress;//jzx
 extern uintptr_t RandezvousRNGAddress;
 
 #endif
\ No newline at end of file
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
old mode 100644
new mode 100755
index bfd0d0ec4cecda..1f798dc792952e
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -11,6 +11,8 @@
 
 #include "ARMRandezvousInstrumentor.h"
 #include "ARMRandezvousShadowStack.h"
+//jzx
+#include "ARMEncodeDecode.h"
 #include "ARMTargetMachine.h"
 #include "ARM.h"
 #include "ARMMachineFunctionInfo.h"
@@ -58,6 +60,9 @@
 #include <optional>
 #include <string>
 
+//jzx
+#include "ARMEncodeDecode.h"
+
 using namespace llvm;
 
 static cl::opt<bool>
@@ -618,6 +623,9 @@ void ARMPassConfig::addPreEmitPass2() {
 
   addPass(createARMRandezvousShadowStack());
 
+  //jzx
+  addPass(createARMEncodeDecode());
+
 
   if (TM->getTargetTriple().isOSWindows()) {
     // Identify valid longjmp targets for Windows Control Flow Guard.
diff --git a/llvm/lib/Target/ARM/CMakeLists.txt b/llvm/lib/Target/ARM/CMakeLists.txt
old mode 100644
new mode 100755
index d4dd215b84ab83..df33e216107b52
--- a/llvm/lib/Target/ARM/CMakeLists.txt
+++ b/llvm/lib/Target/ARM/CMakeLists.txt
@@ -19,6 +19,9 @@ tablegen(LLVM ARMGenSystemRegister.inc -gen-searchable-tables)
 
 add_public_tablegen_target(ARMCommonTableGen)
 
+set(ARM_Shadow_Randomization
+  ARMEncodeDecode.cpp
+)
 
 set(ARMRandezvous_SOURCES
   ARMRandezvousInstrumentor.cpp
@@ -77,6 +80,7 @@ add_llvm_target(ARMCodeGen
   Thumb2InstrInfo.cpp
   Thumb2SizeReduction.cpp
   ${ARMRandezvous_SOURCES}
+  ${ARM_Shadow_Randomization}
 
   LINK_COMPONENTS
   ARMDesc
diff --git a/openmp/tools/analyzer/llvm-openmp-analyzer++ b/openmp/tools/analyzer/llvm-openmp-analyzer++
deleted file mode 120000
index b45062ac867626..00000000000000
--- a/openmp/tools/analyzer/llvm-openmp-analyzer++
+++ /dev/null
@@ -1 +0,0 @@
-llvm-openmp-analyzer
\ No newline at end of file
diff --git a/openmp/tools/analyzer/llvm-openmp-analyzer++ b/openmp/tools/analyzer/llvm-openmp-analyzer++
new file mode 100755
index 00000000000000..259809c2dddb29
--- /dev/null
+++ b/openmp/tools/analyzer/llvm-openmp-analyzer++
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+
+"""
+A wrapper for Clang specialized for gathering information about OpenMP programs.
+Simply replace calls to clang or clang++ with llvm-openmp-analyzer to run the
+analysis passes.
+"""
+
+import argparse
+import subprocess
+import yaml # PyYaml to save and load analysis information
+import sys
+import io
+
+from analyzer import getKernelUsage
+
+desc = '''A wrapper around clang that runs OpenMP Analysis passes and gathers
+information about OpenMP programs.'''
+
+default_args = ["-fopenmp", "-Rpass=openmp-opt", "-Rpass-missed=openmp-opt", "-Rpass-analysis=openmp-opt"]
+
+def main():  # Run clang with the OpenMP remark flags; optionally dump a usage report.
+    compiler = ["clang++"] if sys.argv[0].endswith('++') else ["clang"]  # chosen from the script's own name
+    parser = argparse.ArgumentParser(description=desc)
+    parser.add_argument('--usage-report-file',
+            metavar='filename',
+            default='usage.yaml',
+            help='Filename used for the OpenMP kernel usage reports in YAML format. "usage.yaml" by default.')
+    parser.add_argument('--no-usage-report',
+            action='store_true',
+            default=False,
+            help='Do not generate a usage report for the OpenMP kernels.')
+    args, clang_args = parser.parse_known_args()  # unrecognized options are forwarded to clang
+
+    subprocess.run(compiler + default_args + clang_args, check=True)  # raises CalledProcessError on failure
+    output = subprocess.run(compiler + default_args + clang_args + ["-v"], stderr=subprocess.PIPE)  # second run, stderr captured
+    stderr = output.stderr.decode('utf-8')
+
+    if not args.no_usage_report:
+        usage = getKernelUsage(stderr, fname=args.usage_report_file)
+        with io.open(args.usage_report_file, 'w', encoding = 'utf-8') as f:
+            yaml.dump(usage, f)
+
+if __name__ == '__main__':
+    main()

>From d26a1e94b29fd2cb1abe93302adcb1cfdff7d28a Mon Sep 17 00:00:00 2001
From: jiang-zixian <980485445 at qq.com>
Date: Sun, 31 Mar 2024 23:49:41 +0800
Subject: [PATCH 4/8] edit the reademe.md to add commands for compile

---
 README.md                               | 29 +++++++++++++++++++++++++
 llvm/lib/Target/ARM/ARMEncodeDecode.cpp |  1 +
 2 files changed, 30 insertions(+)

diff --git a/README.md b/README.md
index 1273ba17c2fa0c..631ba44fff154e 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,32 @@
+# srtp小组 可用编译命令
+
+1. 设置交换分区
+```bash
+sudo rm /var/cache/swap/swap0
+sudo swapoff -a
+sudo mkdir -p /var/cache/swap/
+sudo dd if=/dev/zero of=/var/cache/swap/swap0 bs=64M count=256
+#(这里的count可以小一点,1024完全ok,更小一些不知道够不够)
+sudo chmod 0600 /var/cache/swap/swap0
+sudo mkswap /var/cache/swap/swap0
+sudo swapon /var/cache/swap/swap0
+sudo swapon -s
+```
+
+2. 进入build文件夹编译
+```bash
+cd llvm-project
+sudo mkdir build
+cd build
+```
+3. 编译
+```bash
+sudo cmake -G "Ninja" -DLLVM_ENABLE_PROJECTS="clang;clang-tools-extra;compiler-rt;" ../llvm -DCMAKE_BUILD_TYPE=release &&sudo ninja -j3
+```
+ninja命令后的-j后跟的数字可根据自己电脑内核数修改,建议比电脑配置内核数少一个,否则电脑其他进程会很卡
+
+
+
 # The LLVM Compiler Infrastructure
 
 This directory and its sub-directories contain the source code for LLVM,
diff --git a/llvm/lib/Target/ARM/ARMEncodeDecode.cpp b/llvm/lib/Target/ARM/ARMEncodeDecode.cpp
index ddf73811254f23..b2bd7fc9401600 100755
--- a/llvm/lib/Target/ARM/ARMEncodeDecode.cpp
+++ b/llvm/lib/Target/ARM/ARMEncodeDecode.cpp
@@ -271,6 +271,7 @@ bool ARMEncodeDecode::EncodeLR(MachineInstr &MI, MachineOperand &LR,
   return true;
 }
 
+//
 bool ARMEncodeDecode::EncodeCallSite(MachineInstr &MI, MachineOperand &MO,
                                      uint32_t Stride) {
   MachineFunction &MF = *MI.getMF();

>From 84b6378ebc1196c159d50be1da847ab64d75c4eb Mon Sep 17 00:00:00 2001
From: jiang-zixian <980485445 at qq.com>
Date: Sun, 31 Mar 2024 23:52:28 +0800
Subject: [PATCH 5/8] add commands for compile

---
 README.md   | 29 -----------------------------
 commands.md | 28 ++++++++++++++++++++++++++++
 2 files changed, 28 insertions(+), 29 deletions(-)
 create mode 100644 commands.md

diff --git a/README.md b/README.md
index 631ba44fff154e..1273ba17c2fa0c 100644
--- a/README.md
+++ b/README.md
@@ -1,32 +1,3 @@
-# srtp小组 可用编译命令
-
-1. 设置交换分区
-```bash
-sudo rm /var/cache/swap/swap0
-sudo swapoff -a
-sudo mkdir -p /var/cache/swap/
-sudo dd if=/dev/zero of=/var/cache/swap/swap0 bs=64M count=256
-#(这里的count可以小一点,1024完全ok,更小一些不知道够不够)
-sudo chmod 0600 /var/cache/swap/swap0
-sudo mkswap /var/cache/swap/swap0
-sudo swapon /var/cache/swap/swap0
-sudo swapon -s
-```
-
-2. 进入build文件夹编译
-```bash
-cd llvm-project
-sudo mkdir build
-cd build
-```
-3. 编译
-```bash
-sudo cmake -G "Ninja" -DLLVM_ENABLE_PROJECTS="clang;clang-tools-extra;compiler-rt;" ../llvm -DCMAKE_BUILD_TYPE=release &&sudo ninja -j3
-```
-ninja命令后的-j后跟的数字可根据自己电脑内核数修改,建议比电脑配置内核数少一个,否则电脑其他进程会很卡
-
-
-
 # The LLVM Compiler Infrastructure
 
 This directory and its sub-directories contain the source code for LLVM,
diff --git a/commands.md b/commands.md
new file mode 100644
index 00000000000000..b989ad0a15a491
--- /dev/null
+++ b/commands.md
@@ -0,0 +1,28 @@
+# srtp小组 可用编译命令
+
+1. 设置交换分区
+```bash
+sudo rm /var/cache/swap/swap0
+sudo swapoff -a
+sudo mkdir -p /var/cache/swap/
+sudo dd if=/dev/zero of=/var/cache/swap/swap0 bs=64M count=256
+#(这里的count可以小一点,1024完全ok,更小一些不知道够不够)
+sudo chmod 0600 /var/cache/swap/swap0
+sudo mkswap /var/cache/swap/swap0
+sudo swapon /var/cache/swap/swap0
+sudo swapon -s
+```
+
+2. 进入build文件夹编译
+```bash
+cd llvm-project
+sudo mkdir build
+cd build
+```
+3. 编译
+```bash
+sudo cmake -G "Ninja" -DLLVM_ENABLE_PROJECTS="clang;clang-tools-extra;compiler-rt;" ../llvm -DCMAKE_BUILD_TYPE=release &&sudo ninja -j3
+```
+ninja命令后的-j后跟的数字可根据自己电脑内核数修改,建议比电脑配置内核数少一个,否则电脑其他进程会很卡
+
+

>From 486da820ff714d5757c3699472ac209b2fb34fcd Mon Sep 17 00:00:00 2001
From: jiang-zixian <980485445 at qq.com>
Date: Sun, 31 Mar 2024 23:54:17 +0800
Subject: [PATCH 6/8] edit the readme.md

---
 README.md   | 30 ++++++++++++++++++++++++++++++
 commands.md | 28 ----------------------------
 2 files changed, 30 insertions(+), 28 deletions(-)
 delete mode 100644 commands.md

diff --git a/README.md b/README.md
index 1273ba17c2fa0c..7f052397e868db 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,33 @@
+# srtp小组 可用编译命令
+
+1. 设置交换分区
+```bash
+sudo rm /var/cache/swap/swap0
+sudo swapoff -a
+sudo mkdir -p /var/cache/swap/
+sudo dd if=/dev/zero of=/var/cache/swap/swap0 bs=64M count=256
+#(这里的count可以小一点,1024完全ok,更小一些不知道够不够)
+sudo chmod 0600 /var/cache/swap/swap0
+sudo mkswap /var/cache/swap/swap0
+sudo swapon /var/cache/swap/swap0
+sudo swapon -s
+```
+
+2. 进入build文件夹编译
+```bash
+cd llvm-project
+sudo mkdir build
+cd build
+```
+3. 编译
+```bash
+sudo cmake -G "Ninja" -DLLVM_ENABLE_PROJECTS="clang;clang-tools-extra;compiler-rt;" ../llvm -DCMAKE_BUILD_TYPE=release &&sudo ninja -j3
+```
+ninja命令后的-j后跟的数字可根据自己电脑内核数修改,建议比电脑配置内核数少一个,否则电脑其他进程会很卡
+
+
+
+
 # The LLVM Compiler Infrastructure
 
 This directory and its sub-directories contain the source code for LLVM,
diff --git a/commands.md b/commands.md
deleted file mode 100644
index b989ad0a15a491..00000000000000
--- a/commands.md
+++ /dev/null
@@ -1,28 +0,0 @@
-# srtp小组 可用编译命令
-
-1. 设置交换分区
-```bash
-sudo rm /var/cache/swap/swap0
-sudo swapoff -a
-sudo mkdir -p /var/cache/swap/
-sudo dd if=/dev/zero of=/var/cache/swap/swap0 bs=64M count=256
-#(这里的count可以小一点,1024完全ok,更小一些不知道够不够)
-sudo chmod 0600 /var/cache/swap/swap0
-sudo mkswap /var/cache/swap/swap0
-sudo swapon /var/cache/swap/swap0
-sudo swapon -s
-```
-
-2. 进入build文件夹编译
-```bash
-cd llvm-project
-sudo mkdir build
-cd build
-```
-3. 编译
-```bash
-sudo cmake -G "Ninja" -DLLVM_ENABLE_PROJECTS="clang;clang-tools-extra;compiler-rt;" ../llvm -DCMAKE_BUILD_TYPE=release &&sudo ninja -j3
-```
-ninja命令后的-j后跟的数字可根据自己电脑内核数修改,建议比电脑配置内核数少一个,否则电脑其他进程会很卡
-
-

>From 458239ab824bbb01217494ff6874bd92b6507fa2 Mon Sep 17 00:00:00 2001
From: jiang-zixian <980485445 at qq.com>
Date: Sun, 7 Apr 2024 17:00:18 +0800
Subject: [PATCH 7/8] can insert nop before bl

---
 llvm/lib/Target/ARM/ARMEncodeDecode.cpp       | 101 ++++++++++++++++--
 llvm/lib/Target/ARM/ARMEncodeDecode.h         |   1 +
 .../Target/ARM/MCTargetDesc/ARMAsmBackend.cpp |  64 +++++------
 3 files changed, 128 insertions(+), 38 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMEncodeDecode.cpp b/llvm/lib/Target/ARM/ARMEncodeDecode.cpp
index b2bd7fc9401600..e1b1d1e950e162 100755
--- a/llvm/lib/Target/ARM/ARMEncodeDecode.cpp
+++ b/llvm/lib/Target/ARM/ARMEncodeDecode.cpp
@@ -271,6 +271,30 @@ bool ARMEncodeDecode::EncodeLR(MachineInstr &MI, MachineOperand &LR,
   return true;
 }
 
+bool ARMEncodeDecode::insertNop(MachineInstr &MI) { // Pads MI with four NOPs
+  MachineFunction &MF = *MI.getMF();
+  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+  const DebugLoc &DL = MI.getDebugLoc();
+
+  Register PredReg;
+  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+
+  // Build four NOPs (tHINT with immediate 0, predicate ARMCC::AL).
+  // NOTE(review): Pred/PredReg above are computed but never used below.
+
+  std::vector<MachineInstr *> NewInsts;
+  for(int i=0;i<4;i++){
+      NewInsts.push_back(
+    BuildMI(MF,DL,TII->get(ARM::tHINT)).addImm(0).addImm(ARMCC::AL).addReg(0));
+  }
+
+  // Hand the NOPs to insertInstsBefore() so they are placed ahead of MI
+  insertInstsBefore(MI, NewInsts);
+
+  ++NumPrologues; // NOTE(review): counts NOP pads in the "prologues" statistic
+  return true; // unconditionally reports a change
+}
+
 //
 bool ARMEncodeDecode::EncodeCallSite(MachineInstr &MI, MachineOperand &MO,
                                      uint32_t Stride) {
@@ -377,6 +401,7 @@ bool ARMEncodeDecode::DecodeLR(MachineInstr &MI, MachineOperand &PCLR,
 //   true  - The Module was transformed.
 //   false - The Module was not transformed.
 //
+
 bool ARMEncodeDecode::runOnModule(Module &M) {
   // if (!EnableEncodeDecode) {
   //   return false;
@@ -400,11 +425,6 @@ bool ARMEncodeDecode::runOnModule(Module &M) {
   bool changed = false;
 
   for (Function &F : M) {
-
-    // if (F.getName() == "jzx_Sum" || F.getName() == "jzx_foo" ||
-    //     F.getName() == "getAddr" ||
-    //     F.getName() == "testThread") 
-    //     {
     if (F.getName() != "Reset_Handler" ) 
         {
       MachineFunction *MF = MMI.getMachineFunction(F);
@@ -417,6 +437,7 @@ bool ARMEncodeDecode::runOnModule(Module &M) {
       std::vector<std::pair<MachineInstr *, MachineOperand *>> Pushes;
       std::vector<std::pair<MachineInstr *, MachineOperand *>> Pops;
       std::vector<std::pair<MachineInstr *, MachineOperand *>> Blxs;
+      std::vector<std::pair<MachineInstr *, MachineOperand *>> Bs;
       MachineOperand *callRegister = nullptr; // 保存最后一个寄存器操作数的指针
       Register callRegister1 = ARM::R9; // 对标以下的r0
       Register callRegister2 = ARM::R9; // 对标以下的r0
@@ -586,7 +607,28 @@ bool ARMEncodeDecode::runOnModule(Module &M) {
               }
             }
             break;
-
+          case ARM::BL:
+          case ARM::BL_pred:
+          case ARM::BMOVPCB_CALL:
+          case ARM::BL_PUSHLR:
+          case ARM::BLXi:
+          case ARM::tBL:
+          case ARM::tBLXi:
+          case ARM::tBL_PUSHLR:
+          case ARM::tBLXr:
+          case ARM::tBLXr_noip:
+          case ARM::tBLXNSr:
+          case ARM::tBLXNS_CALL:
+          case ARM::tBX_CALL:
+          case ARM::tTAILJMPr:
+          case ARM::BLX:
+          case ARM::BLX_noip:
+          case ARM::BLX_pred:
+          case ARM::BLX_pred_noip:
+          case ARM::BX_CALL:
+          case ARM::BMOVPCRX_CALL:
+            Bs.push_back(std::make_pair(&MI, nullptr));
+            break;
           default:
             break;
           }
@@ -613,6 +655,10 @@ bool ARMEncodeDecode::runOnModule(Module &M) {
         changed |= EncodeLR(*MIMO.first, *MIMO.second, Stride);
       }
 
+      for (auto &MIMO : Bs) {
+        changed |= insertNop(*MIMO.first);
+      }
+
       for (auto &MIMO : Blxs) {
         changed |= EncodeCallSite(*MIMO.first, *MIMO.second, Stride);
       }
@@ -620,7 +666,50 @@ bool ARMEncodeDecode::runOnModule(Module &M) {
       for (auto &MIMO : Pops) {
         changed |= DecodeLR(*MIMO.first, *MIMO.second, Stride);
       }
+    }else{
+      MachineFunction *MF = MMI.getMachineFunction(F);
+      if (MF == nullptr) {
+        continue;
+      }
+
+      std::vector<std::pair<MachineInstr *, MachineOperand *>> Bs;
+
+      for (MachineBasicBlock &MBB : *MF) {
+        for (MachineInstr &MI : MBB) {
+          switch (MI.getOpcode()) {
+          case ARM::BL:
+          case ARM::BL_pred:
+          case ARM::BMOVPCB_CALL:
+          case ARM::BL_PUSHLR:
+          case ARM::BLXi:
+          case ARM::tBL:
+          case ARM::tBLXi:
+          case ARM::tBL_PUSHLR:
+          case ARM::tBLXr:
+          case ARM::tBLXr_noip:
+          case ARM::tBLXNSr:
+          case ARM::tBLXNS_CALL:
+          case ARM::tBX_CALL:
+          case ARM::tTAILJMPr:
+          case ARM::BLX:
+          case ARM::BLX_noip:
+          case ARM::BLX_pred:
+          case ARM::BLX_pred_noip:
+          case ARM::BX_CALL:
+          case ARM::BMOVPCRX_CALL:
+            Bs.push_back(std::make_pair(&MI, nullptr));
+            break;
+          default:
+            break;
+          }
+        }
+      }
+
+      for (auto &MIMO : Bs) {
+        changed |= insertNop(*MIMO.first);
+      }
     }
+
   }
 
   return changed;
diff --git a/llvm/lib/Target/ARM/ARMEncodeDecode.h b/llvm/lib/Target/ARM/ARMEncodeDecode.h
index 3b00de8bb71d24..ae2812250b6b8d 100755
--- a/llvm/lib/Target/ARM/ARMEncodeDecode.h
+++ b/llvm/lib/Target/ARM/ARMEncodeDecode.h
@@ -26,6 +26,7 @@ namespace llvm {
     Function * createInitFunction(Module & M);
     bool EncodeLR(MachineInstr & MI, MachineOperand & LR,
                            uint32_t Stride);
+    bool insertNop(MachineInstr &MI);
     bool EncodeCallSite(MachineInstr & MI, MachineOperand & MO,
                            uint32_t Stride);
     bool DecodeLR(MachineInstr & MI, MachineOperand & PCLR,
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 1a2cab20ee07c3..83384f8bc281ae 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -246,8 +246,8 @@ bool ARMAsmBackend::mayNeedRelaxation(const MCInst &Inst,
 
 static const char *checkPCRelOffset(uint64_t Value, int64_t Min, int64_t Max) {
   int64_t Offset = int64_t(Value) - 4;
-  if (Offset < Min || Offset > Max)
-    return "out of range pc-relative fixup value";
+  // if (Offset < Min || Offset > Max)
+  //   return "out of range pc-relative fixup value";
   return nullptr;
 }
 
@@ -262,8 +262,8 @@ const char *ARMAsmBackend::reasonForFixupRelaxation(const MCFixup &Fixup,
     //
     // Relax if the value is too big for a (signed) i8.
     int64_t Offset = int64_t(Value) - 4;
-    if (Offset > 2046 || Offset < -2048)
-      return "out of range pc-relative fixup value";
+    // if (Offset > 2046 || Offset < -2048)
+    //   return "out of range pc-relative fixup value";
     break;
   }
   case ARM::fixup_arm_thumb_bcc: {
@@ -274,8 +274,8 @@ const char *ARMAsmBackend::reasonForFixupRelaxation(const MCFixup &Fixup,
     //
     // Relax if the value is too big for a (signed) i8.
     int64_t Offset = int64_t(Value) - 4;
-    if (Offset > 254 || Offset < -256)
-      return "out of range pc-relative fixup value";
+    // if (Offset > 254 || Offset < -256)
+    //   return "out of range pc-relative fixup value";
     break;
   }
   case ARM::fixup_thumb_adr_pcrel_10:
@@ -285,8 +285,8 @@ const char *ARMAsmBackend::reasonForFixupRelaxation(const MCFixup &Fixup,
     int64_t Offset = int64_t(Value) - 4;
     if (Offset & 3)
       return "misaligned pc-relative fixup value";
-    else if (Offset > 1020 || Offset < 0)
-      return "out of range pc-relative fixup value";
+    // else if (Offset > 1020 || Offset < 0)
+    //   return "out of range pc-relative fixup value";
     break;
   }
   case ARM::fixup_arm_thumb_cb: {
@@ -525,10 +525,10 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
       Value = -Value;
       isAdd = false;
     }
-    if (Value >= 4096) {
-      Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
-      return 0;
-    }
+    // if (Value >= 4096) {
+    //   Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+    //   return 0;
+    // }
     Value |= isAdd << 23;
 
     // Same addressing mode as fixup_arm_pcrel_10,
@@ -546,10 +546,10 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
       Value = -Value;
       opc = 2; // 0b0010
     }
-    if (ARM_AM::getSOImmVal(Value) == -1) {
-      Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
-      return 0;
-    }
+    // if (ARM_AM::getSOImmVal(Value) == -1) {
+    //   Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+    //   return 0;
+    // }
     // Encode the immediate and shift the opcode into place.
     return ARM_AM::getSOImmVal(Value) | (opc << 21);
   }
@@ -714,10 +714,10 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
     // CB instructions can only branch to offsets in [4, 126] in multiples of 2
     // so ensure that the raw value LSB is zero and it lies in [2, 130].
     // An offset of 2 will be relaxed to a NOP.
-    if ((int64_t)Value < 2 || Value > 0x82 || Value & 1) {
-      Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
-      return 0;
-    }
+    // if ((int64_t)Value < 2 || Value > 0x82 || Value & 1) {
+    //   Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+    //   return 0;
+    // }
     // Offset by 4 and don't encode the lower bit, which is always 0.
     // FIXME: diagnose if no Thumb2
     uint32_t Binary = (Value - 4) >> 1;
@@ -755,10 +755,10 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
       isAdd = false;
     }
     // The value has the low 4 bits encoded in [3:0] and the high 4 in [11:8].
-    if (Value >= 256) {
-      Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
-      return 0;
-    }
+    // if (Value >= 256) {
+    //   Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+    //   return 0;
+    // }
     Value = (Value & 0xf) | ((Value & 0xf0) << 4);
     return Value | (isAdd << 23);
   }
@@ -776,10 +776,10 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
     }
     // These values don't encode the low two bits since they're always zero.
     Value >>= 2;
-    if (Value >= 256) {
-      Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
-      return 0;
-    }
+    // if (Value >= 256) {
+    //   Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+    //   return 0;
+    // }
     Value |= isAdd << 23;
 
     // Same addressing mode as fixup_arm_pcrel_10, but with 16-bit halfwords
@@ -807,10 +807,10 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
       return 0;
     }
     Value >>= 1;
-    if (Value >= 256) {
-      Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
-      return 0;
-    }
+    // if (Value >= 256) {
+    //   Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+    //   return 0;
+    // }
     Value |= isAdd << 23;
 
     // Same addressing mode as fixup_arm_pcrel_9, but with 16-bit halfwords

>From 473f894c098e764d3abf523b1b7427cac312de88 Mon Sep 17 00:00:00 2001
From: jiang-zixian <980485445 at qq.com>
Date: Thu, 11 Apr 2024 15:13:16 +0800
Subject: [PATCH 8/8] can insert nop before bl and update blx

---
 llvm/lib/Target/ARM/ARMEncodeDecode.cpp      |  69 -------
 llvm/lib/Target/ARM/ARMRandezvousOptions.cpp |   8 +
 llvm/lib/Target/ARM/ARMRandezvousOptions.h   |   1 +
 llvm/lib/Target/ARM/ARMTargetMachine.cpp     |   5 +-
 llvm/lib/Target/ARM/ARMTrampoline.cpp        | 196 +++++++++++++++++++
 llvm/lib/Target/ARM/ARMTrampoline.h          |  26 +++
 llvm/lib/Target/ARM/CMakeLists.txt           |   1 +
 7 files changed, 234 insertions(+), 72 deletions(-)
 create mode 100644 llvm/lib/Target/ARM/ARMTrampoline.cpp
 create mode 100644 llvm/lib/Target/ARM/ARMTrampoline.h

diff --git a/llvm/lib/Target/ARM/ARMEncodeDecode.cpp b/llvm/lib/Target/ARM/ARMEncodeDecode.cpp
index e1b1d1e950e162..9c8bd085273515 100755
--- a/llvm/lib/Target/ARM/ARMEncodeDecode.cpp
+++ b/llvm/lib/Target/ARM/ARMEncodeDecode.cpp
@@ -437,7 +437,6 @@ bool ARMEncodeDecode::runOnModule(Module &M) {
       std::vector<std::pair<MachineInstr *, MachineOperand *>> Pushes;
       std::vector<std::pair<MachineInstr *, MachineOperand *>> Pops;
       std::vector<std::pair<MachineInstr *, MachineOperand *>> Blxs;
-      std::vector<std::pair<MachineInstr *, MachineOperand *>> Bs;
       MachineOperand *callRegister = nullptr; // 保存最后一个寄存器操作数的指针
       Register callRegister1 = ARM::R9; // 对标以下的r0
       Register callRegister2 = ARM::R9; // 对标以下的r0
@@ -607,28 +606,6 @@ bool ARMEncodeDecode::runOnModule(Module &M) {
               }
             }
             break;
-          case ARM::BL:
-          case ARM::BL_pred:
-          case ARM::BMOVPCB_CALL:
-          case ARM::BL_PUSHLR:
-          case ARM::BLXi:
-          case ARM::tBL:
-          case ARM::tBLXi:
-          case ARM::tBL_PUSHLR:
-          case ARM::tBLXr:
-          case ARM::tBLXr_noip:
-          case ARM::tBLXNSr:
-          case ARM::tBLXNS_CALL:
-          case ARM::tBX_CALL:
-          case ARM::tTAILJMPr:
-          case ARM::BLX:
-          case ARM::BLX_noip:
-          case ARM::BLX_pred:
-          case ARM::BLX_pred_noip:
-          case ARM::BX_CALL:
-          case ARM::BMOVPCRX_CALL:
-            Bs.push_back(std::make_pair(&MI, nullptr));
-            break;
           default:
             break;
           }
@@ -655,10 +632,6 @@ bool ARMEncodeDecode::runOnModule(Module &M) {
         changed |= EncodeLR(*MIMO.first, *MIMO.second, Stride);
       }
 
-      for (auto &MIMO : Bs) {
-        changed |= insertNop(*MIMO.first);
-      }
-
       for (auto &MIMO : Blxs) {
         changed |= EncodeCallSite(*MIMO.first, *MIMO.second, Stride);
       }
@@ -666,48 +639,6 @@ bool ARMEncodeDecode::runOnModule(Module &M) {
       for (auto &MIMO : Pops) {
         changed |= DecodeLR(*MIMO.first, *MIMO.second, Stride);
       }
-    }else{
-      MachineFunction *MF = MMI.getMachineFunction(F);
-      if (MF == nullptr) {
-        continue;
-      }
-
-      std::vector<std::pair<MachineInstr *, MachineOperand *>> Bs;
-
-      for (MachineBasicBlock &MBB : *MF) {
-        for (MachineInstr &MI : MBB) {
-          switch (MI.getOpcode()) {
-          case ARM::BL:
-          case ARM::BL_pred:
-          case ARM::BMOVPCB_CALL:
-          case ARM::BL_PUSHLR:
-          case ARM::BLXi:
-          case ARM::tBL:
-          case ARM::tBLXi:
-          case ARM::tBL_PUSHLR:
-          case ARM::tBLXr:
-          case ARM::tBLXr_noip:
-          case ARM::tBLXNSr:
-          case ARM::tBLXNS_CALL:
-          case ARM::tBX_CALL:
-          case ARM::tTAILJMPr:
-          case ARM::BLX:
-          case ARM::BLX_noip:
-          case ARM::BLX_pred:
-          case ARM::BLX_pred_noip:
-          case ARM::BX_CALL:
-          case ARM::BMOVPCRX_CALL:
-            Bs.push_back(std::make_pair(&MI, nullptr));
-            break;
-          default:
-            break;
-          }
-        }
-      }
-
-      for (auto &MIMO : Bs) {
-        changed |= insertNop(*MIMO.first);
-      }
     }
 
   }
diff --git a/llvm/lib/Target/ARM/ARMRandezvousOptions.cpp b/llvm/lib/Target/ARM/ARMRandezvousOptions.cpp
index 72e3f5dfe9b2ab..fb6956da20fc66 100755
--- a/llvm/lib/Target/ARM/ARMRandezvousOptions.cpp
+++ b/llvm/lib/Target/ARM/ARMRandezvousOptions.cpp
@@ -92,6 +92,14 @@ EncodeDecode("arm-encode-decode",
             cl::Hidden,
             cl::desc("Enable ARM Encode and Decode"),
             cl::location(EnableEncodeDecode),
+            cl::init(false));
+
+bool EnableTrampoline;//jzx
+static cl::opt<bool, true>
+Trampoline("arm-trampoline",
+            cl::Hidden,
+            cl::desc("Enable ARM trampoline"),
+            cl::location(EnableTrampoline),
             cl::init(true));
 
 bool EnableRandezvousRAN;
diff --git a/llvm/lib/Target/ARM/ARMRandezvousOptions.h b/llvm/lib/Target/ARM/ARMRandezvousOptions.h
index 7e754cdf9e0cf2..418391f5c18951 100755
--- a/llvm/lib/Target/ARM/ARMRandezvousOptions.h
+++ b/llvm/lib/Target/ARM/ARMRandezvousOptions.h
@@ -31,6 +31,7 @@ extern bool EnableRandezvousDecoyPointers;
 extern bool EnableRandezvousGlobalGuard;
 extern bool EnableRandezvousShadowStack;
 extern bool EnableEncodeDecode;//jzx
+extern bool EnableTrampoline;//jzx
 extern bool EnableRandezvousRAN;
 extern bool EnableRandezvousLGPromote;
 extern bool EnableRandezvousICallLimiter;
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 1f798dc792952e..79c08e926c8701 100755
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -11,8 +11,6 @@
 
 #include "ARMRandezvousInstrumentor.h"
 #include "ARMRandezvousShadowStack.h"
-//jzx
-#include "ARMEncodeDecode.h"
 #include "ARMTargetMachine.h"
 #include "ARM.h"
 #include "ARMMachineFunctionInfo.h"
@@ -62,6 +60,7 @@
 
 //jzx
 #include "ARMEncodeDecode.h"
+#include "ARMTrampoline.h"
 
 using namespace llvm;
 
@@ -625,7 +624,7 @@ void ARMPassConfig::addPreEmitPass2() {
 
   //jzx
   addPass(createARMEncodeDecode());
-
+  addPass(createARMTrampoline());
 
   if (TM->getTargetTriple().isOSWindows()) {
     // Identify valid longjmp targets for Windows Control Flow Guard.
diff --git a/llvm/lib/Target/ARM/ARMTrampoline.cpp b/llvm/lib/Target/ARM/ARMTrampoline.cpp
new file mode 100644
index 00000000000000..be28669ddf0e17
--- /dev/null
+++ b/llvm/lib/Target/ARM/ARMTrampoline.cpp
@@ -0,0 +1,196 @@
+//===- ARMTrampoline.cpp - ARM Trampoline Pass ----------------------------===//
+//
+// Copyright (c) 2021-2022, University of Rochester
+//
+// Part of the Randezvous Project, under the Apache License v2.0 with
+// LLVM Exceptions.  See LICENSE.txt in the llvm directory for license
+// information.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of a pass that pads the BL-style call
+// instructions of ARM machine code with NOPs and rewrites BLX/BX-style calls
+// through a register to "mov r8, <target>; blx r11".
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm_shadow_randomization"
+
+#include "ARMTrampoline.h"
+#include "ARMRandezvousCLR.h"
+#include "ARMRandezvousOptions.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/RandomNumberGenerator.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+STATISTIC(NumPrologues, "Number of prologues transformed to use shadow stack");
+STATISTIC(NumEpilogues, "Number of epilogues transformed to use shadow stack");
+STATISTIC(NumNullified, "Number of return addresses nullified");
+
+char ARMTrampoline::ID = 0;
+
+ARMTrampoline::ARMTrampoline() : ModulePass(ID) {}
+
+StringRef ARMTrampoline::getPassName() const {
+  return "ARM Trampoline Pass";
+}
+
+void ARMTrampoline::getAnalysisUsage(AnalysisUsage &AU) const {
+  // We need this to access MachineFunctions
+  AU.addRequired<MachineModuleInfoWrapperPass>();
+
+  AU.setPreservesCFG();
+  ModulePass::getAnalysisUsage(AU);
+}
+
+bool ARMTrampoline::insertNop(MachineInstr &MI) { // Pads MI with four NOPs
+  MachineFunction &MF = *MI.getMF();
+  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+  const DebugLoc &DL = MI.getDebugLoc();
+
+  std::vector<MachineInstr *> NewInsts;
+  for(int i=0;i<4;i++){
+      NewInsts.push_back(
+    BuildMI(MF,DL,TII->get(ARM::tHINT)).addImm(0).addImm(ARMCC::AL).addReg(0));// NOP: tHINT #0, predicate AL
+  }
+
+  // Hand the NOPs to insertInstsBefore() so they are placed ahead of MI
+  insertInstsBefore(MI, NewInsts);
+
+  ++NumPrologues; // NOTE(review): counts NOP pads in the "prologues" statistic
+  return true; // unconditionally reports a change
+}
+
+bool ARMTrampoline::BlxTrampoline(MachineInstr &MI, MachineOperand &MO) {
+    // Before the rewrite:
+    //   blx ri
+
+    // After the rewrite (MI itself is passed to removeInst()):
+    //   mov r8, ri
+    //   blx r11
+
+    MachineFunction &MF = *MI.getMF();
+    const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+    const DebugLoc &DL = MI.getDebugLoc();
+
+    std::vector<MachineInstr *> NewInsts;
+    NewInsts.push_back(BuildMI(MF, DL, TII->get(ARM::tMOVr),ARM::R8)
+                        .addReg(MI.getOperand(2).getReg()) // NOTE(review): MO is unused; operand 2 is assumed to be the call target - confirm for every opcode routed here
+                        .add(predOps(ARMCC::AL)));
+    
+    NewInsts.push_back(BuildMI(MF, DL, TII->get(ARM::tBLXr))
+              .add(predOps(ARMCC::AL))
+              .addReg(ARM::R11)); // NOTE(review): presumably R11 holds the trampoline address - confirm where it is initialized
+    insertInstsBefore(MI, NewInsts);
+
+    removeInst(MI);
+
+    return true; // unconditionally reports a change
+}
+
+
+//
+// Method: runOnModule()
+//
+// Description:
+//   This method is called when the PassManager wants this pass to transform
+//   the specified Module.  Unless EnableTrampoline is false, this method
+//
+//   * walks every MachineFunction of the Module,
+//
+//   * pads each instruction in the BL case group of the switch below with
+//     NOPs via insertNop(), and
+//
+//   * rewrites each instruction in the BLX/BX case group that has an explicit
+//     register operand other than LR via BlxTrampoline().
+//
+// Input:
+//   M - A reference to the Module to transform.
+//
+// Output:
+//   M - The transformed Module.
+//
+// Return value:
+//   true  - The Module was transformed.
+//   false - The Module was not transformed.
+//
+
+bool ARMTrampoline::runOnModule(Module &M) {
+  if (!EnableTrampoline) {
+    return false; // pass disabled: Module left untouched
+  }
+
+  MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+
+  // Collect the call instructions of each function, then rewrite them
+  bool changed = false;
+
+  for (Function &F : M) {
+      MachineFunction *MF = MMI.getMachineFunction(F);
+      if (MF == nullptr) {
+        continue; // no machine code for this Function
+      }
+
+      std::vector<std::pair<MachineInstr *, MachineOperand *>> BLs; // handed to insertNop()
+      std::vector<std::pair<MachineInstr *, MachineOperand *>> BLXs; // handed to BlxTrampoline()
+
+      for (MachineBasicBlock &MBB : *MF) {
+        for (MachineInstr &MI : MBB) {
+          switch (MI.getOpcode()) {
+          case ARM::BL:
+          case ARM::BL_pred:
+          case ARM::BMOVPCB_CALL:
+          case ARM::BL_PUSHLR:
+          case ARM::tBL:
+          case ARM::tBL_PUSHLR:
+          case ARM::tTAILJMPr:
+          case ARM::BMOVPCRX_CALL:
+            BLs.push_back(std::make_pair(&MI, nullptr)); // no operand is recorded for this group
+            break;
+          case ARM::BLX:
+          case ARM::BLX_noip:
+          case ARM::BLX_pred:
+          case ARM::BLX_pred_noip:
+          case ARM::BX_CALL:
+          case ARM::tBLXr:
+          case ARM::tBLXr_noip:
+          case ARM::tBLXNSr:
+          case ARM::tBLXNS_CALL:
+          case ARM::tBX_CALL:
+          case ARM::tBLXi:
+          case ARM::BLXi:
+            for (MachineOperand &MO : MI.explicit_operands()) {
+                if (MO.isReg()) {
+                  if (MO.getReg() != ARM::LR) {
+                    BLXs.push_back(std::make_pair(&MI, &MO));
+                    break; // leaves the operand loop: only the first match is recorded
+                  }
+                }
+            }
+            break;
+          default:
+            break;
+          }
+        }
+      }
+
+      for (auto &MIMO : BLs) { // rewriting happens only after the walk above has finished
+        changed |= insertNop(*MIMO.first);
+      }
+
+      for (auto &MIMO : BLXs) {
+        changed |= BlxTrampoline(*MIMO.first,*MIMO.second);
+      }
+
+  }
+
+  return changed;
+}
+
+ModulePass *llvm::createARMTrampoline(void) { return new ARMTrampoline(); }
\ No newline at end of file
diff --git a/llvm/lib/Target/ARM/ARMTrampoline.h b/llvm/lib/Target/ARM/ARMTrampoline.h
new file mode 100644
index 00000000000000..afe812858d639d
--- /dev/null
+++ b/llvm/lib/Target/ARM/ARMTrampoline.h
@@ -0,0 +1,26 @@
+#ifndef ARM_TRAMPOLINE
+#define ARM_TRAMPOLINE
+
+#include "ARMRandezvousInstrumentor.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/RandomNumberGenerator.h"
+
+namespace llvm {
+  struct ARMTrampoline : public ModulePass, ARMRandezvousInstrumentor {
+    // Pass Identifier
+    static char ID;
+
+    ARMTrampoline();
+    virtual StringRef getPassName() const override;
+    void getAnalysisUsage(AnalysisUsage & AU) const override;
+    virtual bool runOnModule(Module & M) override;
+
+  private:
+    bool insertNop(MachineInstr &MI); // puts four NOPs ahead of MI
+    bool BlxTrampoline(MachineInstr &MI, MachineOperand &MO); // MI -> mov r8,<reg>; blx r11
+  };
+
+  ModulePass * createARMTrampoline(void); // returns a newly allocated ARMTrampoline
+}
+
+#endif
\ No newline at end of file
diff --git a/llvm/lib/Target/ARM/CMakeLists.txt b/llvm/lib/Target/ARM/CMakeLists.txt
index df33e216107b52..3115f8b44fc5df 100755
--- a/llvm/lib/Target/ARM/CMakeLists.txt
+++ b/llvm/lib/Target/ARM/CMakeLists.txt
@@ -21,6 +21,7 @@ add_public_tablegen_target(ARMCommonTableGen)
 
 set(ARM_Shadow_Randomization
   ARMEncodeDecode.cpp
+  ARMTrampoline.cpp
 )
 
 set(ARMRandezvous_SOURCES



More information about the llvm-commits mailing list