[libcxx-commits] [clang] [compiler-rt] [libc] [libcxx] [lld] [lldb] [llvm] [mlir] [BOLT][NFC] Track fragment relationships using EquivalenceClasses (PR #99979)

Wed Jul 24 07:14:51 PDT 2024

https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/99979

>From 5cd086013b91df97f69b151c42d24f1d2a5de08b Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at meta.com>
Date: Mon, 22 Jul 2024 15:31:36 -0700
Subject: [PATCH 1/2] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?=
 =?UTF-8?q?anges=20to=20main=20this=20commit=20is=20based=20on?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4

[skip ci]
---
 a-abfdec1d.o.tmp                              |   0
 bolt/docs/OptimizingClang.md                  |   1 +
 bolt/docs/generate_doc.py                     |   4 +-
 bolt/include/bolt/Core/BinaryContext.h        |   5 +-
 bolt/lib/Core/BinaryContext.cpp               |   5 +-
 bolt/lib/Rewrite/BinaryPassManager.cpp        |   7 +-
 bolt/lib/Rewrite/DWARFRewriter.cpp            | 144 +--
 bolt/test/AArch64/update-debug-reloc.test     |   2 +-
 bolt/test/AArch64/veneer-gold.s               |   4 +-
 ...dwarf5-df-types-modify-dwo-name-mixed.test |   2 +-
 .../X86/dwarf5-one-loclists-two-bases.test    |   2 +-
 bolt/test/X86/dwarf5-two-loclists.test        |   2 +-
 bolt/test/X86/dwarf5-two-rnglists.test        |   4 +-
 bolt/test/perf2bolt/lit.local.cfg             |   2 +-
 clang/cmake/caches/Fuchsia.cmake              |   3 -
 clang/docs/MSVCCompatibility.rst              | 130 +++
 clang/docs/ReleaseNotes.rst                   |   5 +-
 .../include/clang/Basic/DiagnosticLexKinds.td |   5 +
 clang/include/clang/Basic/IdentifierTable.h   |  24 +-
 clang/include/clang/Basic/TokenKinds.def      |   3 +
 clang/include/clang/Driver/Options.td         |   8 +-
 .../include/clang/Frontend/FrontendOptions.h  |   8 +-
 clang/include/clang/Lex/Preprocessor.h        |  83 +-
 clang/include/clang/Lex/Token.h               |   3 +
 clang/include/clang/Parse/Parser.h            |   2 +-
 clang/lib/AST/Interp/Boolean.h                |   2 +-
 clang/lib/AST/Interp/Disasm.cpp               |   4 +-
 clang/lib/AST/Interp/EvalEmitter.cpp          |  11 +-
 clang/lib/AST/Interp/EvaluationResult.cpp     |   6 +-
 clang/lib/AST/Interp/Floating.h               |   2 +-
 clang/lib/AST/Interp/FunctionPointer.h        |   4 +-
 clang/lib/AST/Interp/Integral.h               |   2 +-
 clang/lib/AST/Interp/IntegralAP.h             |   2 +-
 clang/lib/AST/Interp/Interp.cpp               |   4 +-
 clang/lib/AST/Interp/Interp.h                 |   9 +-
 clang/lib/AST/Interp/MemberPointer.cpp        |   4 +-
 clang/lib/AST/Interp/MemberPointer.h          |   2 +-
 clang/lib/AST/Interp/Pointer.cpp              |  79 +-
 clang/lib/AST/Interp/Pointer.h                |   2 +-
 clang/lib/Basic/IdentifierTable.cpp           |   3 +-
 clang/lib/Driver/ToolChains/Clang.cpp         |  57 +-
 clang/lib/Driver/ToolChains/Gnu.cpp           |  29 +
 clang/lib/Format/TokenAnnotator.cpp           |  19 +-
 .../lib/Frontend/PrintPreprocessedOutput.cpp  |  12 +-
 clang/lib/Headers/prfchwintrin.h              |   2 +-
 clang/lib/Lex/PPLexerChange.cpp               |   9 +-
 clang/lib/Lex/Preprocessor.cpp                | 444 +++++---
 clang/lib/Lex/TokenConcatenation.cpp          |  10 +
 clang/lib/Parse/ParseDecl.cpp                 |   8 +-
 clang/lib/Parse/Parser.cpp                    |  93 +-
 clang/lib/Sema/SemaTemplateInstantiate.cpp    |  11 +-
 .../lib/Sema/SemaTemplateInstantiateDecl.cpp  |  11 +-
 clang/test/AST/Interp/codegen.cpp             |  20 +
 clang/test/AST/Interp/cxx11.cpp               |   8 +
 clang/test/AST/Interp/new-delete.cpp          |   5 +
 clang/test/CXX/cpp/cpp.module/p2.cppm         |  88 ++
 .../basic/basic.link/module-declaration.cpp   |  61 +-
 .../dcl.module/dcl.module.import/p1.cppm      |  39 +-
 clang/test/CodeGen/attr-target-x86.c          |   2 +-
 clang/test/Driver/ftime-trace-sections.cpp    |   2 +-
 clang/test/Driver/ftime-trace.cpp             |  39 +-
 clang/test/Driver/gpu-libc-headers.c          |   8 +-
 clang/test/SemaCXX/modules.cppm               |  89 +-
 clang/tools/driver/cc1_main.cpp               |   3 +-
 clang/tools/driver/cc1as_main.cpp             |   7 +-
 clang/unittests/AST/Interp/toAPValue.cpp      |  20 +-
 clang/unittests/Format/TokenAnnotatorTest.cpp |  20 +
 clang/unittests/Support/TimeProfilerTest.cpp  | 123 +--
 clang/www/cxx_status.html                     |   2 +-
 compiler-rt/lib/asan/asan_interceptors.cpp    |   2 +-
 compiler-rt/lib/builtins/cpu_model/x86.c      |   2 +-
 compiler-rt/lib/lsan/lsan_interceptors.cpp    |   2 +-
 cross-project-tests/lit.cfg.py                |  14 +-
 cross-project-tests/lit.site.cfg.py.in        |   4 +
 libc/config/config.json                       |  10 +
 libc/config/linux/aarch64/entrypoints.txt     |   1 +
 libc/config/linux/riscv/entrypoints.txt       | 107 +-
 libc/config/linux/x86_64/entrypoints.txt      |   5 +-
 libc/docs/configure.rst                       |   3 +
 libc/docs/dev/undefined_behavior.rst          |  23 +
 libc/include/assert.h.def                     |  11 +-
 libc/spec/posix.td                            |  15 +-
 libc/src/__support/File/file.cpp              |   2 +-
 libc/src/__support/File/file.h                |   2 +-
 libc/src/__support/HashTable/randomness.h     |   2 +-
 libc/src/__support/OSUtil/CMakeLists.txt      |  17 +
 .../src/__support/OSUtil/linux/CMakeLists.txt |  13 +
 libc/src/__support/OSUtil/linux/pid.cpp       |  20 +
 libc/src/__support/OSUtil/pid.h               |  41 +
 libc/src/__support/threads/CMakeLists.txt     |  27 +
 .../__support/threads/linux/CMakeLists.txt    |   1 +
 libc/src/__support/threads/linux/rwlock.h     |   9 +-
 libc/src/__support/threads/linux/thread.cpp   |   2 +
 libc/src/__support/threads/thread.h           |  37 +-
 libc/src/__support/threads/tid.h              |  34 +
 libc/src/math/docs/add_math_function.md       |   2 +-
 libc/src/stdio/fopencookie.cpp                |   8 +-
 libc/src/sys/auxv/linux/getauxval.cpp         |   2 +-
 libc/src/sys/epoll/linux/epoll_pwait2.cpp     |  12 +
 libc/src/sys/stat/linux/kernel_statx.h        |   5 +-
 libc/src/unistd/CMakeLists.txt                |  10 +
 libc/src/unistd/getpid.h                      |   4 +-
 libc/src/unistd/gettid.cpp                    |  17 +
 libc/src/unistd/gettid.h                      |  21 +
 libc/src/unistd/linux/CMakeLists.txt          |   4 +-
 libc/src/unistd/linux/fork.cpp                |  32 +-
 libc/src/unistd/linux/getpid.cpp              |  11 +-
 libc/startup/linux/CMakeLists.txt             |   1 +
 libc/startup/linux/do_start.cpp               |   5 +
 .../integration/src/unistd/CMakeLists.txt     |   4 +
 .../test/integration/src/unistd/fork_test.cpp |  24 +-
 .../src/__support/File/platform_file_test.cpp |   3 +-
 .../src/sys/epoll/linux/epoll_create_test.cpp |   3 +
 libc/test/src/unistd/CMakeLists.txt           |  10 +
 libc/test/src/unistd/gettid_test.cpp          |  15 +
 libc/utils/gpu/server/rpc_server.cpp          |   4 +
 libcxx/docs/ImplementationDefinedBehavior.rst |  11 +
 libcxx/docs/Status/Cxx17.rst                  |   1 +
 libcxx/docs/Status/Cxx17Papers.csv            |   2 +-
 libcxx/docs/Status/FormatPaper.csv            |   4 +-
 libcxx/docs/Status/SpecialMath.rst            |  35 +
 libcxx/docs/Status/SpecialMathProjects.csv    |  22 +
 libcxx/docs/index.rst                         |   1 +
 libcxx/include/CMakeLists.txt                 |   1 +
 libcxx/include/__chrono/convert_to_tm.h       |  10 +-
 libcxx/include/__chrono/formatter.h           |  69 +-
 libcxx/include/__chrono/ostream.h             |   9 +
 libcxx/include/__configuration/abi.h          |   4 +
 libcxx/include/__math/special_functions.h     |  84 ++
 libcxx/include/__type_traits/datasizeof.h     |   1 +
 libcxx/include/__utility/pair.h               |  46 +-
 libcxx/include/chrono                         |   7 +
 libcxx/include/cmath                          |   9 +
 libcxx/include/module.modulemap               |   1 +
 libcxx/include/vector                         |   6 +-
 libcxx/modules/std/cmath.inc                  |   2 +
 .../pairs.pair/abi.trivial_copy_move.pass.cpp |   5 +
 .../abi.trivially_copyable.compile.pass.cpp   |  22 +-
 .../vector.capacity/shrink_to_fit.pass.cpp    |  47 +-
 .../test/std/numerics/c.math/hermite.pass.cpp | 341 ++++++
 .../time.syn/formatter.zoned_time.pass.cpp    | 974 ++++++++++++++++++
 .../test_offset_time_zone.h                   |  19 +
 .../ostream.pass.cpp                          | 351 +++++++
 libcxx/utils/libcxx/test/modules.py           |   7 +
 lld/ELF/ScriptLexer.cpp                       |  14 +-
 lld/ELF/ScriptLexer.h                         |   1 -
 lld/ELF/ScriptParser.cpp                      |  46 +-
 lld/MachO/ObjC.cpp                            |  14 +-
 lld/test/ELF/defsym.s                         |   5 +-
 .../ELF/linkerscript/custom-section-type.s    |   2 +-
 ...jc-category-merging-erase-objc-name-test.s | 306 ++++++
 lld/test/MachO/reproduce-thin-archive-objc.s  |  13 +-
 .../process_save_core/TestProcessSaveCore.py  |   4 +-
 lldb/test/API/lit.cfg.py                      |   5 +
 lldb/test/API/lit.site.cfg.py.in              |   8 +
 lldb/test/Shell/helper/toolchain.py           |   5 +
 lldb/test/Shell/lit.site.cfg.py.in            |   9 +
 llvm/CMakeLists.txt                           |   4 +
 llvm/docs/CommandGuide/lit.rst                |   4 +
 .../CodeGen/GlobalISel/InstructionSelect.h    |   4 +-
 .../include/llvm/CodeGen/RuntimeLibcallUtil.h |   4 +
 llvm/include/llvm/CodeGen/TargetLowering.h    |  12 +-
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td      |  26 +-
 llvm/include/llvm/IR/RuntimeLibcalls.h        |  21 -
 llvm/include/llvm/MC/TargetRegistry.h         | 152 +--
 llvm/include/llvm/SandboxIR/SandboxIR.h       |  52 +-
 .../llvm/SandboxIR/SandboxIRValues.def        |   1 +
 llvm/include/llvm/Support/TimeProfiler.h      |  23 +-
 .../llvm/TargetParser/X86TargetParser.def     |   8 +-
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    |  11 +-
 .../CodeGen/AsmPrinter/DwarfCFIException.cpp  |   2 +-
 llvm/lib/CodeGen/CFGuardLongjmp.cpp           |   2 +-
 llvm/lib/CodeGen/EHContGuardCatchret.cpp      |   2 +-
 .../CodeGen/GlobalISel/InstructionSelect.cpp  |  10 +-
 llvm/lib/CodeGen/KCFI.cpp                     |   2 +-
 llvm/lib/CodeGen/LLVMTargetMachine.cpp        |  31 +-
 llvm/lib/CodeGen/TargetLoweringBase.cpp       |  35 +
 .../lib/DWARFLinker/Classic/DWARFStreamer.cpp |   5 +-
 .../DWARFLinker/Parallel/DWARFEmitterImpl.cpp |   5 +-
 llvm/lib/IR/CMakeLists.txt                    |   1 -
 llvm/lib/IR/RuntimeLibcalls.cpp               |  33 -
 llvm/lib/LTO/LTO.cpp                          |   1 -
 llvm/lib/MC/MCAsmStreamer.cpp                 |  39 +-
 llvm/lib/MC/MCMachOStreamer.cpp               |  56 +-
 llvm/lib/MC/MCParser/AsmParser.cpp            |  18 +-
 llvm/lib/MC/MCWinCOFFStreamer.cpp             |   7 +-
 llvm/lib/MC/MCXCOFFStreamer.cpp               |   9 -
 llvm/lib/MC/TargetRegistry.cpp                |  95 ++
 llvm/lib/SandboxIR/SandboxIR.cpp              |  57 +-
 llvm/lib/Support/TimeProfiler.cpp             |  61 +-
 llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp |   6 +-
 .../AArch64/AArch64CallingConvention.td       |  73 +-
 .../Target/AArch64/AArch64FrameLowering.cpp   |  20 +-
 .../Target/AArch64/AArch64ISelLowering.cpp    |  39 +-
 .../Target/AArch64/AArch64RegisterInfo.cpp    |   7 +-
 llvm/lib/Target/AArch64/AArch64Subtarget.h    |   4 +-
 .../AArch64/GISel/AArch64CallLowering.cpp     |  12 +-
 .../GISel/AArch64InstructionSelector.cpp      |   2 +-
 .../MCTargetDesc/AArch64MCTargetDesc.cpp      |  17 +-
 .../MCTargetDesc/AArch64WinCOFFStreamer.cpp   |  15 +-
 .../MCTargetDesc/AArch64WinCOFFStreamer.h     |   3 +-
 .../AMDGPU/AMDGPUInstructionSelector.cpp      |   3 +
 .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp |  17 +-
 .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp  |   2 +
 .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp      |   3 +-
 llvm/lib/Target/AMDGPU/MIMGInstructions.td    | 223 +++-
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |  31 +-
 llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp   |   3 +-
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        |   2 +
 llvm/lib/Target/AMDGPU/SIInstructions.td      |   8 +
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h |   1 +
 .../ARM/MCTargetDesc/ARMMCTargetDesc.cpp      |   5 +-
 .../Target/ARM/MCTargetDesc/ARMMCTargetDesc.h |   3 +-
 .../ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp   |   9 +-
 .../LoongArch/LoongArchISelLowering.cpp       | 162 ---
 llvm/lib/Target/M68k/M68kFrameLowering.cpp    |   4 +-
 llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp   |  10 +-
 llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp   |   7 +-
 .../SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp  |  10 -
 .../X86/MCTargetDesc/X86MCCodeEmitter.cpp     |   5 +-
 .../Target/X86/MCTargetDesc/X86MCTargetDesc.h |   3 +-
 .../X86/MCTargetDesc/X86WinCOFFStreamer.cpp   |  15 +-
 llvm/lib/Target/X86/X86AsmPrinter.cpp         |   7 +-
 llvm/lib/Target/X86/X86FrameLowering.cpp      |   7 +-
 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp       |   3 +-
 llvm/lib/Target/X86/X86ISelLowering.cpp       |   6 +-
 .../Target/X86/X86IndirectBranchTracking.cpp  |   2 +-
 llvm/lib/Target/X86/X86MCInstLower.cpp        |   4 +-
 llvm/lib/Target/X86/X86ReturnThunks.cpp       |   2 +-
 llvm/lib/TargetParser/X86TargetParser.cpp     |  20 +-
 llvm/lib/Transforms/IPO/HotColdSplitting.cpp  |  22 +-
 .../Instrumentation/InstrProfiling.cpp        |   3 +
 .../Transforms/Vectorize/LoopVectorize.cpp    | 400 +------
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 367 +++++++
 .../Transforms/Vectorize/VPlanTransforms.cpp  |   5 +
 .../AArch64/preserve_nonecc_varargs_aapcs.ll  | 123 +++
 .../AArch64/preserve_nonecc_varargs_darwin.ll |  67 ++
 .../AArch64/preserve_nonecc_varargs_win64.ll  |  69 ++
 .../AMDGPU/llvm.amdgcn.image.sample.noret.ll  | 479 +++++++++
 llvm/test/CodeGen/LoongArch/andn-icmp.ll      |  78 +-
 llvm/test/CodeGen/NVPTX/addr-mode.ll          |  85 ++
 .../runtime-counter-relocation.ll             |  26 +-
 llvm/test/MC/AsmParser/directive_abort.s      |  13 +-
 llvm/test/MC/X86/x86-32-coverage.s            |  10 +-
 llvm/test/Transforms/HotColdSplit/pr40056.ll  |  72 ++
 .../LoopVectorize/AArch64/pr73894.ll          |   2 -
 .../PowerPC/vplan-force-tail-with-evl.ll      |   2 -
 .../X86/divs-with-tail-folding.ll             |   8 -
 .../LoopVectorize/X86/x86-predication.ll      |  10 -
 ...-order-recurrence-sink-replicate-region.ll |   7 -
 .../LoopVectorize/if-pred-non-void.ll         |   8 -
 .../interleave-and-scalarize-only.ll          |   3 -
 .../pr45679-fold-tail-by-masking.ll           |  12 -
 .../LoopVectorize/select-cmp-multiuse.ll      |  14 -
 .../vplan-sink-scalars-and-merge.ll           |   6 -
 .../NVPTX/split-gep.ll                        |  24 +
 .../tools/llvm-profgen/Inputs/cmov_3.perfbin  | Bin 0 -> 27192 bytes
 .../llvm-profgen/Inputs/cmov_3.perfscript     |  39 +
 .../Inputs/ip-duplication.perfscript          |   2 +
 .../Inputs/noprobe-skid.perfscript            |   5 +
 .../tools/llvm-profgen/event-filtering.test   |  78 ++
 .../llvm-profgen/iponly-nodupfactor.test      |  22 +
 llvm/test/tools/llvm-profgen/iponly.test      |  58 ++
 llvm/tools/llvm-dwp/llvm-dwp.cpp              |   5 +-
 .../llvm-mc-assemble-fuzzer.cpp               |   5 +-
 llvm/tools/llvm-mc/llvm-mc.cpp                |   7 +-
 llvm/tools/llvm-ml/llvm-ml.cpp                |   6 +-
 llvm/tools/llvm-profgen/PerfReader.cpp        | 116 ++-
 llvm/tools/llvm-profgen/ProfileGenerator.cpp  |  31 +-
 .../DWARF/DWARFExpressionCopyBytesTest.cpp    |   5 +-
 .../DebugInfo/DWARF/DwarfGenerator.cpp        |   3 +-
 llvm/unittests/MC/DwarfLineTableHeaders.cpp   |   5 +-
 llvm/unittests/SandboxIR/SandboxIRTest.cpp    |  33 +
 llvm/utils/lit/lit/cl_arguments.py            |  16 +-
 llvm/utils/lit/lit/main.py                    |   3 +-
 llvm/utils/lit/tests/Inputs/time-tests/a.txt  |   1 +
 .../utils/lit/tests/Inputs/time-tests/lit.cfg |   7 +
 llvm/utils/lit/tests/time-tests.py            |  15 +
 mlir/lib/Bytecode/Writer/BytecodeWriter.cpp   | 266 +++--
 mlir/lib/Target/LLVM/ROCDL/Target.cpp         |   3 +-
 .../Transforms/Utils/DialectConversion.cpp    | 173 +---
 .../test-legalize-type-conversion.mlir        |   6 +-
 .../llvm-project-overlay/llvm/BUILD.bazel     |   9 +-
 283 files changed, 7057 insertions(+), 2306 deletions(-)
 delete mode 100644 a-abfdec1d.o.tmp
 create mode 100644 clang/test/AST/Interp/codegen.cpp
 create mode 100644 clang/test/CXX/cpp/cpp.module/p2.cppm
 create mode 100644 libc/src/__support/OSUtil/linux/pid.cpp
 create mode 100644 libc/src/__support/OSUtil/pid.h
 create mode 100644 libc/src/__support/threads/tid.h
 create mode 100644 libc/src/unistd/gettid.cpp
 create mode 100644 libc/src/unistd/gettid.h
 create mode 100644 libc/test/src/unistd/gettid_test.cpp
 create mode 100644 libcxx/docs/Status/SpecialMath.rst
 create mode 100644 libcxx/docs/Status/SpecialMathProjects.csv
 create mode 100644 libcxx/include/__math/special_functions.h
 create mode 100644 libcxx/test/std/numerics/c.math/hermite.pass.cpp
 create mode 100644 libcxx/test/std/time/time.syn/formatter.zoned_time.pass.cpp
 create mode 100644 libcxx/test/std/time/time.zone/time.zone.zonedtime/time.zone.zonedtime.nonmembers/ostream.pass.cpp
 create mode 100644 lld/test/MachO/objc-category-merging-erase-objc-name-test.s
 create mode 100644 llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_aapcs.ll
 create mode 100644 llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
 create mode 100644 llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.noret.ll
 create mode 100644 llvm/test/CodeGen/NVPTX/addr-mode.ll
 create mode 100644 llvm/test/Transforms/HotColdSplit/pr40056.ll
 create mode 100755 llvm/test/tools/llvm-profgen/Inputs/cmov_3.perfbin
 create mode 100644 llvm/test/tools/llvm-profgen/Inputs/cmov_3.perfscript
 create mode 100644 llvm/test/tools/llvm-profgen/Inputs/ip-duplication.perfscript
 create mode 100644 llvm/test/tools/llvm-profgen/Inputs/noprobe-skid.perfscript
 create mode 100644 llvm/test/tools/llvm-profgen/event-filtering.test
 create mode 100644 llvm/test/tools/llvm-profgen/iponly-nodupfactor.test
 create mode 100644 llvm/test/tools/llvm-profgen/iponly.test
 create mode 100644 llvm/utils/lit/tests/Inputs/time-tests/a.txt
 create mode 100644 llvm/utils/lit/tests/Inputs/time-tests/lit.cfg
 create mode 100644 llvm/utils/lit/tests/time-tests.py

diff --git a/a-abfdec1d.o.tmp b/a-abfdec1d.o.tmp
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/bolt/docs/OptimizingClang.md b/bolt/docs/OptimizingClang.md
index ff7e71b6a76bc..685fcc2b738fa 100644
--- a/bolt/docs/OptimizingClang.md
+++ b/bolt/docs/OptimizingClang.md
@@ -49,6 +49,7 @@ $ cd ${TOPLEV}/stage3
 $ CPATH=${TOPLEV}/stage2-prof-use-lto/install/bin/
 $ cmake -G Ninja ${TOPLEV}/llvm -DLLVM_TARGETS_TO_BUILD=X86 -DCMAKE_BUILD_TYPE=Release \
     -DCMAKE_C_COMPILER=$CPATH/clang -DCMAKE_CXX_COMPILER=$CPATH/clang++ \
+    -DLLVM_ENABLE_PROJECTS="clang" \
     -DLLVM_USE_LINKER=lld -DCMAKE_INSTALL_PREFIX=${TOPLEV}/stage3/install
 $ perf record -e cycles:u -j any,u -- ninja clang
 ```
diff --git a/bolt/docs/generate_doc.py b/bolt/docs/generate_doc.py
index d8829daf677b4..763dc00b44ca3 100644
--- a/bolt/docs/generate_doc.py
+++ b/bolt/docs/generate_doc.py
@@ -45,7 +45,7 @@ def parse_bolt_options(output):
         cleaned_line = line.strip()
 
         if cleaned_line.casefold() in map(str.casefold, section_headers):
-            if prev_section != None:  # Save last option from prev section
+            if prev_section is not None:  # Save last option from prev section
                 add_info(sections, current_section, option, description)
                 option, description = None, []
 
@@ -76,7 +76,7 @@ def parse_bolt_options(output):
                 description = [descr]
                 if option.startswith("--print") or option.startswith("--time"):
                     current_section = "BOLT printing options:"
-                elif prev_section != None:
+                elif prev_section is not None:
                     current_section = prev_section
             continue
 
diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index de9ba09a5bb49..b3cf9f834cc08 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -1452,10 +1452,7 @@ class BinaryContext {
     std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(OS);
     std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
         *TheTriple, *Ctx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
-        std::unique_ptr<MCCodeEmitter>(MCE), *STI,
-        /* RelaxAll */ false,
-        /* IncrementalLinkerCompatible */ false,
-        /* DWARFMustBeAtTheEnd */ false));
+        std::unique_ptr<MCCodeEmitter>(MCE), *STI));
     return Streamer;
   }
 
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index 035f68e39751b..83a5484f097ef 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -2367,10 +2367,7 @@ BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
   std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS);
   std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
       *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
-      std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI,
-      /*RelaxAll=*/false,
-      /*IncrementalLinkerCompatible=*/false,
-      /*DWARFMustBeAtTheEnd=*/false));
+      std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI));
 
   Streamer->initSections(false, *STI);
 
diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp
index aaa0e1ff4d46f..5dfef0b71cc79 100644
--- a/bolt/lib/Rewrite/BinaryPassManager.cpp
+++ b/bolt/lib/Rewrite/BinaryPassManager.cpp
@@ -263,6 +263,10 @@ static cl::opt<bool> CMOVConversionFlag("cmov-conversion",
                                         cl::ReallyHidden,
                                         cl::cat(BoltOptCategory));
 
+static cl::opt<bool> ShortenInstructions("shorten-instructions",
+                                         cl::desc("shorten instructions"),
+                                         cl::init(true),
+                                         cl::cat(BoltOptCategory));
 } // namespace opts
 
 namespace llvm {
@@ -378,7 +382,8 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
   else if (opts::Hugify)
     Manager.registerPass(std::make_unique<HugePage>(NeverPrint));
 
-  Manager.registerPass(std::make_unique<ShortenInstructions>(NeverPrint));
+  Manager.registerPass(std::make_unique<ShortenInstructions>(NeverPrint),
+                       opts::ShortenInstructions);
 
   Manager.registerPass(std::make_unique<RemoveNops>(NeverPrint),
                        !opts::KeepNops);
diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp b/bolt/lib/Rewrite/DWARFRewriter.cpp
index 1ec216b39e95c..ccb45f40c5c7a 100644
--- a/bolt/lib/Rewrite/DWARFRewriter.cpp
+++ b/bolt/lib/Rewrite/DWARFRewriter.cpp
@@ -620,9 +620,10 @@ void DWARFRewriter::updateDebugInfo() {
   uint32_t CUIndex = 0;
   std::mutex AccessMutex;
   // Needs to be invoked in the same order as CUs are processed.
-  auto createRangeLocListAddressWriters =
-      [&](DWARFUnit &CU) -> DebugLocWriter * {
+  llvm::DenseMap<uint64_t, uint64_t> LocListWritersIndexByCU;
+  auto createRangeLocListAddressWriters = [&](DWARFUnit &CU) {
     std::lock_guard<std::mutex> Lock(AccessMutex);
+
     const uint16_t DwarfVersion = CU.getVersion();
     if (DwarfVersion >= 5) {
       auto AddrW = std::make_unique<DebugAddrWriterDwarf5>(
@@ -641,7 +642,6 @@ void DWARFRewriter::updateDebugInfo() {
         RangeListsWritersByCU[*DWOId] = std::move(DWORangeListsSectionWriter);
       }
       AddressWritersByCU[CU.getOffset()] = std::move(AddrW);
-
     } else {
       auto AddrW =
           std::make_unique<DebugAddrWriter>(&BC, CU.getAddressByteSize());
@@ -657,7 +657,7 @@ void DWARFRewriter::updateDebugInfo() {
             std::move(LegacyRangesSectionWriterByCU);
       }
     }
-    return LocListWritersByCU[CUIndex++].get();
+    LocListWritersIndexByCU[CU.getOffset()] = CUIndex++;
   };
 
   DWARF5AcceleratorTable DebugNamesTable(opts::CreateDebugNames, BC,
@@ -666,74 +666,68 @@ void DWARFRewriter::updateDebugInfo() {
   DWPState State;
   if (opts::WriteDWP)
     initDWPState(State);
-  auto processUnitDIE = [&](DWARFUnit *Unit, DIEBuilder *DIEBlder) {
-    // Check if the unit is a skeleton and we need special updates for it and
-    // its matching split/DWO CU.
-    std::optional<DWARFUnit *> SplitCU;
+  auto processSplitCU = [&](DWARFUnit &Unit, DWARFUnit &SplitCU,
+                            DIEBuilder &DIEBlder,
+                            DebugRangesSectionWriter &TempRangesSectionWriter,
+                            DebugAddrWriter &AddressWriter) {
+    DIEBuilder DWODIEBuilder(BC, &(SplitCU).getContext(), DebugNamesTable,
+                             &Unit);
+    DWODIEBuilder.buildDWOUnit(SplitCU);
+    std::string DWOName = "";
+    std::optional<std::string> DwarfOutputPath =
+        opts::DwarfOutputPath.empty()
+            ? std::nullopt
+            : std::optional<std::string>(opts::DwarfOutputPath.c_str());
+    {
+      std::lock_guard<std::mutex> Lock(AccessMutex);
+      DWOName = DIEBlder.updateDWONameCompDir(
+          *StrOffstsWriter, *StrWriter, Unit, DwarfOutputPath, std::nullopt);
+    }
+    DebugStrOffsetsWriter DWOStrOffstsWriter(BC);
+    DebugStrWriter DWOStrWriter((SplitCU).getContext(), true);
+    DWODIEBuilder.updateDWONameCompDirForTypes(
+        DWOStrOffstsWriter, DWOStrWriter, SplitCU, DwarfOutputPath, DWOName);
+    DebugLoclistWriter DebugLocDWoWriter(Unit, Unit.getVersion(), true,
+                                         AddressWriter);
+
+    updateUnitDebugInfo(SplitCU, DWODIEBuilder, DebugLocDWoWriter,
+                        TempRangesSectionWriter, AddressWriter);
+    DebugLocDWoWriter.finalize(DWODIEBuilder,
+                               *DWODIEBuilder.getUnitDIEbyUnit(SplitCU));
+    if (Unit.getVersion() >= 5)
+      TempRangesSectionWriter.finalizeSection();
+
+    emitDWOBuilder(DWOName, DWODIEBuilder, *this, SplitCU, Unit, State,
+                   DebugLocDWoWriter, DWOStrOffstsWriter, DWOStrWriter,
+                   GDBIndexSection);
+  };
+  auto processMainBinaryCU = [&](DWARFUnit &Unit, DIEBuilder &DIEBlder) {
+    DebugAddrWriter &AddressWriter =
+        *AddressWritersByCU[Unit.getOffset()].get();
+    DebugRangesSectionWriter &RangesSectionWriter =
+        Unit.getVersion() >= 5 ? *RangeListsSectionWriter.get()
+                               : *LegacyRangesSectionWriter.get();
+    DebugLocWriter &DebugLocWriter =
+        *LocListWritersByCU[LocListWritersIndexByCU[Unit.getOffset()]].get();
     std::optional<uint64_t> RangesBase;
-    std::optional<uint64_t> DWOId = Unit->getDWOId();
+    std::optional<DWARFUnit *> SplitCU;
+    std::optional<uint64_t> DWOId = Unit.getDWOId();
     if (DWOId)
       SplitCU = BC.getDWOCU(*DWOId);
-    DebugLocWriter *DebugLocWriter = createRangeLocListAddressWriters(*Unit);
-    DebugRangesSectionWriter *RangesSectionWriter =
-        Unit->getVersion() >= 5 ? RangeListsSectionWriter.get()
-                                : LegacyRangesSectionWriter.get();
-    DebugAddrWriter *AddressWriter =
-        AddressWritersByCU[Unit->getOffset()].get();
-    // Skipping CUs that failed to load.
-    if (SplitCU) {
-      DIEBuilder DWODIEBuilder(BC, &(*SplitCU)->getContext(), DebugNamesTable,
-                               Unit);
-      DWODIEBuilder.buildDWOUnit(**SplitCU);
-      std::string DWOName = "";
-      std::optional<std::string> DwarfOutputPath =
-          opts::DwarfOutputPath.empty()
-              ? std::nullopt
-              : std::optional<std::string>(opts::DwarfOutputPath.c_str());
-      {
-        std::lock_guard<std::mutex> Lock(AccessMutex);
-        DWOName = DIEBlder->updateDWONameCompDir(
-            *StrOffstsWriter, *StrWriter, *Unit, DwarfOutputPath, std::nullopt);
-      }
-      DebugStrOffsetsWriter DWOStrOffstsWriter(BC);
-      DebugStrWriter DWOStrWriter((*SplitCU)->getContext(), true);
-      DWODIEBuilder.updateDWONameCompDirForTypes(DWOStrOffstsWriter,
-                                                 DWOStrWriter, **SplitCU,
-                                                 DwarfOutputPath, DWOName);
-      DebugLoclistWriter DebugLocDWoWriter(*Unit, Unit->getVersion(), true,
-                                           *AddressWriter);
-      DebugRangesSectionWriter *TempRangesSectionWriter = RangesSectionWriter;
-      if (Unit->getVersion() >= 5) {
-        TempRangesSectionWriter = RangeListsWritersByCU[*DWOId].get();
-      } else {
-        TempRangesSectionWriter = LegacyRangesWritersByCU[*DWOId].get();
-        RangesBase = RangesSectionWriter->getSectionOffset();
-      }
-
-      updateUnitDebugInfo(*(*SplitCU), DWODIEBuilder, DebugLocDWoWriter,
-                          *TempRangesSectionWriter, *AddressWriter);
-      DebugLocDWoWriter.finalize(DWODIEBuilder,
-                                 *DWODIEBuilder.getUnitDIEbyUnit(**SplitCU));
-      if (Unit->getVersion() >= 5)
-        TempRangesSectionWriter->finalizeSection();
-
-      emitDWOBuilder(DWOName, DWODIEBuilder, *this, **SplitCU, *Unit, State,
-                     DebugLocDWoWriter, DWOStrOffstsWriter, DWOStrWriter,
-                     GDBIndexSection);
-    }
-
-    if (Unit->getVersion() >= 5) {
-      RangesBase = RangesSectionWriter->getSectionOffset() +
+    if (Unit.getVersion() >= 5) {
+      RangesBase = RangesSectionWriter.getSectionOffset() +
                    getDWARF5RngListLocListHeaderSize();
-      RangesSectionWriter->initSection(*Unit);
-      StrOffstsWriter->finalizeSection(*Unit, *DIEBlder);
+      RangesSectionWriter.initSection(Unit);
+      StrOffstsWriter->finalizeSection(Unit, DIEBlder);
+    } else if (SplitCU) {
+      RangesBase = LegacyRangesSectionWriter.get()->getSectionOffset();
     }
 
-    updateUnitDebugInfo(*Unit, *DIEBlder, *DebugLocWriter, *RangesSectionWriter,
-                        *AddressWriter, RangesBase);
-    DebugLocWriter->finalize(*DIEBlder, *DIEBlder->getUnitDIEbyUnit(*Unit));
-    if (Unit->getVersion() >= 5)
-      RangesSectionWriter->finalizeSection();
+    updateUnitDebugInfo(Unit, DIEBlder, DebugLocWriter, RangesSectionWriter,
+                        AddressWriter, RangesBase);
+    DebugLocWriter.finalize(DIEBlder, *DIEBlder.getUnitDIEbyUnit(Unit));
+    if (Unit.getVersion() >= 5)
+      RangesSectionWriter.finalizeSection();
   };
 
   DIEBuilder DIEBlder(BC, BC.DwCtx.get(), DebugNamesTable);
@@ -751,8 +745,24 @@ void DWARFRewriter::updateDebugInfo() {
   CUPartitionVector PartVec = partitionCUs(*BC.DwCtx);
   for (std::vector<DWARFUnit *> &Vec : PartVec) {
     DIEBlder.buildCompileUnits(Vec);
+    for (DWARFUnit *CU : DIEBlder.getProcessedCUs()) {
+      createRangeLocListAddressWriters(*CU);
+      std::optional<DWARFUnit *> SplitCU;
+      std::optional<uint64_t> DWOId = CU->getDWOId();
+      if (DWOId)
+        SplitCU = BC.getDWOCU(*DWOId);
+      if (!SplitCU)
+        continue;
+      DebugAddrWriter &AddressWriter =
+          *AddressWritersByCU[CU->getOffset()].get();
+      DebugRangesSectionWriter *TempRangesSectionWriter =
+          CU->getVersion() >= 5 ? RangeListsWritersByCU[*DWOId].get()
+                                : LegacyRangesWritersByCU[*DWOId].get();
+      processSplitCU(*CU, **SplitCU, DIEBlder, *TempRangesSectionWriter,
+                     AddressWriter);
+    }
     for (DWARFUnit *CU : DIEBlder.getProcessedCUs())
-      processUnitDIE(CU, &DIEBlder);
+      processMainBinaryCU(*CU, DIEBlder);
     finalizeCompileUnits(DIEBlder, *Streamer, OffsetMap,
                          DIEBlder.getProcessedCUs(), *FinalAddrWriter);
   }
diff --git a/bolt/test/AArch64/update-debug-reloc.test b/bolt/test/AArch64/update-debug-reloc.test
index d57f42a3852a5..dd83229ea7143 100644
--- a/bolt/test/AArch64/update-debug-reloc.test
+++ b/bolt/test/AArch64/update-debug-reloc.test
@@ -2,7 +2,7 @@
 # update-debug-sections option.
 
 RUN: %clang %cflags -g %p/../Inputs/asm_foo.s %p/../Inputs/asm_main.c -o %t.exe
-RUN: llvm-bolt %t.exe -o %t --update-debug-sections
+RUN: llvm-bolt %t.exe -o %t --update-debug-sections 2>&1 | FileCheck %s
 
 CHECK: BOLT-INFO: Target architecture: aarch64
 CHECK-NOT: Reloc num: 10
diff --git a/bolt/test/AArch64/veneer-gold.s b/bolt/test/AArch64/veneer-gold.s
index 3b3e34ecb1a9f..275febce2b372 100644
--- a/bolt/test/AArch64/veneer-gold.s
+++ b/bolt/test/AArch64/veneer-gold.s
@@ -29,7 +29,7 @@ dummy:
 .type foo, %function
 foo:
 # CHECK: <foo>:
-# CHECK-NEXT : {{.*}} bl {{.*}} <foo2>
+# CHECK-NEXT: {{.*}} bl {{.*}} <foo2>
   bl .L2
   ret
 .size foo, .-foo
@@ -38,7 +38,7 @@ foo:
 .type foo2, %function
 foo2:
 # CHECK: <foo2>:
-# CHECK-NEXT : {{.*}} bl {{.*}} <foo2>
+# CHECK-NEXT: {{.*}} bl {{.*}} <foo2>
   bl .L2
   ret
 .size foo2, .-foo2
diff --git a/bolt/test/X86/dwarf5-df-types-modify-dwo-name-mixed.test b/bolt/test/X86/dwarf5-df-types-modify-dwo-name-mixed.test
index a4f5ee77ab565..6c603ba4ee19d 100644
--- a/bolt/test/X86/dwarf5-df-types-modify-dwo-name-mixed.test
+++ b/bolt/test/X86/dwarf5-df-types-modify-dwo-name-mixed.test
@@ -90,7 +90,7 @@
 ; BOLT-DWP: DW_TAG_compile_unit
 ; BOLT-DWP: DW_AT_dwo_name  ("main.dwo.dwo")
 ; BOLT-DWP: DW_TAG_type_unit
-; BOLT-DW-NOT: DW_AT_dwo_name
+; BOLT-DWP-NOT: DW_AT_dwo_name
 ; BOLT-DWP:       Contribution size = 68, Format = DWARF32, Version = 5
 ; BOLT-DWP-NEXT: "main"
 ; BOLT-DWP-NEXT: "int"
diff --git a/bolt/test/X86/dwarf5-one-loclists-two-bases.test b/bolt/test/X86/dwarf5-one-loclists-two-bases.test
index 873512aad5e8d..f25f6c7a46858 100644
--- a/bolt/test/X86/dwarf5-one-loclists-two-bases.test
+++ b/bolt/test/X86/dwarf5-one-loclists-two-bases.test
@@ -34,7 +34,7 @@
 # POSTCHECK: version = 0x0005
 # POSTCHECK: DW_AT_loclists_base [DW_FORM_sec_offset]	(0x0000000c)
 # POSTCHECK: DW_AT_rnglists_base [DW_FORM_sec_offset]	(0x0000000c)
-# POSTCHECK-EMPTY
+# POSTCHECK-EMPTY:
 # POSTCHECK: DW_TAG_variable
 # POSTCHECK: DW_AT_location [DW_FORM_loclistx]
 # POSTCHECK-SAME: indexed (0x0)
diff --git a/bolt/test/X86/dwarf5-two-loclists.test b/bolt/test/X86/dwarf5-two-loclists.test
index 2ede02f3b76fb..a7c6351f9813c 100644
--- a/bolt/test/X86/dwarf5-two-loclists.test
+++ b/bolt/test/X86/dwarf5-two-loclists.test
@@ -45,7 +45,7 @@
 # POSTCHECK: version = 0x0005
 # POSTCHECK: DW_AT_loclists_base [DW_FORM_sec_offset]	(0x0000000c)
 # POSTCHECK: DW_AT_rnglists_base [DW_FORM_sec_offset]	(0x0000000c)
-# POSTCHECK-EMPTY
+# POSTCHECK-EMPTY:
 # POSTCHECK: DW_TAG_variable
 # POSTCHECK: DW_AT_location [DW_FORM_loclistx]
 # POSTCHECK-SAME: indexed (0x0)
diff --git a/bolt/test/X86/dwarf5-two-rnglists.test b/bolt/test/X86/dwarf5-two-rnglists.test
index 17cdc7643bae5..98f2e347d7673 100644
--- a/bolt/test/X86/dwarf5-two-rnglists.test
+++ b/bolt/test/X86/dwarf5-two-rnglists.test
@@ -52,7 +52,7 @@
 # POSTCHECK-NEXT: DW_AT_addr_base [DW_FORM_sec_offset]  (0x00000008)
 # POSTCHECK-NEXT: DW_AT_loclists_base [DW_FORM_sec_offset]	(0x0000000c)
 # POSTCHECK-NEXT: DW_AT_rnglists_base [DW_FORM_sec_offset]	(0x0000000c)
-# POSTCHECK-EMPTY
+# POSTCHECK-EMPTY:
 # POSTCHECK: DW_TAG_subprogram
 # POSTCHECK-NEXT: DW_AT_ranges [DW_FORM_rnglistx]
 # POSTCHECK-SAME: indexed (0x1)
@@ -75,7 +75,7 @@
 # POSTCHECK-NEXT: DW_AT_addr_base [DW_FORM_sec_offset]  (0x00000030)
 # POSTCHECK-NEXT: DW_AT_loclists_base [DW_FORM_sec_offset]	(0x00000045)
 # POSTCHECK-NEXT: DW_AT_rnglists_base [DW_FORM_sec_offset]	(0x00000035)
-# POSTCHECK-EMPTY
+# POSTCHECK-EMPTY:
 
 # POSTCHECK: DW_TAG_subprogram
 # POSTCHECK-NEXT: DW_AT_ranges [DW_FORM_rnglistx]
diff --git a/bolt/test/perf2bolt/lit.local.cfg b/bolt/test/perf2bolt/lit.local.cfg
index 05f41ff333b0e..4ee9ad08cc78a 100644
--- a/bolt/test/perf2bolt/lit.local.cfg
+++ b/bolt/test/perf2bolt/lit.local.cfg
@@ -1,4 +1,4 @@
 import shutil
 
-if shutil.which("perf") != None:
+if shutil.which("perf") is not None:
     config.available_features.add("perf")
\ No newline at end of file
diff --git a/clang/cmake/caches/Fuchsia.cmake b/clang/cmake/caches/Fuchsia.cmake
index 4d3af3ad3f403..2d2dcb9ae6798 100644
--- a/clang/cmake/caches/Fuchsia.cmake
+++ b/clang/cmake/caches/Fuchsia.cmake
@@ -67,9 +67,6 @@ set(_FUCHSIA_BOOTSTRAP_PASSTHROUGH
   SWIG_EXECUTABLE
   CMAKE_FIND_PACKAGE_PREFER_CONFIG
   CMAKE_SYSROOT
-  CMAKE_MODULE_LINKER_FLAGS
-  CMAKE_SHARED_LINKER_FLAGS
-  CMAKE_EXE_LINKER_FLAGS
   LLVM_WINSYSROOT
   LLVM_VFSOVERLAY
 )
diff --git a/clang/docs/MSVCCompatibility.rst b/clang/docs/MSVCCompatibility.rst
index b2486052abf9a..0b6fea597f8d3 100644
--- a/clang/docs/MSVCCompatibility.rst
+++ b/clang/docs/MSVCCompatibility.rst
@@ -154,3 +154,133 @@ a hint suggesting how to fix the problem.
 As of this writing, Clang is able to compile a simple ATL hello world
 application.  There are still issues parsing WRL headers for modern Windows 8
 apps, but they should be addressed soon.
+
+__forceinline behavior
+======================
+
+``__forceinline`` behaves like ``[[clang::always_inline]]``.
+Inlining is always attempted regardless of optimization level.
+
+This differs from MSVC where ``__forceinline`` is only respected once inline expansion is enabled
+which allows any function marked implicitly or explicitly ``inline`` or ``__forceinline`` to be expanded.
+Therefore functions marked ``__forceinline`` will be expanded when the optimization level is ``/Od`` unlike
+MSVC where ``__forceinline`` will not be expanded under ``/Od``.
+
+SIMD and instruction set intrinsic behavior
+===========================================
+
+Clang follows the GCC model for intrinsics and not the MSVC model.
+There are currently no plans to support the MSVC model.
+
+MSVC intrinsics always emit the machine instruction the intrinsic models regardless of the compile time options specified.
+For example ``__popcnt`` always emits the x86 popcnt instruction even if the compiler does not have the option enabled to emit popcnt of its own volition.
+
+There are two common cases where code that compiles with MSVC will need reworking to build on clang.
+Assume the examples are only built with `-msse2` so we do not have the intrinsics at compile time.
+
+.. code-block:: c++
+
+  unsigned PopCnt(unsigned v) {
+    if (HavePopCnt)
+      return __popcnt(v);
+    else
+      return GenericPopCnt(v);
+  }
+
+.. code-block:: c++
+
+  __m128 dot4_sse3(__m128 v0, __m128 v1) {
+    __m128 r = _mm_mul_ps(v0, v1);
+    r = _mm_hadd_ps(r, r);
+    r = _mm_hadd_ps(r, r);
+    return r;
+  }
+
+Clang expects that you either have compile time support for the target features (`-msse3` and `-mpopcnt`), mark the function with the expected target feature, or use runtime detection with an indirect call.
+
+.. code-block:: c++
+
+  __attribute__((__target__("sse3"))) __m128 dot4_sse3(__m128 v0, __m128 v1) {
+    __m128 r = _mm_mul_ps(v0, v1);
+    r = _mm_hadd_ps(r, r);
+    r = _mm_hadd_ps(r, r);
+    return r;
+  }
+
+The SSE3 dot product can be easily fixed by either building the translation unit with SSE3 support or using `__target__` to compile that specific function with SSE3 support.
+
+.. code-block:: c++
+
+  unsigned PopCnt(unsigned v) {
+    if (HavePopCnt)
+      return __popcnt(v);
+    else
+      return GenericPopCnt(v);
+  }
+
+The above ``PopCnt`` example must be changed to work with clang. If we mark the function with `__target__("popcnt")` then the compiler is free to emit popcnt at will which we do not want. While this isn't a concern in our small example it is a concern in larger functions with surrounding code around the intrinsics. The same reasoning applies to compiling the translation unit with `-mpopcnt`.
+We must split each branch into its own function that can be called indirectly instead of using the intrinsic directly.
+
+.. code-block:: c++
+
+  __attribute__((__target__("popcnt"))) unsigned hwPopCnt(unsigned v) { return __popcnt(v); }
+  unsigned (*PopCnt)(unsigned) = HavePopCnt ? hwPopCnt : GenericPopCnt;
+
+.. code-block:: c++
+
+  __attribute__((__target__("popcnt"))) unsigned hwPopCnt(unsigned v) { return __popcnt(v); }
+  unsigned PopCnt(unsigned v) {
+    if (HavePopCnt)
+      return hwPopCnt(v);
+    else
+      return GenericPopCnt(v);
+  }
+
+In the above example ``hwPopCnt`` will not be inlined into ``PopCnt`` since ``PopCnt`` doesn't have the popcnt target feature.
+With a larger function that does real work the function call overhead is negligible. However in our popcnt example there is the function call
+overhead. There is no analog for this specific MSVC behavior in clang.
+
+For clang we effectively have to create the dispatch function to each specific implementation ourselves.
+
+SIMD vector types
+=================
+
+Clang's simd vector types are builtin types and not user defined types as in MSVC. This does have some observable behavior changes.
+We will look at the x86 `__m128` type for the examples below but the statements apply to all vector types including ARM's `float32x4_t`.
+
+There are no members that can be accessed on the vector types. Vector types are not structs in clang.
+You cannot use ``__m128.m128_f32[0]`` to access the first element of the `__m128`.
+This also means struct initialization like ``__m128{ { 0.0f, 0.0f, 0.0f, 0.0f } }`` will not compile with clang.
+
+Since vector types are builtin types, clang implements operators on them natively.
+
+.. code-block:: c++
+
+  #ifdef _MSC_VER
+  __m128 operator+(__m128 a, __m128 b) { return _mm_add_ps(a, b); }
+  #endif
+
+The above code will fail to compile since overloaded 'operator+' must have at least one parameter of class or enumeration type.
+You will need to fix such code to have the check ``#if defined(_MSC_VER) && !defined(__clang__)``.
+
+Since `__m128` is not a class type in clang any overloads after a template definition will not be considered.
+
+.. code-block:: c++
+
+  template<class T>
+  void foo(T) {}
+
+  template<class T>
+  void bar(T t) {
+    foo(t);
+  }
+
+  void foo(__m128) {}
+
+  int main() {
+    bar(_mm_setzero_ps());
+  }
+
+With MSVC ``foo(__m128)`` will be selected but with clang ``foo<__m128>()`` will be selected since on clang `__m128` is a builtin type.
+
+In general the takeaway is `__m128` is a builtin type on clang while a class type on MSVC.
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 7ac6ed934290d..4638b91b48f95 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -285,6 +285,8 @@ C++2c Feature Support
 
 - Implemented `P2963R3 Ordering of constraints involving fold expressions <https://wg21.link/P2963R3>`_.
 
+- Implemented `P3034R1 Module Declarations Shouldn’t be Macros <https://wg21.link/P3034R1>`_.
+
 
 Resolutions to C++ Defect Reports
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -750,9 +752,6 @@ Improvements to Clang's time-trace
 - Clang now specifies that using ``auto`` in a lambda parameter is a C++14 extension when
   appropriate. (`#46059: <https://github.com/llvm/llvm-project/issues/46059>`_).
 
-- Clang now adds source file infomation for template instantiations as ``event["args"]["filename"]``. This
-  added behind an option ``-ftime-trace-verbose``. This is expected to increase the size of trace by 2-3 times.
-
 Improvements to Coverage Mapping
 --------------------------------
 
diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td
index 12d7b8c0205ee..08ece01009387 100644
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -952,6 +952,11 @@ def warn_module_conflict : Warning<
   InGroup<ModuleConflict>;
 
 // C++20 modules
+def err_module_decl_cannot_be_macros : Error<
+  "the module name in a module%select{| partition}0 declaration cannot contain "
+  "an object-like macro %1">;
+def err_unxepected_paren_in_module_decl : Error<
+  "unexpected '(' after the module name in a module%select{| partition}0 declaration">;
 def err_header_import_semi_in_macro : Error<
   "semicolon terminating header import declaration cannot be produced "
   "by a macro">;
diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h
index ae9ebd9f59154..f40f74d0355ad 100644
--- a/clang/include/clang/Basic/IdentifierTable.h
+++ b/clang/include/clang/Basic/IdentifierTable.h
@@ -180,6 +180,10 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
   LLVM_PREFERRED_TYPE(bool)
   unsigned IsModulesImport : 1;
 
+  // True if this is the 'module' contextual keyword.
+  LLVM_PREFERRED_TYPE(bool)
+  unsigned IsModulesDecl : 1;
+
   // True if this is a mangled OpenMP variant name.
   LLVM_PREFERRED_TYPE(bool)
   unsigned IsMangledOpenMPVariantName : 1;
@@ -196,7 +200,7 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
   LLVM_PREFERRED_TYPE(bool)
   unsigned IsFinal : 1;
 
-  // 22 bits left in a 64-bit word.
+  // 21 bits left in a 64-bit word.
 
   // Managed by the language front-end.
   void *FETokenInfo = nullptr;
@@ -212,8 +216,8 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
         IsCPPOperatorKeyword(false), NeedsHandleIdentifier(false),
         IsFromAST(false), ChangedAfterLoad(false), FEChangedAfterLoad(false),
         RevertedTokenID(false), OutOfDate(false), IsModulesImport(false),
-        IsMangledOpenMPVariantName(false), IsDeprecatedMacro(false),
-        IsRestrictExpansion(false), IsFinal(false) {}
+        IsModulesDecl(false), IsMangledOpenMPVariantName(false),
+        IsDeprecatedMacro(false), IsRestrictExpansion(false), IsFinal(false) {}
 
 public:
   IdentifierInfo(const IdentifierInfo &) = delete;
@@ -520,6 +524,18 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
       RecomputeNeedsHandleIdentifier();
   }
 
+  /// Determine whether this is the contextual keyword \c module.
+  bool isModulesDeclaration() const { return IsModulesDecl; }
+
+  /// Set whether this identifier is the contextual keyword \c module.
+  void setModulesDeclaration(bool I) {
+    IsModulesDecl = I;
+    if (I)
+      NeedsHandleIdentifier = true;
+    else
+      RecomputeNeedsHandleIdentifier();
+  }
+
   /// Determine whether this is the mangled name of an OpenMP variant.
   bool isMangledOpenMPVariantName() const { return IsMangledOpenMPVariantName; }
 
@@ -740,6 +756,8 @@ class IdentifierTable {
     // If this is the 'import' contextual keyword, mark it as such.
     if (Name == "import")
       II->setModulesImport(true);
+    else if (Name == "module")
+      II->setModulesDeclaration(true);
 
     return *II;
   }
diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index 7f4912b9bcd96..8db18c049b6d0 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -1003,6 +1003,9 @@ ANNOTATION(module_include)
 ANNOTATION(module_begin)
 ANNOTATION(module_end)
 
+// Annotations for C++, Clang and Objective-C named modules.
+ANNOTATION(module_name)
+
 // Annotation for a header_name token that has been looked up and transformed
 // into the name of a header unit.
 ANNOTATION(header_unit)
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 9c6cebd77ff0a..6046b0c97bc6f 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3998,10 +3998,6 @@ def ftime_trace_granularity_EQ : Joined<["-"], "ftime-trace-granularity=">, Grou
   HelpText<"Minimum time granularity (in microseconds) traced by time profiler">,
   Visibility<[ClangOption, CC1Option, CLOption, DXCOption]>,
   MarshallingInfoInt<FrontendOpts<"TimeTraceGranularity">, "500u">;
-def ftime_trace_verbose : Joined<["-"], "ftime-trace-verbose">, Group<f_Group>,
-  HelpText<"Make time trace capture verbose event details (e.g. source filenames). This can increase the size of the output by 2-3 times">,
-  Visibility<[ClangOption, CC1Option, CLOption, DXCOption]>,
-  MarshallingInfoFlag<FrontendOpts<"TimeTraceVerbose">>;
 def ftime_trace_EQ : Joined<["-"], "ftime-trace=">, Group<f_Group>,
   HelpText<"Similar to -ftime-trace. Specify the JSON file or a directory which will contain the JSON file">,
   Visibility<[ClangOption, CC1Option, CLOption, DXCOption]>,
@@ -5553,6 +5549,10 @@ def pg : Flag<["-"], "pg">, HelpText<"Enable mcount instrumentation">,
   MarshallingInfoFlag<CodeGenOpts<"InstrumentForProfiling">>;
 def pipe : Flag<["-", "--"], "pipe">,
   HelpText<"Use pipes between commands, when possible">;
+// Facebook T92898286
+def post_link_optimize : Flag<["--"], "post-link-optimize">,
+  HelpText<"Apply post-link optimizations using BOLT">;
+// End Facebook T92898286
 def prebind__all__twolevel__modules : Flag<["-"], "prebind_all_twolevel_modules">;
 def prebind : Flag<["-"], "prebind">;
 def preload : Flag<["-"], "preload">;
diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h
index 8241925c98476..5e5034fe01eb5 100644
--- a/clang/include/clang/Frontend/FrontendOptions.h
+++ b/clang/include/clang/Frontend/FrontendOptions.h
@@ -580,11 +580,6 @@ class FrontendOptions {
   /// Minimum time granularity (in microseconds) traced by time profiler.
   unsigned TimeTraceGranularity;
 
-  /// Make time trace capture verbose event details (e.g. source filenames).
-  /// This can increase the size of the output by 2-3 times.
-  LLVM_PREFERRED_TYPE(bool)
-  unsigned TimeTraceVerbose : 1;
-
   /// Path which stores the output files for -ftime-trace
   std::string TimeTracePath;
 
@@ -606,8 +601,7 @@ class FrontendOptions {
         EmitSymbolGraph(false), EmitExtensionSymbolGraphs(false),
         EmitSymbolGraphSymbolLabelsForTesting(false),
         EmitPrettySymbolGraphs(false), GenReducedBMI(false),
-        UseClangIRPipeline(false), TimeTraceGranularity(500),
-        TimeTraceVerbose(false) {}
+        UseClangIRPipeline(false), TimeTraceGranularity(500) {}
 
   /// getInputKindForExtension - Return the appropriate input kind for a file
   /// extension. For example, "c" would return Language::C.
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index fc7d0053f2323..56aef99a3f38a 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -615,10 +615,6 @@ class Preprocessor {
 
   ModuleDeclSeq ModuleDeclState;
 
-  /// Whether the module import expects an identifier next. Otherwise,
-  /// it expects a '.' or ';'.
-  bool ModuleImportExpectsIdentifier = false;
-
   /// The identifier and source location of the currently-active
   /// \#pragma clang arc_cf_code_audited begin.
   std::pair<IdentifierInfo *, SourceLocation> PragmaARCCFCodeAuditedInfo;
@@ -1744,11 +1740,14 @@ class Preprocessor {
   /// Lex a token, forming a header-name token if possible.
   bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true);
 
+  /// Lex a module name or a partition name.
+  bool LexModuleName(Token &Result, bool IsImport);
+
   /// Lex the parameters for an #embed directive, returns nullopt on error.
   std::optional<LexEmbedParametersResult> LexEmbedParameters(Token &Current,
                                                              bool ForHasEmbed);
-
   bool LexAfterModuleImport(Token &Result);
+  bool LexAfterModuleDecl(Token &Result);
   void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks);
 
   void makeModuleVisible(Module *M, SourceLocation Loc);
@@ -3039,6 +3038,9 @@ class Preprocessor {
   static bool CLK_LexAfterModuleImport(Preprocessor &P, Token &Result) {
     return P.LexAfterModuleImport(Result);
   }
+  static bool CLK_LexAfterModuleDecl(Preprocessor &P, Token &Result) {
+    return P.LexAfterModuleDecl(Result);
+  }
 };
 
 /// Abstract base class that describes a handler that will receive
@@ -3071,6 +3073,77 @@ struct EmbedAnnotationData {
 /// Registry of pragma handlers added by plugins
 using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>;
 
+/// Represents a module or partition name token sequence.
+///
+///     module-name:
+///           module-name-qualifier[opt] identifier
+///
+///     partition-name: [C++20]
+///           : module-name-qualifier[opt] identifier
+///
+///     module-name-qualifier
+///           module-name-qualifier[opt] identifier .
+///
+/// This class can only be created by the preprocessor and guarantees that the
+/// two source arrays are contiguous in memory and contain only 3 kinds of
+/// tokens (identifier, '.' and ':'). It is only available when the
+/// preprocessor returns an annot_module_name token.
+///
+/// For example:
+///
+/// export module m.n:c.d
+///
+/// The module name array has 3 tokens ['m', '.', 'n'].
+/// The partition name array has 4 tokens [':', 'c', '.', 'd'].
+///
+/// When importing a partition in a named module fragment (e.g. import :part1;),
+/// the module name array will be empty, and the partition name array has 2
+/// tokens.
+///
+/// When we meet a private-module-fragment (e.g. module :private;), the
+/// preprocessor will not return an annot_module_name token, but will return 2
+/// separate tokens [':', 'kw_private'].
+
+class ModuleNameInfo {
+  friend class Preprocessor;
+  ArrayRef<Token> ModuleName;
+  ArrayRef<Token> PartitionName;
+
+  ModuleNameInfo(ArrayRef<Token> AnnotToks, std::optional<unsigned> ColonIndex);
+
+public:
+  /// Return the contiguous token array.
+  ArrayRef<Token> getTokens() const {
+    if (ModuleName.empty())
+      return PartitionName;
+    if (PartitionName.empty())
+      return ModuleName;
+    return ArrayRef(ModuleName.begin(), PartitionName.end());
+  }
+  bool hasModuleName() const { return !ModuleName.empty(); }
+  bool hasPartitionName() const { return !PartitionName.empty(); }
+  ArrayRef<Token> getModuleName() const { return ModuleName; }
+  ArrayRef<Token> getPartitionName() const { return PartitionName; }
+  Token getColonToken() const {
+    assert(hasPartitionName() && "Do not have a partition name");
+    return getPartitionName().front();
+  }
+
+  /// Under the standard C++ Modules, the dot is just part of the module name,
+  /// and not a real hierarchy separator. Flatten such module names now.
+  std::string getFlatName() const;
+
+  /// Build a module id path from the contiguous token array, including both
+  /// the module name and the partition name.
+  void getModuleIdPath(
+      SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path) const;
+
+  /// Build a module id path from \p ModuleName.
+  static void getModuleIdPath(
+      ArrayRef<Token> ModuleName,
+      SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path);
+};
+
 } // namespace clang
 
 #endif // LLVM_CLANG_LEX_PREPROCESSOR_H
diff --git a/clang/include/clang/Lex/Token.h b/clang/include/clang/Lex/Token.h
index 4f29fb7d11415..2be3ad39529f0 100644
--- a/clang/include/clang/Lex/Token.h
+++ b/clang/include/clang/Lex/Token.h
@@ -235,6 +235,9 @@ class Token {
     assert(isAnnotation() && "Used AnnotVal on non-annotation token");
     return PtrData;
   }
+  template <class T> T getAnnotationValueAs() const {
+    return static_cast<T>(getAnnotationValue());
+  }
   void setAnnotationValue(void *val) {
     assert(isAnnotation() && "Used AnnotVal on non-annotation token");
     PtrData = val;
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index 93e60be512aae..afcdacf02583a 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -3876,7 +3876,7 @@ class Parser : public CodeCompletionHandler {
   }
 
   bool ParseModuleName(
-      SourceLocation UseLoc,
+      SourceLocation UseLoc, ArrayRef<Token> ModuleName,
       SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path,
       bool IsImport);
 
diff --git a/clang/lib/AST/Interp/Boolean.h b/clang/lib/AST/Interp/Boolean.h
index 1bfb26b1b669f..23f7286036764 100644
--- a/clang/lib/AST/Interp/Boolean.h
+++ b/clang/lib/AST/Interp/Boolean.h
@@ -56,7 +56,7 @@ class Boolean final {
   APSInt toAPSInt(unsigned NumBits) const {
     return APSInt(toAPSInt().zextOrTrunc(NumBits), true);
   }
-  APValue toAPValue() const { return APValue(toAPSInt()); }
+  APValue toAPValue(const ASTContext &) const { return APValue(toAPSInt()); }
 
   Boolean toUnsigned() const { return *this; }
 
diff --git a/clang/lib/AST/Interp/Disasm.cpp b/clang/lib/AST/Interp/Disasm.cpp
index c6c6275593007..867284ecf7f4b 100644
--- a/clang/lib/AST/Interp/Disasm.cpp
+++ b/clang/lib/AST/Interp/Disasm.cpp
@@ -366,9 +366,9 @@ LLVM_DUMP_METHOD void EvaluationResult::dump() const {
 
     OS << "LValue: ";
     if (const auto *P = std::get_if<Pointer>(&Value))
-      P->toAPValue().printPretty(OS, ASTCtx, SourceType);
+      P->toAPValue(ASTCtx).printPretty(OS, ASTCtx, SourceType);
     else if (const auto *FP = std::get_if<FunctionPointer>(&Value)) // Nope
-      FP->toAPValue().printPretty(OS, ASTCtx, SourceType);
+      FP->toAPValue(ASTCtx).printPretty(OS, ASTCtx, SourceType);
     OS << "\n";
     break;
   }
diff --git a/clang/lib/AST/Interp/EvalEmitter.cpp b/clang/lib/AST/Interp/EvalEmitter.cpp
index 59e78686b78ad..08536536ac3c2 100644
--- a/clang/lib/AST/Interp/EvalEmitter.cpp
+++ b/clang/lib/AST/Interp/EvalEmitter.cpp
@@ -145,7 +145,7 @@ template <PrimType OpType> bool EvalEmitter::emitRet(const SourceInfo &Info) {
     return false;
 
   using T = typename PrimConv<OpType>::T;
-  EvalResult.setValue(S.Stk.pop<T>().toAPValue());
+  EvalResult.setValue(S.Stk.pop<T>().toAPValue(Ctx.getASTContext()));
   return true;
 }
 
@@ -169,7 +169,9 @@ template <> bool EvalEmitter::emitRet<PT_Ptr>(const SourceInfo &Info) {
       return false;
     // Never allow reading from a non-const pointer, unless the memory
     // has been created in this evaluation.
-    if (!Ptr.isConst() && Ptr.block()->getEvalID() != Ctx.getEvalID())
+    if (!Ptr.isZero() && Ptr.isBlockPointer() &&
+        Ptr.block()->getEvalID() != Ctx.getEvalID() &&
+        (!CheckLoad(S, OpPC, Ptr, AK_Read) || !Ptr.isConst()))
       return false;
 
     if (std::optional<APValue> V =
@@ -179,7 +181,7 @@ template <> bool EvalEmitter::emitRet<PT_Ptr>(const SourceInfo &Info) {
       return false;
     }
   } else {
-    EvalResult.setValue(Ptr.toAPValue());
+    EvalResult.setValue(Ptr.toAPValue(Ctx.getASTContext()));
   }
 
   return true;
@@ -283,7 +285,8 @@ void EvalEmitter::updateGlobalTemporaries() {
       APValue *Cached = Temp->getOrCreateValue(true);
 
       if (std::optional<PrimType> T = Ctx.classify(E->getType())) {
-        TYPE_SWITCH(*T, { *Cached = Ptr.deref<T>().toAPValue(); });
+        TYPE_SWITCH(
+            *T, { *Cached = Ptr.deref<T>().toAPValue(Ctx.getASTContext()); });
       } else {
         if (std::optional<APValue> APV =
                 Ptr.toRValue(Ctx, Temp->getTemporaryExpr()->getType()))
diff --git a/clang/lib/AST/Interp/EvaluationResult.cpp b/clang/lib/AST/Interp/EvaluationResult.cpp
index 0bebfd4ad984e..1b255711c7b36 100644
--- a/clang/lib/AST/Interp/EvaluationResult.cpp
+++ b/clang/lib/AST/Interp/EvaluationResult.cpp
@@ -21,9 +21,9 @@ APValue EvaluationResult::toAPValue() const {
   case LValue:
     // Either a pointer or a function pointer.
     if (const auto *P = std::get_if<Pointer>(&Value))
-      return P->toAPValue();
+      return P->toAPValue(Ctx->getASTContext());
     else if (const auto *FP = std::get_if<FunctionPointer>(&Value))
-      return FP->toAPValue();
+      return FP->toAPValue(Ctx->getASTContext());
     else
       llvm_unreachable("Unhandled LValue type");
     break;
@@ -46,7 +46,7 @@ std::optional<APValue> EvaluationResult::toRValue() const {
   if (const auto *P = std::get_if<Pointer>(&Value))
     return P->toRValue(*Ctx, getSourceType());
   else if (const auto *FP = std::get_if<FunctionPointer>(&Value)) // Nope
-    return FP->toAPValue();
+    return FP->toAPValue(Ctx->getASTContext());
   llvm_unreachable("Unhandled lvalue kind");
 }
 
diff --git a/clang/lib/AST/Interp/Floating.h b/clang/lib/AST/Interp/Floating.h
index e4ac76d8509fb..114487821880f 100644
--- a/clang/lib/AST/Interp/Floating.h
+++ b/clang/lib/AST/Interp/Floating.h
@@ -69,7 +69,7 @@ class Floating final {
   APSInt toAPSInt(unsigned NumBits = 0) const {
     return APSInt(F.bitcastToAPInt());
   }
-  APValue toAPValue() const { return APValue(F); }
+  APValue toAPValue(const ASTContext &) const { return APValue(F); }
   void print(llvm::raw_ostream &OS) const {
     // Can't use APFloat::print() since it appends a newline.
     SmallVector<char, 16> Buffer;
diff --git a/clang/lib/AST/Interp/FunctionPointer.h b/clang/lib/AST/Interp/FunctionPointer.h
index fc3d7a4214a72..0f2c6e571a1d8 100644
--- a/clang/lib/AST/Interp/FunctionPointer.h
+++ b/clang/lib/AST/Interp/FunctionPointer.h
@@ -40,7 +40,7 @@ class FunctionPointer final {
     return Func->getDecl()->isWeak();
   }
 
-  APValue toAPValue() const {
+  APValue toAPValue(const ASTContext &) const {
     if (!Func)
       return APValue(static_cast<Expr *>(nullptr), CharUnits::Zero(), {},
                      /*OnePastTheEnd=*/false, /*IsNull=*/true);
@@ -69,7 +69,7 @@ class FunctionPointer final {
     if (!Func)
       return "nullptr";
 
-    return toAPValue().getAsString(Ctx, Func->getDecl()->getType());
+    return toAPValue(Ctx).getAsString(Ctx, Func->getDecl()->getType());
   }
 
   uint64_t getIntegerRepresentation() const {
diff --git a/clang/lib/AST/Interp/Integral.h b/clang/lib/AST/Interp/Integral.h
index db4cc9ae45b49..aafdd02676c96 100644
--- a/clang/lib/AST/Interp/Integral.h
+++ b/clang/lib/AST/Interp/Integral.h
@@ -112,7 +112,7 @@ template <unsigned Bits, bool Signed> class Integral final {
     else
       return APSInt(toAPSInt().zextOrTrunc(NumBits), !Signed);
   }
-  APValue toAPValue() const { return APValue(toAPSInt()); }
+  APValue toAPValue(const ASTContext &) const { return APValue(toAPSInt()); }
 
   Integral<Bits, false> toUnsigned() const {
     return Integral<Bits, false>(*this);
diff --git a/clang/lib/AST/Interp/IntegralAP.h b/clang/lib/AST/Interp/IntegralAP.h
index 7464f15cdb03b..b8aa21038256c 100644
--- a/clang/lib/AST/Interp/IntegralAP.h
+++ b/clang/lib/AST/Interp/IntegralAP.h
@@ -133,7 +133,7 @@ template <bool Signed> class IntegralAP final {
     else
       return APSInt(V.zext(Bits), !Signed);
   }
-  APValue toAPValue() const { return APValue(toAPSInt()); }
+  APValue toAPValue(const ASTContext &) const { return APValue(toAPSInt()); }
 
   bool isZero() const { return V.isZero(); }
   bool isPositive() const { return V.isNonNegative(); }
diff --git a/clang/lib/AST/Interp/Interp.cpp b/clang/lib/AST/Interp/Interp.cpp
index cd6fc60400ebd..6fcd90e5f5849 100644
--- a/clang/lib/AST/Interp/Interp.cpp
+++ b/clang/lib/AST/Interp/Interp.cpp
@@ -728,8 +728,8 @@ bool CheckDynamicMemoryAllocation(InterpState &S, CodePtr OpPC) {
     return true;
 
   const SourceInfo &E = S.Current->getSource(OpPC);
-  S.FFDiag(E, diag::note_constexpr_new);
-  return false;
+  S.CCEDiag(E, diag::note_constexpr_new);
+  return true;
 }
 
 bool CheckNewDeleteForms(InterpState &S, CodePtr OpPC, bool NewWasArray,
diff --git a/clang/lib/AST/Interp/Interp.h b/clang/lib/AST/Interp/Interp.h
index f86b787fb034e..b2581b5f7b5d0 100644
--- a/clang/lib/AST/Interp/Interp.h
+++ b/clang/lib/AST/Interp/Interp.h
@@ -39,8 +39,9 @@ namespace interp {
 using APSInt = llvm::APSInt;
 
 /// Convert a value to an APValue.
-template <typename T> bool ReturnValue(const T &V, APValue &R) {
-  R = V.toAPValue();
+template <typename T>
+bool ReturnValue(const InterpState &S, const T &V, APValue &R) {
+  R = V.toAPValue(S.getCtx());
   return true;
 }
 
@@ -286,7 +287,7 @@ bool Ret(InterpState &S, CodePtr &PC, APValue &Result) {
   } else {
     delete S.Current;
     S.Current = nullptr;
-    if (!ReturnValue<T>(Ret, Result))
+    if (!ReturnValue<T>(S, Ret, Result))
       return false;
   }
   return true;
@@ -1318,7 +1319,7 @@ bool InitGlobalTemp(InterpState &S, CodePtr OpPC, uint32_t I,
   const Pointer &Ptr = S.P.getGlobal(I);
 
   const T Value = S.Stk.peek<T>();
-  APValue APV = Value.toAPValue();
+  APValue APV = Value.toAPValue(S.getCtx());
   APValue *Cached = Temp->getOrCreateValue(true);
   *Cached = APV;
 
diff --git a/clang/lib/AST/Interp/MemberPointer.cpp b/clang/lib/AST/Interp/MemberPointer.cpp
index 96f63643e83c9..0c1b6edc5f7e1 100644
--- a/clang/lib/AST/Interp/MemberPointer.cpp
+++ b/clang/lib/AST/Interp/MemberPointer.cpp
@@ -60,13 +60,13 @@ FunctionPointer MemberPointer::toFunctionPointer(const Context &Ctx) const {
   return FunctionPointer(Ctx.getProgram().getFunction(cast<FunctionDecl>(Dcl)));
 }
 
-APValue MemberPointer::toAPValue() const {
+APValue MemberPointer::toAPValue(const ASTContext &ASTCtx) const {
   if (isZero())
     return APValue(static_cast<ValueDecl *>(nullptr), /*IsDerivedMember=*/false,
                    /*Path=*/{});
 
   if (hasBase())
-    return Base.toAPValue();
+    return Base.toAPValue(ASTCtx);
 
   return APValue(cast<ValueDecl>(getDecl()), /*IsDerivedMember=*/false,
                  /*Path=*/{});
diff --git a/clang/lib/AST/Interp/MemberPointer.h b/clang/lib/AST/Interp/MemberPointer.h
index f56dc530431e4..2b3be124db426 100644
--- a/clang/lib/AST/Interp/MemberPointer.h
+++ b/clang/lib/AST/Interp/MemberPointer.h
@@ -80,7 +80,7 @@ class MemberPointer final {
     return MemberPointer(Instance, this->Dcl, this->PtrOffset);
   }
 
-  APValue toAPValue() const;
+  APValue toAPValue(const ASTContext &) const;
 
   bool isZero() const { return Base.isZero() && !Dcl; }
   bool hasBase() const { return !Base.isZero(); }
diff --git a/clang/lib/AST/Interp/Pointer.cpp b/clang/lib/AST/Interp/Pointer.cpp
index ff4da0fa805dc..229007c6d720a 100644
--- a/clang/lib/AST/Interp/Pointer.cpp
+++ b/clang/lib/AST/Interp/Pointer.cpp
@@ -16,6 +16,7 @@
 #include "MemberPointer.h"
 #include "PrimType.h"
 #include "Record.h"
+#include "clang/AST/RecordLayout.h"
 
 using namespace clang;
 using namespace clang::interp;
@@ -119,7 +120,7 @@ void Pointer::operator=(Pointer &&P) {
   }
 }
 
-APValue Pointer::toAPValue() const {
+APValue Pointer::toAPValue(const ASTContext &ASTCtx) const {
   llvm::SmallVector<APValue::LValuePathEntry, 5> Path;
 
   if (isZero())
@@ -141,25 +142,38 @@ APValue Pointer::toAPValue() const {
   else
     llvm_unreachable("Invalid allocation type");
 
-  if (isDummy() || isUnknownSizeArray() || Desc->asExpr())
+  if (isUnknownSizeArray() || Desc->asExpr())
     return APValue(Base, CharUnits::Zero(), Path,
                    /*IsOnePastEnd=*/isOnePastEnd(), /*IsNullPtr=*/false);
 
-  // TODO: compute the offset into the object.
   CharUnits Offset = CharUnits::Zero();
 
+  auto getFieldOffset = [&](const FieldDecl *FD) -> CharUnits {
+    const ASTRecordLayout &Layout = ASTCtx.getASTRecordLayout(FD->getParent());
+    unsigned FieldIndex = FD->getFieldIndex();
+    return ASTCtx.toCharUnitsFromBits(Layout.getFieldOffset(FieldIndex));
+  };
+
   // Build the path into the object.
   Pointer Ptr = *this;
   while (Ptr.isField() || Ptr.isArrayElement()) {
     if (Ptr.isArrayRoot()) {
       Path.push_back(APValue::LValuePathEntry(
           {Ptr.getFieldDesc()->asDecl(), /*IsVirtual=*/false}));
+
+      if (const auto *FD = dyn_cast<FieldDecl>(Ptr.getFieldDesc()->asDecl()))
+        Offset += getFieldOffset(FD);
+
       Ptr = Ptr.getBase();
     } else if (Ptr.isArrayElement()) {
+      unsigned Index;
       if (Ptr.isOnePastEnd())
-        Path.push_back(APValue::LValuePathEntry::ArrayIndex(Ptr.getArray().getNumElems()));
+        Index = Ptr.getArray().getNumElems();
       else
-        Path.push_back(APValue::LValuePathEntry::ArrayIndex(Ptr.getIndex()));
+        Index = Ptr.getIndex();
+
+      Offset += (Index * ASTCtx.getTypeSizeInChars(Ptr.getType()));
+      Path.push_back(APValue::LValuePathEntry::ArrayIndex(Index));
       Ptr = Ptr.getArray();
     } else {
       // TODO: figure out if base is virtual
@@ -170,12 +184,21 @@ APValue Pointer::toAPValue() const {
       if (const auto *BaseOrMember = Desc->asDecl()) {
         Path.push_back(APValue::LValuePathEntry({BaseOrMember, IsVirtual}));
         Ptr = Ptr.getBase();
+
+        if (const auto *FD = dyn_cast<FieldDecl>(BaseOrMember))
+          Offset += getFieldOffset(FD);
+
         continue;
       }
       llvm_unreachable("Invalid field type");
     }
   }
 
+  // FIXME(perf): We compute the lvalue path above, but we can't supply it
+  // for dummy pointers (that causes crashes later in CheckConstantExpression).
+  if (isDummy())
+    Path.clear();
+
   // We assemble the LValuePath starting from the innermost pointer to the
   // outermost one. SO in a.b.c, the first element in Path will refer to
   // the field 'c', while later code expects it to refer to 'a'.
@@ -220,13 +243,19 @@ std::string Pointer::toDiagnosticString(const ASTContext &Ctx) const {
   if (isIntegralPointer())
     return (Twine("&(") + Twine(asIntPointer().Value + Offset) + ")").str();
 
-  return toAPValue().getAsString(Ctx, getType());
+  return toAPValue(Ctx).getAsString(Ctx, getType());
 }
 
 bool Pointer::isInitialized() const {
   if (isIntegralPointer())
     return true;
 
+  if (isRoot() && PointeeStorage.BS.Base == sizeof(GlobalInlineDescriptor)) {
+    const GlobalInlineDescriptor &GD =
+        *reinterpret_cast<const GlobalInlineDescriptor *>(block()->rawData());
+    return GD.InitState == GlobalInitState::Initialized;
+  }
+
   assert(PointeeStorage.BS.Pointee &&
          "Cannot check if null pointer was initialized");
   const Descriptor *Desc = getFieldDesc();
@@ -249,12 +278,6 @@ bool Pointer::isInitialized() const {
   if (asBlockPointer().Base == 0)
     return true;
 
-  if (isRoot() && PointeeStorage.BS.Base == sizeof(GlobalInlineDescriptor)) {
-    const GlobalInlineDescriptor &GD =
-        *reinterpret_cast<const GlobalInlineDescriptor *>(block()->rawData());
-    return GD.InitState == GlobalInitState::Initialized;
-  }
-
   // Field has its bit in an inline descriptor.
   return getInlineDesc()->IsInitialized;
 }
@@ -266,6 +289,13 @@ void Pointer::initialize() const {
   assert(PointeeStorage.BS.Pointee && "Cannot initialize null pointer");
   const Descriptor *Desc = getFieldDesc();
 
+  if (isRoot() && PointeeStorage.BS.Base == sizeof(GlobalInlineDescriptor)) {
+    GlobalInlineDescriptor &GD = *reinterpret_cast<GlobalInlineDescriptor *>(
+        asBlockPointer().Pointee->rawData());
+    GD.InitState = GlobalInitState::Initialized;
+    return;
+  }
+
   assert(Desc);
   if (Desc->isPrimitiveArray()) {
     // Primitive global arrays don't have an initmap.
@@ -294,13 +324,6 @@ void Pointer::initialize() const {
     return;
   }
 
-  if (isRoot() && PointeeStorage.BS.Base == sizeof(GlobalInlineDescriptor)) {
-    GlobalInlineDescriptor &GD = *reinterpret_cast<GlobalInlineDescriptor *>(
-        asBlockPointer().Pointee->rawData());
-    GD.InitState = GlobalInitState::Initialized;
-    return;
-  }
-
   // Field has its bit in an inline descriptor.
   assert(PointeeStorage.BS.Base != 0 &&
          "Only composite fields can be initialised");
@@ -344,10 +367,12 @@ bool Pointer::hasSameArray(const Pointer &A, const Pointer &B) {
 
 std::optional<APValue> Pointer::toRValue(const Context &Ctx,
                                          QualType ResultType) const {
+  const ASTContext &ASTCtx = Ctx.getASTContext();
   assert(!ResultType.isNull());
   // Method to recursively traverse composites.
   std::function<bool(QualType, const Pointer &, APValue &)> Composite;
-  Composite = [&Composite, &Ctx](QualType Ty, const Pointer &Ptr, APValue &R) {
+  Composite = [&Composite, &Ctx, &ASTCtx](QualType Ty, const Pointer &Ptr,
+                                          APValue &R) {
     if (const auto *AT = Ty->getAs<AtomicType>())
       Ty = AT->getValueType();
 
@@ -358,7 +383,7 @@ std::optional<APValue> Pointer::toRValue(const Context &Ctx,
 
     // Primitive values.
     if (std::optional<PrimType> T = Ctx.classify(Ty)) {
-      TYPE_SWITCH(*T, R = Ptr.deref<T>().toAPValue());
+      TYPE_SWITCH(*T, R = Ptr.deref<T>().toAPValue(ASTCtx));
       return true;
     }
 
@@ -375,7 +400,7 @@ std::optional<APValue> Pointer::toRValue(const Context &Ctx,
           QualType FieldTy = F.Decl->getType();
           if (FP.isActive()) {
             if (std::optional<PrimType> T = Ctx.classify(FieldTy)) {
-              TYPE_SWITCH(*T, Value = FP.deref<T>().toAPValue());
+              TYPE_SWITCH(*T, Value = FP.deref<T>().toAPValue(ASTCtx));
             } else {
               Ok &= Composite(FieldTy, FP, Value);
             }
@@ -398,7 +423,7 @@ std::optional<APValue> Pointer::toRValue(const Context &Ctx,
           APValue &Value = R.getStructField(I);
 
           if (std::optional<PrimType> T = Ctx.classify(FieldTy)) {
-            TYPE_SWITCH(*T, Value = FP.deref<T>().toAPValue());
+            TYPE_SWITCH(*T, Value = FP.deref<T>().toAPValue(ASTCtx));
           } else {
             Ok &= Composite(FieldTy, FP, Value);
           }
@@ -436,7 +461,7 @@ std::optional<APValue> Pointer::toRValue(const Context &Ctx,
         APValue &Slot = R.getArrayInitializedElt(I);
         const Pointer &EP = Ptr.atIndex(I);
         if (std::optional<PrimType> T = Ctx.classify(ElemTy)) {
-          TYPE_SWITCH(*T, Slot = EP.deref<T>().toAPValue());
+          TYPE_SWITCH(*T, Slot = EP.deref<T>().toAPValue(ASTCtx));
         } else {
           Ok &= Composite(ElemTy, EP.narrow(), Slot);
         }
@@ -475,7 +500,7 @@ std::optional<APValue> Pointer::toRValue(const Context &Ctx,
       Values.reserve(VT->getNumElements());
       for (unsigned I = 0; I != VT->getNumElements(); ++I) {
         TYPE_SWITCH(ElemT, {
-          Values.push_back(Ptr.atIndex(I).deref<T>().toAPValue());
+          Values.push_back(Ptr.atIndex(I).deref<T>().toAPValue(ASTCtx));
         });
       }
 
@@ -493,11 +518,11 @@ std::optional<APValue> Pointer::toRValue(const Context &Ctx,
 
   // We can return these as rvalues, but we can't deref() them.
   if (isZero() || isIntegralPointer())
-    return toAPValue();
+    return toAPValue(ASTCtx);
 
   // Just load primitive types.
   if (std::optional<PrimType> T = Ctx.classify(ResultType)) {
-    TYPE_SWITCH(*T, return this->deref<T>().toAPValue());
+    TYPE_SWITCH(*T, return this->deref<T>().toAPValue(ASTCtx));
   }
 
   // Return the composite type.
diff --git a/clang/lib/AST/Interp/Pointer.h b/clang/lib/AST/Interp/Pointer.h
index 972f55a553f6e..7fa6a3230a4f9 100644
--- a/clang/lib/AST/Interp/Pointer.h
+++ b/clang/lib/AST/Interp/Pointer.h
@@ -118,7 +118,7 @@ class Pointer {
   bool operator!=(const Pointer &P) const { return !(P == *this); }
 
   /// Converts the pointer to an APValue.
-  APValue toAPValue() const;
+  APValue toAPValue(const ASTContext &ASTCtx) const;
 
   /// Converts the pointer to a string usable in diagnostics.
   std::string toDiagnosticString(const ASTContext &Ctx) const;
diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp
index 4f7ccaf4021d6..97d830214f890 100644
--- a/clang/lib/Basic/IdentifierTable.cpp
+++ b/clang/lib/Basic/IdentifierTable.cpp
@@ -322,8 +322,9 @@ void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
   if (LangOpts.IEEE128)
     AddKeyword("__ieee128", tok::kw___float128, KEYALL, LangOpts, *this);
 
-  // Add the 'import' contextual keyword.
+  // Add the 'import' and 'module' contextual keywords.
   get("import").setModulesImport(true);
+  get("module").setModulesDeclaration(true);
 }
 
 /// Checks if the specified token kind represents a keyword in the
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index f7b987bf810c1..71cdaa10416f4 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -1077,33 +1077,6 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
   if (JA.isOffloading(Action::OFK_HIP))
     getToolChain().AddHIPIncludeArgs(Args, CmdArgs);
 
-  // If we are compiling for a GPU target we want to override the system headers
-  // with ones created by the 'libc' project if present.
-  if (!Args.hasArg(options::OPT_nostdinc) &&
-      !Args.hasArg(options::OPT_nogpuinc) &&
-      !Args.hasArg(options::OPT_nobuiltininc)) {
-    // Without an offloading language we will include these headers directly.
-    // Offloading languages will instead only use the declarations stored in
-    // the resource directory at clang/lib/Headers/llvm_libc_wrappers.
-    if ((getToolChain().getTriple().isNVPTX() ||
-         getToolChain().getTriple().isAMDGCN()) &&
-        C.getActiveOffloadKinds() == Action::OFK_None) {
-      SmallString<128> P(llvm::sys::path::parent_path(D.Dir));
-      llvm::sys::path::append(P, "include");
-      llvm::sys::path::append(P, getToolChain().getTripleString());
-      CmdArgs.push_back("-internal-isystem");
-      CmdArgs.push_back(Args.MakeArgString(P));
-    } else if (C.getActiveOffloadKinds() == Action::OFK_OpenMP) {
-      // TODO: CUDA / HIP include their own headers for some common functions
-      // implemented here. We'll need to clean those up so they do not conflict.
-      SmallString<128> P(D.ResourceDir);
-      llvm::sys::path::append(P, "include");
-      llvm::sys::path::append(P, "llvm_libc_wrappers");
-      CmdArgs.push_back("-internal-isystem");
-      CmdArgs.push_back(Args.MakeArgString(P));
-    }
-  }
-
   // If we are offloading to a target via OpenMP we need to include the
   // openmp_wrappers folder which contains alternative system headers.
   if (JA.isDeviceOffloading(Action::OFK_OpenMP) &&
@@ -1276,6 +1249,35 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
         });
   }
 
+  // If we are compiling for a GPU target we want to override the system headers
+  // with ones created by the 'libc' project if present.
+  // TODO: This should be moved to `AddClangSystemIncludeArgs` by passing the
+  //       OffloadKind as an argument.
+  if (!Args.hasArg(options::OPT_nostdinc) &&
+      !Args.hasArg(options::OPT_nogpuinc) &&
+      !Args.hasArg(options::OPT_nobuiltininc)) {
+    // Without an offloading language we will include these headers directly.
+    // Offloading languages will instead only use the declarations stored in
+    // the resource directory at clang/lib/Headers/llvm_libc_wrappers.
+    if ((getToolChain().getTriple().isNVPTX() ||
+         getToolChain().getTriple().isAMDGCN()) &&
+        C.getActiveOffloadKinds() == Action::OFK_None) {
+      SmallString<128> P(llvm::sys::path::parent_path(D.Dir));
+      llvm::sys::path::append(P, "include");
+      llvm::sys::path::append(P, getToolChain().getTripleString());
+      CmdArgs.push_back("-internal-isystem");
+      CmdArgs.push_back(Args.MakeArgString(P));
+    } else if (C.getActiveOffloadKinds() == Action::OFK_OpenMP) {
+      // TODO: CUDA / HIP include their own headers for some common functions
+      // implemented here. We'll need to clean those up so they do not conflict.
+      SmallString<128> P(D.ResourceDir);
+      llvm::sys::path::append(P, "include");
+      llvm::sys::path::append(P, "llvm_libc_wrappers");
+      CmdArgs.push_back("-internal-isystem");
+      CmdArgs.push_back(Args.MakeArgString(P));
+    }
+  }
+
   // Add system include arguments for all targets but IAMCU.
   if (!IsIAMCU)
     forAllAssociatedToolChains(C, JA, getToolChain(),
@@ -6757,7 +6759,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   if (const char *Name = C.getTimeTraceFile(&JA)) {
     CmdArgs.push_back(Args.MakeArgString("-ftime-trace=" + Twine(Name)));
     Args.AddLastArg(CmdArgs, options::OPT_ftime_trace_granularity_EQ);
-    Args.AddLastArg(CmdArgs, options::OPT_ftime_trace_verbose);
   }
 
   if (Arg *A = Args.getLastArg(options::OPT_ftrapv_handler_EQ)) {
diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp
index 52c2ee90b1b28..ff20deb9c4f86 100644
--- a/clang/lib/Driver/ToolChains/Gnu.cpp
+++ b/clang/lib/Driver/ToolChains/Gnu.cpp
@@ -672,12 +672,41 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
     }
   }
 
+  // Facebook T92898286
+  if (Args.hasArg(options::OPT_post_link_optimize))
+    CmdArgs.push_back("-q");
+  // End Facebook T92898286
+
   Args.AddAllArgs(CmdArgs, options::OPT_T);
 
   const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath());
   C.addCommand(std::make_unique<Command>(JA, *this,
                                          ResponseFileSupport::AtFileCurCP(),
                                          Exec, CmdArgs, Inputs, Output));
+  // Facebook T92898286
+  if (!Args.hasArg(options::OPT_post_link_optimize) || !Output.isFilename())
+    return;
+
+  const char *MvExec = Args.MakeArgString(ToolChain.GetProgramPath("mv"));
+  ArgStringList MoveCmdArgs;
+  MoveCmdArgs.push_back(Output.getFilename());
+  const char *PreBoltBin =
+      Args.MakeArgString(Twine(Output.getFilename()) + ".pre-bolt");
+  MoveCmdArgs.push_back(PreBoltBin);
+  C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
+                                         MvExec, MoveCmdArgs, std::nullopt));
+
+  ArgStringList BoltCmdArgs;
+  const char *BoltExec =
+      Args.MakeArgString(ToolChain.GetProgramPath("llvm-bolt"));
+  BoltCmdArgs.push_back(PreBoltBin);
+  BoltCmdArgs.push_back("-reorder-blocks=reverse");
+  BoltCmdArgs.push_back("-update-debug-sections");
+  BoltCmdArgs.push_back("-o");
+  BoltCmdArgs.push_back(Output.getFilename());
+  C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
+                                         BoltExec, BoltCmdArgs, std::nullopt));
+  // End Facebook T92898286
 }
 
 void tools::gnutools::Assembler::ConstructJob(Compilation &C,
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index b6d6e52ccb8f8..db66911f00f63 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -372,10 +372,6 @@ class AnnotatingParser {
                OpeningParen.Previous->is(tok::kw__Generic)) {
       Contexts.back().ContextType = Context::C11GenericSelection;
       Contexts.back().IsExpression = true;
-    } else if (Line.InPPDirective &&
-               (!OpeningParen.Previous ||
-                OpeningParen.Previous->isNot(tok::identifier))) {
-      Contexts.back().IsExpression = true;
     } else if (Contexts[Contexts.size() - 2].CaretFound) {
       // This is the parameter list of an ObjC block.
       Contexts.back().IsExpression = false;
@@ -388,7 +384,20 @@ class AnnotatingParser {
                OpeningParen.Previous->MatchingParen->isOneOf(
                    TT_ObjCBlockLParen, TT_FunctionTypeLParen)) {
       Contexts.back().IsExpression = false;
-    } else if (!Line.MustBeDeclaration && !Line.InPPDirective) {
+    } else if (Line.InPPDirective) {
+      auto IsExpr = [&OpeningParen] {
+        const auto *Tok = OpeningParen.Previous;
+        if (!Tok || Tok->isNot(tok::identifier))
+          return true;
+        Tok = Tok->Previous;
+        while (Tok && Tok->endsSequence(tok::coloncolon, tok::identifier)) {
+          assert(Tok->Previous);
+          Tok = Tok->Previous->Previous;
+        }
+        return !Tok || !Tok->Tok.getIdentifierInfo();
+      };
+      Contexts.back().IsExpression = IsExpr();
+    } else if (!Line.MustBeDeclaration) {
       bool IsForOrCatch =
           OpeningParen.Previous &&
           OpeningParen.Previous->isOneOf(tok::kw_for, tok::kw_catch);
diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
index 0592423c12eca..1fff88ccf0405 100644
--- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp
+++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
@@ -758,9 +758,10 @@ void PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token &Tok,
   // These tokens are not expanded to anything and don't need whitespace before
   // them.
   if (Tok.is(tok::eof) ||
-      (Tok.isAnnotation() && !Tok.is(tok::annot_header_unit) &&
-       !Tok.is(tok::annot_module_begin) && !Tok.is(tok::annot_module_end) &&
-       !Tok.is(tok::annot_repl_input_end) && !Tok.is(tok::annot_embed)))
+      (Tok.isAnnotation() && Tok.isNot(tok::annot_header_unit) &&
+       Tok.isNot(tok::annot_module_begin) && Tok.isNot(tok::annot_module_end) &&
+       Tok.isNot(tok::annot_module_name) &&
+       Tok.isNot(tok::annot_repl_input_end) && Tok.isNot(tok::annot_embed)))
     return;
 
   // EmittedDirectiveOnThisLine takes priority over RequireSameLine.
@@ -951,6 +952,11 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
       PP.Lex(Tok);
       IsStartOfLine = true;
       continue;
+    } else if (Tok.is(tok::annot_module_name)) {
+      auto *Info = static_cast<ModuleNameInfo *>(Tok.getAnnotationValue());
+      *Callbacks->OS << Info->getFlatName();
+      PP.Lex(Tok);
+      continue;
     } else if (Tok.is(tok::annot_header_unit)) {
       // This is a header-name that has been (effectively) converted into a
       // module-name.
diff --git a/clang/lib/Headers/prfchwintrin.h b/clang/lib/Headers/prfchwintrin.h
index 8a13784543c5f..eaea5f3cf8feb 100644
--- a/clang/lib/Headers/prfchwintrin.h
+++ b/clang/lib/Headers/prfchwintrin.h
@@ -8,7 +8,7 @@
  */
 
 #if !defined(__X86INTRIN_H) && !defined(_MM3DNOW_H_INCLUDED)
-#error "Never use <prfchwintrin.h> directly; include <x86intrin.h> or <mm3dnow.h> instead."
+#error "Never use <prfchwintrin.h> directly; include <x86intrin.h> instead."
 #endif
 
 #ifndef __PRFCHWINTRIN_H
diff --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp
index 8221db46e06ac..c3a903917e9ce 100644
--- a/clang/lib/Lex/PPLexerChange.cpp
+++ b/clang/lib/Lex/PPLexerChange.cpp
@@ -122,7 +122,8 @@ void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer,
   CurPPLexer = TheLexer;
   CurDirLookup = CurDir;
   CurLexerSubmodule = nullptr;
-  if (CurLexerCallback != CLK_LexAfterModuleImport)
+  if (CurLexerCallback != CLK_LexAfterModuleImport &&
+      CurLexerCallback != CLK_LexAfterModuleDecl)
     CurLexerCallback = TheLexer->isDependencyDirectivesLexer()
                            ? CLK_DependencyDirectivesLexer
                            : CLK_Lexer;
@@ -161,8 +162,7 @@ void Preprocessor::EnterMacro(Token &Tok, SourceLocation ILEnd,
   PushIncludeMacroStack();
   CurDirLookup = nullptr;
   CurTokenLexer = std::move(TokLexer);
-  if (CurLexerCallback != CLK_LexAfterModuleImport)
-    CurLexerCallback = CLK_TokenLexer;
+  CurLexerCallback = CLK_TokenLexer;
 }
 
 /// EnterTokenStream - Add a "macro" context to the top of the include stack,
@@ -216,7 +216,8 @@ void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks,
   PushIncludeMacroStack();
   CurDirLookup = nullptr;
   CurTokenLexer = std::move(TokLexer);
-  if (CurLexerCallback != CLK_LexAfterModuleImport)
+  if (CurLexerCallback != CLK_LexAfterModuleImport &&
+      CurLexerCallback != CLK_LexAfterModuleDecl)
     CurLexerCallback = CLK_TokenLexer;
 }
 
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 63e27e62cffc8..2726fae344337 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -860,9 +860,15 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) {
     ModuleImportLoc = Identifier.getLocation();
     NamedModuleImportPath.clear();
     IsAtImport = true;
-    ModuleImportExpectsIdentifier = true;
     CurLexerCallback = CLK_LexAfterModuleImport;
   }
+
+  if ((II.isModulesDeclaration() || Identifier.is(tok::kw_module)) &&
+      !InMacroArgs && !DisableMacroExpansion &&
+      (getLangOpts().CPlusPlusModules || getLangOpts().DebuggerSupport) &&
+      CurLexerCallback != CLK_CachingLexer) {
+    CurLexerCallback = CLK_LexAfterModuleDecl;
+  }
   return true;
 }
 
@@ -905,6 +911,7 @@ void Preprocessor::Lex(Token &Result) {
     // This token is injected to represent the translation of '#include "a.h"'
     // into "import a.h;". Mimic the notional ';'.
     case tok::annot_module_include:
+    case tok::annot_repl_input_end:
     case tok::semi:
       TrackGMFState.handleSemi();
       StdCXXImportSeqState.handleSemi();
@@ -919,12 +926,30 @@ void Preprocessor::Lex(Token &Result) {
       StdCXXImportSeqState.handleExport();
       ModuleDeclState.handleExport();
       break;
-    case tok::colon:
-      ModuleDeclState.handleColon();
-      break;
-    case tok::period:
-      ModuleDeclState.handlePeriod();
+    case tok::annot_module_name: {
+      auto *Info = static_cast<ModuleNameInfo *>(Result.getAnnotationValue());
+      for (const auto &Tok : Info->getTokens()) {
+        switch (Tok.getKind()) {
+        case tok::identifier:
+          ModuleDeclState.handleIdentifier(Tok.getIdentifierInfo());
+          break;
+        case tok::period:
+          ModuleDeclState.handlePeriod();
+          break;
+        case tok::colon:
+          ModuleDeclState.handleColon();
+          break;
+        default:
+          llvm_unreachable("Unexpected token in module name");
+        }
+      }
+      if (ModuleDeclState.isModuleCandidate())
+        break;
+      TrackGMFState.handleMisc();
+      StdCXXImportSeqState.handleMisc();
+      ModuleDeclState.handleMisc();
       break;
+    }
     case tok::identifier:
       // Check "import" and "module" when there is no open bracket. The two
       // identifiers are not meaningful with open brackets.
@@ -936,17 +961,17 @@ void Preprocessor::Lex(Token &Result) {
             ModuleImportLoc = Result.getLocation();
             NamedModuleImportPath.clear();
             IsAtImport = false;
-            ModuleImportExpectsIdentifier = true;
             CurLexerCallback = CLK_LexAfterModuleImport;
           }
           break;
-        } else if (Result.getIdentifierInfo() == getIdentifierInfo("module")) {
+        }
+        if (Result.getIdentifierInfo()->isModulesDeclaration()) {
           TrackGMFState.handleModule(StdCXXImportSeqState.afterTopLevelSeq());
           ModuleDeclState.handleModule();
+          CurLexerCallback = CLK_LexAfterModuleDecl;
           break;
         }
       }
-      ModuleDeclState.handleIdentifier(Result.getIdentifierInfo());
       if (ModuleDeclState.isModuleCandidate())
         break;
       [[fallthrough]];
@@ -1121,6 +1146,151 @@ void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) {
   }
 }
 
+ModuleNameInfo::ModuleNameInfo(ArrayRef<Token> AnnotToks,
+                               std::optional<unsigned> ColonIndex) {
+  assert(!AnnotToks.empty() && "Named module token cannot be empty.");
+  if (!ColonIndex.has_value())
+    ColonIndex = AnnotToks.size();
+  ModuleName = ArrayRef(AnnotToks.begin(), AnnotToks.begin() + *ColonIndex);
+  PartitionName = ArrayRef(AnnotToks.begin() + *ColonIndex, AnnotToks.end());
+  assert(ModuleName.end() == PartitionName.begin());
+}
+
+std::string ModuleNameInfo::getFlatName() const {
+  std::string FlatModuleName;
+  for (auto &Tok : getTokens()) {
+    switch (Tok.getKind()) {
+    case tok::identifier:
+      FlatModuleName += Tok.getIdentifierInfo()->getName();
+      break;
+    case tok::period:
+      FlatModuleName += '.';
+      break;
+    case tok::colon:
+      FlatModuleName += ':';
+      break;
+    default:
+      llvm_unreachable("Unexpected token in module name");
+    }
+  }
+  return FlatModuleName;
+}
+
+void ModuleNameInfo::getModuleIdPath(
+    SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path) const {
+  return getModuleIdPath(getTokens(), Path);
+}
+
+void ModuleNameInfo::getModuleIdPath(
+    ArrayRef<Token> ModuleName,
+    SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path) {
+  for (const auto &Tok : ModuleName) {
+    if (Tok.is(tok::identifier))
+      Path.push_back(
+          std::make_pair(Tok.getIdentifierInfo(), Tok.getLocation()));
+  }
+}
+
+/// Lex a module name or a partition name.
+///
+///     module-name:
+///           module-name-qualifier[opt] identifier
+///
+///     partition-name: [C++20]
+///           : module-name-qualifier[opt] identifier
+///
+///     module-name-qualifier
+///           module-name-qualifier[opt] identifier .
+bool Preprocessor::LexModuleName(Token &Result, bool IsImport) {
+  bool ExpectsIdentifier = true, IsLexingPartition = false;
+  SmallVector<Token, 8> ModuleName;
+  std::optional<unsigned> ColonTokIndex;
+  auto LexNextToken = [&](Token &Tok) {
+    if (IsImport)
+      Lex(Tok);
+    else
+      LexUnexpandedToken(Tok);
+  };
+
+  while (true) {
+    LexNextToken(Result);
+    if (ExpectsIdentifier && Result.is(tok::identifier)) {
+      auto *MI = getMacroInfo(Result.getIdentifierInfo());
+      if (getLangOpts().CPlusPlusModules && !IsImport && MI &&
+          MI->isObjectLike()) {
+        Diag(Result, diag::err_module_decl_cannot_be_macros)
+            << Result.getLocation() << IsLexingPartition
+            << Result.getIdentifierInfo();
+      }
+      ModuleName.push_back(Result);
+      ExpectsIdentifier = false;
+      continue;
+    }
+
+    if (!ExpectsIdentifier && Result.is(tok::period)) {
+      ModuleName.push_back(Result);
+      ExpectsIdentifier = true;
+      continue;
+    }
+
+    // Module partitions are only allowed in C++20 modules.
+    if (getLangOpts().CPlusPlusModules && Result.is(tok::colon)) {
+      // Handle the form like: import :P;
+      // If the token after ':' is not an identifier, this is an invalid module
+      // name.
+      if (ModuleName.empty()) {
+        Token Tmp;
+        LexNextToken(Tmp);
+        EnterToken(Tmp, /*IsReinject=*/false);
+        // A private-module-fragment:
+        // export module :private;
+        if (!IsImport && Tmp.is(tok::kw_private))
+          return true;
+        // import :N;
+        if (IsImport && Tmp.isNot(tok::identifier))
+          return false;
+      } else if (!ExpectsIdentifier) {
+        ExpectsIdentifier = true;
+      }
+      IsLexingPartition = true;
+      ColonTokIndex = ModuleName.size();
+      ModuleName.push_back(Result);
+      continue;
+    }
+
+    // [cpp.module]/p2: where the pp-tokens (if any) shall not begin with a (
+    // preprocessing token [...]
+    //
+    // We only emit a diagnostic in the preprocessor, and in the parser we skip
+    // invalid tokens and recover from errors.
+    if (getLangOpts().CPlusPlusModules && !ExpectsIdentifier &&
+        Result.is(tok::l_paren))
+      Diag(Result, diag::err_unxepected_paren_in_module_decl)
+          << IsLexingPartition;
+    break;
+  }
+
+  // Put the last token back into the stream; it's not part of the module name.
+  // We lexed it unexpanded, but it might be a valid macro expansion.
+  Result.clearFlag(Token::DisableExpand);
+  auto ToksCopy = std::make_unique<Token[]>(1);
+  *ToksCopy.get() = Result;
+  EnterTokenStream(std::move(ToksCopy), 1,
+                   /*DisableMacroExpansion=*/false,
+                   /*IsReinject=*/false);
+
+  if (ModuleName.empty())
+    return false;
+  Result.startToken();
+  Result.setKind(tok::annot_module_name);
+  Result.setLocation(ModuleName.front().getLocation());
+  Result.setAnnotationEndLoc(ModuleName.back().getLocation());
+  auto AnnotToks = ArrayRef(ModuleName).copy(getPreprocessorAllocator());
+  ModuleNameInfo *Info =
+      new (getPreprocessorAllocator()) ModuleNameInfo(AnnotToks, ColonTokIndex);
+  Result.setAnnotationValue(static_cast<void *>(Info));
+  return true;
+}
 
 /// Lex a token following the 'import' contextual keyword.
 ///
@@ -1145,6 +1315,17 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) {
   // Figure out what kind of lexer we actually have.
   recomputeCurLexerKind();
 
+  // Allocate a holding buffer for a sequence of tokens and introduce it into
+  // the token stream.
+  auto EnterTokens = [this](ArrayRef<Token> Toks) {
+    auto ToksCopy = std::make_unique<Token[]>(Toks.size());
+    std::copy(Toks.begin(), Toks.end(), ToksCopy.get());
+    EnterTokenStream(std::move(ToksCopy), Toks.size(),
+                     /*DisableMacroExpansion*/ true, /*IsReinject*/ false);
+  };
+
+  SmallVector<Token, 32> Suffix;
+
   // Lex the next token. The header-name lexing rules are used at the start of
   // a pp-import.
   //
@@ -1155,122 +1336,108 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) {
     if (LexHeaderName(Result))
       return true;
 
-    if (Result.is(tok::colon) && ModuleDeclState.isNamedModule()) {
-      std::string Name = ModuleDeclState.getPrimaryName().str();
-      Name += ":";
-      NamedModuleImportPath.push_back(
-          {getIdentifierInfo(Name), Result.getLocation()});
-      CurLexerCallback = CLK_LexAfterModuleImport;
-      return true;
-    }
-  } else {
-    Lex(Result);
-  }
+    // Check for a header-name.
+    if (Result.is(tok::header_name)) {
+      // Enter the header-name token into the token stream; a Lex action cannot
+      // both return a token and cache tokens (doing so would corrupt the token
+      // cache if the call to Lex comes from CachingLex / PeekAhead).
+      Suffix.push_back(Result);
+
+      // Consume the pp-import-suffix and expand any macros in it now. We'll add
+      // it back into the token stream later.
+      CollectPpImportSuffix(Suffix);
+      if (Suffix.back().isNot(tok::semi)) {
+        // This is not a pp-import after all.
+        EnterTokens(Suffix);
+        return false;
+      }
 
-  // Allocate a holding buffer for a sequence of tokens and introduce it into
-  // the token stream.
-  auto EnterTokens = [this](ArrayRef<Token> Toks) {
-    auto ToksCopy = std::make_unique<Token[]>(Toks.size());
-    std::copy(Toks.begin(), Toks.end(), ToksCopy.get());
-    EnterTokenStream(std::move(ToksCopy), Toks.size(),
-                     /*DisableMacroExpansion*/ true, /*IsReinject*/ false);
-  };
+      // C++2a [cpp.module]p1:
+      //   The ';' preprocessing-token terminating a pp-import shall not have
+      //   been produced by macro replacement.
+      SourceLocation SemiLoc = Suffix.back().getLocation();
+      if (SemiLoc.isMacroID())
+        Diag(SemiLoc, diag::err_header_import_semi_in_macro);
+
+      // Reconstitute the import token.
+      Token ImportTok;
+      ImportTok.startToken();
+      ImportTok.setKind(tok::kw_import);
+      ImportTok.setLocation(ModuleImportLoc);
+      ImportTok.setIdentifierInfo(getIdentifierInfo("import"));
+      ImportTok.setLength(6);
+
+      auto Action = HandleHeaderIncludeOrImport(
+          /*HashLoc*/ SourceLocation(), ImportTok, Suffix.front(), SemiLoc);
+      switch (Action.Kind) {
+      case ImportAction::None:
+        break;
 
-  bool ImportingHeader = Result.is(tok::header_name);
-  // Check for a header-name.
-  SmallVector<Token, 32> Suffix;
-  if (ImportingHeader) {
-    // Enter the header-name token into the token stream; a Lex action cannot
-    // both return a token and cache tokens (doing so would corrupt the token
-    // cache if the call to Lex comes from CachingLex / PeekAhead).
-    Suffix.push_back(Result);
+      case ImportAction::ModuleBegin:
+        // Let the parser know we're textually entering the module.
+        Suffix.emplace_back();
+        Suffix.back().startToken();
+        Suffix.back().setKind(tok::annot_module_begin);
+        Suffix.back().setLocation(SemiLoc);
+        Suffix.back().setAnnotationEndLoc(SemiLoc);
+        Suffix.back().setAnnotationValue(Action.ModuleForHeader);
+        [[fallthrough]];
+
+      case ImportAction::ModuleImport:
+      case ImportAction::HeaderUnitImport:
+      case ImportAction::SkippedModuleImport:
+        // We chose to import (or textually enter) the file. Convert the
+        // header-name token into a header unit annotation token.
+        Suffix[0].setKind(tok::annot_header_unit);
+        Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation());
+        Suffix[0].setAnnotationValue(Action.ModuleForHeader);
+        // FIXME: Call the moduleImport callback?
+        break;
+      case ImportAction::Failure:
+        assert(TheModuleLoader.HadFatalFailure &&
+               "This should be an early exit only to a fatal error");
+        Result.setKind(tok::eof);
+        CurLexer->cutOffLexing();
+        EnterTokens(Suffix);
+        return true;
+      }
 
-    // Consume the pp-import-suffix and expand any macros in it now. We'll add
-    // it back into the token stream later.
-    CollectPpImportSuffix(Suffix);
-    if (Suffix.back().isNot(tok::semi)) {
-      // This is not a pp-import after all.
       EnterTokens(Suffix);
       return false;
     }
+  } else {
+    Lex(Result);
+  }
 
-    // C++2a [cpp.module]p1:
-    //   The ';' preprocessing-token terminating a pp-import shall not have
-    //   been produced by macro replacement.
-    SourceLocation SemiLoc = Suffix.back().getLocation();
-    if (SemiLoc.isMacroID())
-      Diag(SemiLoc, diag::err_header_import_semi_in_macro);
-
-    // Reconstitute the import token.
-    Token ImportTok;
-    ImportTok.startToken();
-    ImportTok.setKind(tok::kw_import);
-    ImportTok.setLocation(ModuleImportLoc);
-    ImportTok.setIdentifierInfo(getIdentifierInfo("import"));
-    ImportTok.setLength(6);
-
-    auto Action = HandleHeaderIncludeOrImport(
-        /*HashLoc*/ SourceLocation(), ImportTok, Suffix.front(), SemiLoc);
-    switch (Action.Kind) {
-    case ImportAction::None:
-      break;
-
-    case ImportAction::ModuleBegin:
-      // Let the parser know we're textually entering the module.
-      Suffix.emplace_back();
-      Suffix.back().startToken();
-      Suffix.back().setKind(tok::annot_module_begin);
-      Suffix.back().setLocation(SemiLoc);
-      Suffix.back().setAnnotationEndLoc(SemiLoc);
-      Suffix.back().setAnnotationValue(Action.ModuleForHeader);
-      [[fallthrough]];
-
-    case ImportAction::ModuleImport:
-    case ImportAction::HeaderUnitImport:
-    case ImportAction::SkippedModuleImport:
-      // We chose to import (or textually enter) the file. Convert the
-      // header-name token into a header unit annotation token.
-      Suffix[0].setKind(tok::annot_header_unit);
-      Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation());
-      Suffix[0].setAnnotationValue(Action.ModuleForHeader);
-      // FIXME: Call the moduleImport callback?
-      break;
-    case ImportAction::Failure:
-      assert(TheModuleLoader.HadFatalFailure &&
-             "This should be an early exit only to a fatal error");
-      Result.setKind(tok::eof);
-      CurLexer->cutOffLexing();
-      EnterTokens(Suffix);
+  if (Result.isOneOf(tok::identifier, tok::colon)) {
+    EnterToken(Result, /*IsReinject=*/false);
+    if (!LexModuleName(Result, /*IsImport=*/true))
       return true;
+    auto *Info = Result.getAnnotationValueAs<ModuleNameInfo *>();
+    if (getLangOpts().CPlusPlusModules) {
+      // Under the standard C++ Modules, the dot is just part of the module
+      // name, and not a real hierarchy separator. Flatten such module names
+      // now.
+      //
+      // FIXME: Is this the right level to be performing this transformation?
+      std::string FlatModuleName;
+      if (Info->getTokens().front().is(tok::colon)) {
+        // Import a module partition allowed in C++20 Modules.
+        // We can import a partition in named module TU.
+        if (NamedModuleImportPath.empty() && ModuleDeclState.isNamedModule())
+          FlatModuleName = llvm::Twine(ModuleDeclState.getPrimaryName())
+                               .concat(Info->getFlatName())
+                               .str();
+        else
+          return true;
+      } else {
+        FlatModuleName = Info->getFlatName();
+      }
+      NamedModuleImportPath.emplace_back(getIdentifierInfo(FlatModuleName),
+                                         Result.getLocation());
+    } else {
+      Info->getModuleIdPath(NamedModuleImportPath);
     }
-
-    EnterTokens(Suffix);
-    return false;
-  }
-
-  // The token sequence
-  //
-  //   import identifier (. identifier)*
-  //
-  // indicates a module import directive. We already saw the 'import'
-  // contextual keyword, so now we're looking for the identifiers.
-  if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
-    // We expected to see an identifier here, and we did; continue handling
-    // identifiers.
-    NamedModuleImportPath.push_back(
-        std::make_pair(Result.getIdentifierInfo(), Result.getLocation()));
-    ModuleImportExpectsIdentifier = false;
-    CurLexerCallback = CLK_LexAfterModuleImport;
-    return true;
-  }
-
-  // If we're expecting a '.' or a ';', and we got a '.', then wait until we
-  // see the next identifier. (We can also see a '[[' that begins an
-  // attribute-specifier-seq here under the Standard C++ Modules.)
-  if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
-    ModuleImportExpectsIdentifier = true;
-    CurLexerCallback = CLK_LexAfterModuleImport;
-    return true;
   }
 
   // If we didn't recognize a module name at all, this is not a (valid) import.
@@ -1291,24 +1458,6 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) {
     SemiLoc = Suffix.back().getLocation();
   }
 
-  // Under the standard C++ Modules, the dot is just part of the module name,
-  // and not a real hierarchy separator. Flatten such module names now.
-  //
-  // FIXME: Is this the right level to be performing this transformation?
-  std::string FlatModuleName;
-  if (getLangOpts().CPlusPlusModules) {
-    for (auto &Piece : NamedModuleImportPath) {
-      // If the FlatModuleName ends with colon, it implies it is a partition.
-      if (!FlatModuleName.empty() && FlatModuleName.back() != ':')
-        FlatModuleName += ".";
-      FlatModuleName += Piece.first->getName();
-    }
-    SourceLocation FirstPathLoc = NamedModuleImportPath[0].second;
-    NamedModuleImportPath.clear();
-    NamedModuleImportPath.push_back(
-        std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc));
-  }
-
   Module *Imported = nullptr;
   // We don't/shouldn't load the standard c++20 modules when preprocessing.
   if (getLangOpts().Modules && !isInImportingCXXNamedModules()) {
@@ -1330,6 +1479,33 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) {
   return true;
 }
 
+/// Lex a token following the 'module' contextual keyword.
+///
+/// [cpp.module]/p2:
+/// The pp-tokens, if any, of a pp-module shall be of the form:
+///       pp-module-name pp-module-partition[opt] pp-tokens[opt]
+///
+/// where the pp-tokens (if any) shall not begin with a ( preprocessing token
+/// and the grammar non-terminals are defined as:
+///       pp-module-name:
+///             pp-module-name-qualifier[opt] identifier
+///       pp-module-partition:
+///             : pp-module-name-qualifier[opt] identifier
+///       pp-module-name-qualifier:
+///             identifier .
+///             pp-module-name-qualifier identifier .
+/// No identifier in the pp-module-name or pp-module-partition shall currently
+/// be defined as an object-like macro.
+///
+/// [cpp.module]/p3:
+/// Any preprocessing tokens after the module preprocessing token in the module
+/// directive are processed just as in normal text.
+bool Preprocessor::LexAfterModuleDecl(Token &Result) {
+  // Figure out what kind of lexer we actually have.
+  recomputeCurLexerKind();
+  return LexModuleName(Result, /*IsImport=*/false);
+}
+
 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
   CurSubmoduleState->VisibleModules.setVisible(
       M, Loc, [](Module *) {},
diff --git a/clang/lib/Lex/TokenConcatenation.cpp b/clang/lib/Lex/TokenConcatenation.cpp
index 865879d180533..cdb636923b9e9 100644
--- a/clang/lib/Lex/TokenConcatenation.cpp
+++ b/clang/lib/Lex/TokenConcatenation.cpp
@@ -160,6 +160,13 @@ static char GetFirstChar(const Preprocessor &PP, const Token &Tok) {
 bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok,
                                      const Token &PrevTok,
                                      const Token &Tok) const {
+  // If the previous token is a module name, no whitespace is needed before the
+  // current token; otherwise there would be an extra space between 'M' and ';'
+  // in the following code:
+  //
+  // import M;
+  if (PrevTok.is(tok::annot_module_name))
+    return false;
   // Conservatively assume that every annotation token that has a printable
   // form requires whitespace.
   if (PrevTok.isAnnotation())
@@ -190,6 +197,9 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok,
       return true;
     ConcatInfo &= ~aci_avoid_equal;
   }
+
+  if (Tok.is(tok::annot_module_name))
+    return true;
   if (Tok.isAnnotation()) {
     // Modules annotation can show up when generated automatically for includes.
     assert(Tok.isOneOf(tok::annot_module_include, tok::annot_module_begin,
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index 7ce9a9cea1c7a..577527d0318f2 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -3958,7 +3958,13 @@ void Parser::ParseDeclarationSpecifiers(
 
       // We're done with the declaration-specifiers.
       goto DoneWithDeclSpec;
-
+    case tok::annot_module_name: {
+      PP.EnterTokenStream(
+          Tok.getAnnotationValueAs<ModuleNameInfo *>()->getTokens(),
+          /*DisableMacroExpansion=*/true, /*IsReinject=*/false);
+      ConsumeAnyToken();
+      [[fallthrough]];
+    }
       // typedef-name
     case tok::kw___super:
     case tok::kw_decltype:
diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp
index 5ebe71e496a2e..afb2e1e416168 100644
--- a/clang/lib/Parse/Parser.cpp
+++ b/clang/lib/Parse/Parser.cpp
@@ -2511,18 +2511,28 @@ Parser::ParseModuleDecl(Sema::ModuleImportState &ImportState) {
   }
 
   SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path;
-  if (ParseModuleName(ModuleLoc, Path, /*IsImport*/ false))
+  if (Tok.isNot(tok::annot_module_name)) {
+    Diag(Tok, diag::err_module_expected_ident) << /*IsImport=*/false;
+    SkipUntil(tok::semi, StopBeforeMatch);
+    return nullptr;
+  }
+
+  auto *Info = Tok.getAnnotationValueAs<ModuleNameInfo *>();
+  ConsumeAnnotationToken();
+  if (ParseModuleName(ModuleLoc, Info->getModuleName(), Path,
+                      /*IsImport=*/false))
     return nullptr;
 
   // Parse the optional module-partition.
   SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Partition;
-  if (Tok.is(tok::colon)) {
-    SourceLocation ColonLoc = ConsumeToken();
+  if (Info->hasPartitionName()) {
+    SourceLocation ColonLoc = Info->getColonToken().getLocation();
     if (!getLangOpts().CPlusPlusModules)
       Diag(ColonLoc, diag::err_unsupported_module_partition)
           << SourceRange(ColonLoc, Partition.back().second);
     // Recover by ignoring the partition name.
-    else if (ParseModuleName(ModuleLoc, Partition, /*IsImport*/ false))
+    else if (ParseModuleName(ModuleLoc, Info->getPartitionName(), Partition,
+                             /*IsImport=*/false))
       return nullptr;
   }
 
@@ -2581,18 +2591,32 @@ Decl *Parser::ParseModuleImport(SourceLocation AtLoc,
     // This is a header import that the preprocessor mapped to a module import.
     HeaderUnit = reinterpret_cast<Module *>(Tok.getAnnotationValue());
     ConsumeAnnotationToken();
-  } else if (Tok.is(tok::colon)) {
-    SourceLocation ColonLoc = ConsumeToken();
-    if (!getLangOpts().CPlusPlusModules)
-      Diag(ColonLoc, diag::err_unsupported_module_partition)
-          << SourceRange(ColonLoc, Path.back().second);
-    // Recover by leaving partition empty.
-    else if (ParseModuleName(ColonLoc, Path, /*IsImport*/ true))
-      return nullptr;
-    else
-      IsPartition = true;
   } else {
-    if (ParseModuleName(ImportLoc, Path, /*IsImport*/ true))
+    if (Tok.isNot(tok::annot_module_name)) {
+      if (Tok.is(tok::code_completion)) {
+        cutOffParsing();
+        Actions.CodeCompletion().CodeCompleteModuleImport(ImportLoc, Path);
+        return nullptr;
+      }
+      Diag(Tok, diag::err_module_expected_ident) << /*IsImport=*/true;
+      SkipUntil(tok::semi, StopBeforeMatch);
+      return nullptr;
+    }
+    auto *Info = Tok.getAnnotationValueAs<ModuleNameInfo *>();
+    ConsumeAnnotationToken();
+    if (Info->hasPartitionName()) {
+      SourceLocation ColonLoc = Info->getColonToken().getLocation();
+      if (!getLangOpts().CPlusPlusModules)
+        Diag(ColonLoc, diag::err_unsupported_module_partition)
+            << SourceRange(ColonLoc, Path.back().second);
+      // Recover by leaving partition empty.
+      else if (ParseModuleName(ColonLoc, Info->getPartitionName(), Path,
+                               /*IsImport=*/true))
+        return nullptr;
+      else
+        IsPartition = true;
+    } else if (ParseModuleName(ImportLoc, Info->getModuleName(), Path,
+                               /*IsImport=*/true))
       return nullptr;
   }
 
@@ -2689,32 +2713,31 @@ Decl *Parser::ParseModuleImport(SourceLocation AtLoc,
 ///         module-name-qualifier:
 ///           module-name-qualifier[opt] identifier '.'
 bool Parser::ParseModuleName(
-    SourceLocation UseLoc,
+    SourceLocation UseLoc, ArrayRef<Token> ModuleName,
     SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path,
     bool IsImport) {
-  // Parse the module path.
-  while (true) {
-    if (!Tok.is(tok::identifier)) {
-      if (Tok.is(tok::code_completion)) {
-        cutOffParsing();
-        Actions.CodeCompletion().CodeCompleteModuleImport(UseLoc, Path);
-        return true;
-      }
-
-      Diag(Tok, diag::err_module_expected_ident) << IsImport;
-      SkipUntil(tok::semi);
+  ModuleNameInfo::getModuleIdPath(ModuleName, Path);
+  // E.g. import A.B.
+  if (ModuleName.back().isNot(tok::identifier)) {
+    if (Tok.is(tok::code_completion)) {
+      cutOffParsing();
+      Actions.CodeCompletion().CodeCompleteModuleImport(UseLoc, Path);
       return true;
     }
+    Diag(ModuleName.back(), diag::err_module_expected_ident) << IsImport;
+    SkipUntil(tok::semi, StopBeforeMatch);
+    return true;
+  }
 
-    // Record this part of the module path.
-    Path.push_back(std::make_pair(Tok.getIdentifierInfo(), Tok.getLocation()));
-    ConsumeToken();
-
-    if (Tok.isNot(tok::period))
-      return false;
-
-    ConsumeToken();
+  // [cpp.module]/p2: where the pp-tokens (if any) shall not begin with a (
+  // preprocessing token [...]
+  //
+  // Skip until ';' to recover.
+  if (getLangOpts().CPlusPlusModules && Tok.is(tok::l_paren)) {
+    SkipUntil(tok::semi, StopBeforeMatch);
+    return true;
   }
+  return false;
 }
 
 /// Try recover parser when module annotation appears where it must not
diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp
index 725b62db5e80a..a7bc6749c5852 100644
--- a/clang/lib/Sema/SemaTemplateInstantiate.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp
@@ -3426,16 +3426,11 @@ Sema::InstantiateClass(SourceLocation PointOfInstantiation,
     return true;
 
   llvm::TimeTraceScope TimeScope("InstantiateClass", [&]() {
-    llvm::TimeTraceMetadata M;
-    llvm::raw_string_ostream OS(M.Detail);
+    std::string Name;
+    llvm::raw_string_ostream OS(Name);
     Instantiation->getNameForDiagnostic(OS, getPrintingPolicy(),
                                         /*Qualified=*/true);
-    if (llvm::isTimeTraceVerbose()) {
-      auto Loc = SourceMgr.getExpansionLoc(Instantiation->getLocation());
-      M.File = SourceMgr.getFilename(Loc);
-      M.Line = SourceMgr.getExpansionLineNumber(Loc);
-    }
-    return M;
+    return Name;
   });
 
   Pattern = PatternDef;
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index a12d2eff1d2c8..97161febc15f7 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -4966,16 +4966,11 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation,
   }
 
   llvm::TimeTraceScope TimeScope("InstantiateFunction", [&]() {
-    llvm::TimeTraceMetadata M;
-    llvm::raw_string_ostream OS(M.Detail);
+    std::string Name;
+    llvm::raw_string_ostream OS(Name);
     Function->getNameForDiagnostic(OS, getPrintingPolicy(),
                                    /*Qualified=*/true);
-    if (llvm::isTimeTraceVerbose()) {
-      auto Loc = SourceMgr.getExpansionLoc(Function->getLocation());
-      M.File = SourceMgr.getFilename(Loc);
-      M.Line = SourceMgr.getExpansionLineNumber(Loc);
-    }
-    return M;
+    return Name;
   });
 
   // If we're performing recursive template instantiation, create our own
diff --git a/clang/test/AST/Interp/codegen.cpp b/clang/test/AST/Interp/codegen.cpp
new file mode 100644
index 0000000000000..8a0d070d19da3
--- /dev/null
+++ b/clang/test/AST/Interp/codegen.cpp
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -o - %s -fexperimental-new-constant-interpreter | FileCheck %s
+
+
+int arr[2];
+// CHECK: @pastEnd = constant ptr getelementptr (i8, ptr @arr, i64 8)
+int &pastEnd = arr[2];
+
+// CHECK: @F = constant ptr @arr, align 8
+int &F = arr[0];
+
+struct S {
+  int a;
+  float c[3];
+};
+
+// CHECK: @s = global %struct.S zeroinitializer, align 4
+S s;
+// CHECK: @sp = constant ptr getelementptr (i8, ptr @s, i64 16), align 8
+float &sp = s.c[3];
diff --git a/clang/test/AST/Interp/cxx11.cpp b/clang/test/AST/Interp/cxx11.cpp
index 92ab9b605f30d..cf2dfba079ef7 100644
--- a/clang/test/AST/Interp/cxx11.cpp
+++ b/clang/test/AST/Interp/cxx11.cpp
@@ -152,3 +152,11 @@ void A::f(SortOrder order) {
     return;
 }
 }
+
+namespace FinalLtorDiags {
+  template<int*> struct A {}; // both-note {{template parameter is declared here}}
+  int k;
+  int *q = &k; // both-note {{declared here}}
+  A<q> c; // both-error {{non-type template argument of type 'int *' is not a constant expression}} \
+          // both-note {{read of non-constexpr variable 'q' is not allowed in a constant expression}}
+}
diff --git a/clang/test/AST/Interp/new-delete.cpp b/clang/test/AST/Interp/new-delete.cpp
index cb46426c0e3be..7a85def784920 100644
--- a/clang/test/AST/Interp/new-delete.cpp
+++ b/clang/test/AST/Interp/new-delete.cpp
@@ -560,4 +560,9 @@ constexpr int a() { // both-error {{never produces a constant expression}}
 }
 static_assert(a() == 1, ""); // both-error {{not an integral constant expression}} \
                              // both-note {{in call to 'a()'}}
+
+
+static_assert(true ? *new int : 4, ""); // both-error {{expression is not an integral constant expression}} \
+                                        // both-note {{read of uninitialized object is not allowed in a constant expression}}
+
 #endif
diff --git a/clang/test/CXX/cpp/cpp.module/p2.cppm b/clang/test/CXX/cpp/cpp.module/p2.cppm
new file mode 100644
index 0000000000000..966a88ccfa972
--- /dev/null
+++ b/clang/test/CXX/cpp/cpp.module/p2.cppm
@@ -0,0 +1,88 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+
+// RUN: %clang_cc1 -std=c++20 %t/A.cppm -triple x86_64-linux-gnu -verify
+// RUN: %clang_cc1 -std=c++20 %t/B.cppm -triple x86_64-linux-gnu -verify
+// RUN: %clang_cc1 -std=c++20 %t/C.cppm -triple x86_64-linux-gnu -verify
+// RUN: %clang_cc1 -std=c++20 %t/D.cppm -triple x86_64-linux-gnu -verify
+// RUN: %clang_cc1 -std=c++20 %t/E.cppm -triple x86_64-linux-gnu -verify
+// RUN: %clang_cc1 -std=c++20 %t/F.cppm -triple x86_64-linux-gnu -verify
+// RUN: %clang_cc1 -std=c++20 %t/G.cppm -triple x86_64-linux-gnu -verify
+// RUN: %clang_cc1 -std=c++20 %t/H.cppm -triple x86_64-linux-gnu -verify
+// RUN: %clang_cc1 -std=c++20 %t/I.cppm -triple x86_64-linux-gnu -verify
+// RUN: %clang_cc1 -std=c++20 %t/J.cppm -triple x86_64-linux-gnu -verify
+
+//--- version.h
+#ifndef VERSION_H
+#define VERSION_H
+
+#define VERSION libv5
+#define A a
+#define B b
+#define C c
+#define FUNC_LIKE(X) function_like_##X
+#define ATTRS [[]]
+#define SEMICOLON ;
+
+#endif // VERSION_H
+
+//--- A.cppm
+module;
+#include "version.h"
+export module VERSION;  // expected-error {{the module name in a module declaration cannot contain an object-like macro 'VERSION'}}
+
+//--- B.cppm
+module;
+#include "version.h"
+export module A.B;      // expected-error {{the module name in a module declaration cannot contain an object-like macro 'A'}} \
+                        // expected-error {{the module name in a module declaration cannot contain an object-like macro 'B'}}
+
+//--- C.cppm
+module;                             // expected-error {{missing 'module' declaration at end of global module fragment introduced here}}
+#include "version.h"
+export module A.FUNC_LIKE(foo):C;   // expected-error {{the module name in a module declaration cannot contain an object-like macro 'A'}} \
+                                    // expected-error {{unexpected '(' after the module name in a module declaration}}
+
+//--- D.cppm
+module;                               // expected-error {{missing 'module' declaration at end of global module fragment introduced here}}
+#include "version.h"
+export module B.A.FUNC_LIKE(bar):C;   // expected-error {{the module name in a module declaration cannot contain an object-like macro 'B'}} \
+                                      // expected-error {{the module name in a module declaration cannot contain an object-like macro 'A'}} \
+                                      // expected-error {{unexpected '(' after the module name in a module declaration}}
+
+//--- E.cppm
+module;
+#include "version.h"
+export module a.FUNC_LIKE:c; // OK, FUNC_LIKE would not be treated as a macro name.
+// expected-no-diagnostics
+
+//--- F.cppm
+module;
+#include "version.h"
+export module a.FUNC_LIKE:c ATTRS; // OK, FUNC_LIKE would not be treated as a macro name.
+// expected-no-diagnostics
+
+//--- G.cppm
+module;                               // expected-error {{missing 'module' declaration at end of global module fragment introduced here}}
+#include "version.h"
+export module A.FUNC_LIKE(B c:C ATTRS // expected-error {{the module name in a module declaration cannot contain an object-like macro 'A'}} \
+                                      // expected-error {{unexpected '(' after the module name in a module declaration}}
+
+//--- H.cppm
+module;                                   // expected-error {{missing 'module' declaration at end of global module fragment introduced here}}
+#include "version.h"
+export module A.FUNC_LIKE(B,). c:C ATTRS  // expected-error {{the module name in a module declaration cannot contain an object-like macro 'A'}} \
+                                          // expected-error {{unexpected '(' after the module name in a module declaration}}
+
+//--- I.cppm
+module;                                   // expected-error {{missing 'module' declaration at end of global module fragment introduced here}}
+#include "version.h"
+export module A.FUNC_LIKE(B,) c:C ATTRS   // expected-error {{the module name in a module declaration cannot contain an object-like macro 'A'}} \
+                                          // expected-error {{unexpected '(' after the module name in a module declaration}}
+
+//--- J.cppm
+module;
+#include "version.h"
+export module unexpanded : unexpanded ATTRS SEMICOLON // OK, ATTRS and SEMICOLON can be expanded.
+// expected-no-diagnostics
diff --git a/clang/test/CXX/module/basic/basic.link/module-declaration.cpp b/clang/test/CXX/module/basic/basic.link/module-declaration.cpp
index d71358cc7a571..14bbc911febfc 100644
--- a/clang/test/CXX/module/basic/basic.link/module-declaration.cpp
+++ b/clang/test/CXX/module/basic/basic.link/module-declaration.cpp
@@ -8,27 +8,19 @@
 // RUN: %clang_cc1 -std=c++20 -emit-module-interface -fmodule-file=x=%t/x.pcm %t/x.y.cppm -o %t/x.y.pcm
 //
 // Module implementation for unknown and known module. (The former is ill-formed.)
-// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify -x c++ %t/M.cpp \
-// RUN:            -DTEST=1 -DEXPORT= -DMODULE_NAME=z
-// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x=%t/x.pcm -fmodule-file=x.y=%t/x.y.pcm -verify -x c++ %t/M.cpp \
-// RUN:            -DTEST=2 -DEXPORT= -DMODULE_NAME=x
+// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify -x c++ %t/M1.cpp
+// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x=%t/x.pcm -fmodule-file=x.y=%t/x.y.pcm -verify -x c++ %t/M2.cpp
 //
 // Module interface for unknown and known module. (The latter is ill-formed due to
 // redefinition.)
-// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M.cpp \
-// RUN:            -DTEST=3 -DEXPORT=export -DMODULE_NAME=z
-// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M.cpp \
-// RUN:            -DTEST=4 -DEXPORT=export -DMODULE_NAME=x
+// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M3.cpp
+// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M4.cpp
 //
 // Miscellaneous syntax.
-// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M.cpp \
-// RUN:            -DTEST=7 -DEXPORT=export -DMODULE_NAME='z elderberry'
-// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M.cpp \
-// RUN:            -DTEST=8 -DEXPORT=export -DMODULE_NAME='z [[]]'
-// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M.cpp \
-// RUN:            -DTEST=9 -DEXPORT=export -DMODULE_NAME='z [[fancy]]'
-// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M.cpp \
-// RUN:            -DTEST=10 -DEXPORT=export -DMODULE_NAME='z [[maybe_unused]]'
+// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M5.cpp
+// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M6.cpp
+// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M7.cpp
+// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M8.cpp
 
 //--- x.cppm
 export module x;
@@ -38,17 +30,26 @@ int a, b;
 export module x.y;
 int c;
 
-//--- M.cpp
-
-EXPORT module MODULE_NAME;
-#if TEST == 7
-// expected-error at -2 {{expected ';'}} expected-error at -2 {{a type specifier is required}}
-#elif TEST == 9
-// expected-warning at -4 {{unknown attribute 'fancy' ignored}}
-#elif TEST == 10
-// expected-error-re at -6 {{'maybe_unused' attribute cannot be applied to a module{{$}}}}
-#elif TEST == 1
-// expected-error at -8 {{module 'z' not found}}
-#else
-// expected-no-diagnostics
-#endif
+//--- M1.cpp
+module z; // expected-error {{module 'z' not found}}
+
+//--- M2.cpp
+module x; // expected-no-diagnostics
+
+//--- M3.cpp
+export module z; // expected-no-diagnostics
+
+//--- M4.cpp
+export module x; // expected-no-diagnostics
+
+//--- M5.cpp
+export module z elderberry; // expected-error {{expected ';'}} expected-error {{a type specifier is required}}
+
+//--- M6.cpp
+export module z [[]]; // expected-no-diagnostics
+
+//--- M7.cpp
+export module z [[fancy]]; // expected-warning {{unknown attribute 'fancy' ignored}}
+
+//--- M8.cpp
+export module z [[maybe_unused]]; // expected-error-re {{'maybe_unused' attribute cannot be applied to a module{{$}}}}
diff --git a/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm b/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm
index 873e4c0edeac2..ecad4db32a7e9 100644
--- a/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm
+++ b/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm
@@ -6,10 +6,12 @@
 // RUN: %clang_cc1 -std=c++20 -emit-module-interface -fmodule-file=x=%t/x.pcm %t/x.y.cppm -o %t/x.y.pcm
 // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/a.b.cppm -o %t/a.b.pcm
 //
-// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -fmodule-file=x=%t/x.pcm -verify %t/test.cpp \
-// RUN:            -DMODULE_NAME=z -DINTERFACE
+// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -fmodule-file=x=%t/x.pcm -verify %t/test-interface.cpp \
+// RUN:            -DINTERFACE
 // RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -fmodule-file=x=%t/x.pcm \
-// RUN:            -fmodule-file=a.b=%t/a.b.pcm -verify %t/test.cpp -DMODULE_NAME=a.b
+// RUN:            -fmodule-file=a.b=%t/a.b.pcm -verify %t/test.cpp
+// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -fmodule-file=x=%t/x.pcm \
+// RUN:            -verify %t/test-module-not-found.cpp
 // RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -fmodule-file=x=%t/x.pcm -verify %t/test.x.cpp
 
 //--- x.cppm
@@ -34,11 +36,8 @@ int use_2 = b; // ok
 int use_3 = c; // expected-error {{use of undeclared identifier 'c'}}
 
 //--- test.cpp
-#ifdef INTERFACE
-export module MODULE_NAME;
-#else
-module MODULE_NAME;
-#endif
+module;
+module a.b;
 
 import x;
 
@@ -51,6 +50,28 @@ import x.y;
 import x.; // expected-error {{expected a module name after 'import'}}
 import .x; // expected-error {{expected a module name after 'import'}}
 
-import blarg; // expected-error {{module 'blarg' not found}}
+int use_4 = c; // ok
+
+
+//--- test-interface.cpp
+module;
+export module z;
+
+import x;
+
+import x [[]];
+import x [[foo]]; // expected-warning {{unknown attribute 'foo' ignored}}
+import x [[noreturn]]; // expected-error {{'noreturn' attribute cannot be applied to a module import}}
+import x [[blarg::noreturn]]; // expected-warning {{unknown attribute 'noreturn' ignored}}
+
+import x.y;
+import x.; // expected-error {{expected a module name after 'import'}}
+import .x; // expected-error {{expected a module name after 'import'}}
 
 int use_4 = c; // ok
+
+//--- test-module-not-found.cpp
+module;
+
+import blarg; // expected-error {{module 'blarg' not found}}
+
diff --git a/clang/test/CodeGen/attr-target-x86.c b/clang/test/CodeGen/attr-target-x86.c
index 3c2b511157f99..b1ae6678531b9 100644
--- a/clang/test/CodeGen/attr-target-x86.c
+++ b/clang/test/CodeGen/attr-target-x86.c
@@ -64,7 +64,7 @@ void __attribute__((target("avx10.1-512"))) avx10_1_512(void) {}
 // CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-avx,-avx10.1-256,-avx10.1-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
 // CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-avx10.1-256,-avx10.1-512,-vaes"
 // CHECK-NOT: tune-cpu
-// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-3dnow,-3dnowa,-mmx"
+// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-mmx"
 // CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+cx8,+mmx"
 // CHECK-NOT: tune-cpu
 // CHECK: #8 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87" "tune-cpu"="sandybridge"
diff --git a/clang/test/Driver/ftime-trace-sections.cpp b/clang/test/Driver/ftime-trace-sections.cpp
index da7109b9d81a6..0c16052bc0c3a 100644
--- a/clang/test/Driver/ftime-trace-sections.cpp
+++ b/clang/test/Driver/ftime-trace-sections.cpp
@@ -1,5 +1,5 @@
 // RUN: rm -rf %t && mkdir %t && cd %t
-// RUN: %clangxx -S -ftime-trace -ftime-trace-granularity=0 -ftime-trace-verbose -o out %s
+// RUN: %clangxx -S -ftime-trace -ftime-trace-granularity=0 -o out %s
 // RUN: %python %S/ftime-trace-sections.py < out.json
 
 template <typename T>
diff --git a/clang/test/Driver/ftime-trace.cpp b/clang/test/Driver/ftime-trace.cpp
index 60c5885704b58..5fe63de915a71 100644
--- a/clang/test/Driver/ftime-trace.cpp
+++ b/clang/test/Driver/ftime-trace.cpp
@@ -1,18 +1,18 @@
 // RUN: rm -rf %t && mkdir -p %t && cd %t
-// RUN: %clangxx -S -no-canonical-prefixes -ftime-trace -ftime-trace-granularity=0 -ftime-trace-verbose -o out %s
+// RUN: %clangxx -S -no-canonical-prefixes -ftime-trace -ftime-trace-granularity=0 -o out %s
 // RUN: cat out.json \
 // RUN:   | %python -c 'import json, sys; json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)' \
 // RUN:   | FileCheck %s
-// RUN: %clangxx -S -no-canonical-prefixes -ftime-trace=new-name.json -ftime-trace-granularity=0 -ftime-trace-verbose -o out %s
+// RUN: %clangxx -S -no-canonical-prefixes -ftime-trace=new-name.json -ftime-trace-granularity=0 -o out %s
 // RUN: cat new-name.json \
 // RUN:   | %python -c 'import json, sys; json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)' \
 // RUN:   | FileCheck %s
 // RUN: mkdir dir1 dir2
-// RUN: %clangxx -S -no-canonical-prefixes -ftime-trace=dir1 -ftime-trace-granularity=0 -ftime-trace-verbose -o out %s
+// RUN: %clangxx -S -no-canonical-prefixes -ftime-trace=dir1 -ftime-trace-granularity=0 -o out %s
 // RUN: cat dir1/out.json \
 // RUN:   | %python -c 'import json, sys; json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)' \
 // RUN:   | FileCheck %s
-// RUN: %clangxx -S -no-canonical-prefixes -ftime-trace=dir2/ -ftime-trace-granularity=0 -ftime-trace-verbose -o out %s
+// RUN: %clangxx -S -no-canonical-prefixes -ftime-trace=dir2/ -ftime-trace-granularity=0 -o out %s
 // RUN: cat dir2/out.json \
 // RUN:   | %python -c 'import json, sys; json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)' \
 // RUN:   | FileCheck %s
@@ -34,33 +34,32 @@
 // RUN: mkdir d e f && cp %s d/a.cpp && touch d/b.c
 
 /// TODO: Support -fno-integrated-as.
-// RUN: %clang -### -c -ftime-trace -ftime-trace-granularity=0 -ftime-trace-verbose -fintegrated-as d/a.cpp -o e/a.o 2>&1 | FileCheck %s --check-prefix=COMPILE1
-// COMPILE1: -cc1{{.*}} "-ftime-trace=e/a.json" "-ftime-trace-granularity=0" "-ftime-trace-verbose"
+// RUN: %clang -### -c -ftime-trace -ftime-trace-granularity=0 -fintegrated-as d/a.cpp -o e/a.o 2>&1 | FileCheck %s --check-prefix=COMPILE1
+// COMPILE1: -cc1{{.*}} "-ftime-trace=e/a.json" "-ftime-trace-granularity=0"
 
-// RUN: %clang -### -c -ftime-trace -ftime-trace-granularity=0 -ftime-trace-verbose d/a.cpp d/b.c -dumpdir f/ 2>&1 | FileCheck %s --check-prefix=COMPILE2
-// COMPILE2: -cc1{{.*}} "-ftime-trace=f/a.json" "-ftime-trace-granularity=0" "-ftime-trace-verbose"
-// COMPILE2: -cc1{{.*}} "-ftime-trace=f/b.json" "-ftime-trace-granularity=0" "-ftime-trace-verbose"
+// RUN: %clang -### -c -ftime-trace -ftime-trace-granularity=0 d/a.cpp d/b.c -dumpdir f/ 2>&1 | FileCheck %s --check-prefix=COMPILE2
+// COMPILE2: -cc1{{.*}} "-ftime-trace=f/a.json" "-ftime-trace-granularity=0"
+// COMPILE2: -cc1{{.*}} "-ftime-trace=f/b.json" "-ftime-trace-granularity=0"
 
 /// -o specifies the link output. Create ${output}-${basename}.json.
-// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -ftime-trace-verbose d/a.cpp d/b.c -o e/x 2>&1 | FileCheck %s --check-prefix=LINK1
-// LINK1: -cc1{{.*}} "-ftime-trace=e/x-a.json" "-ftime-trace-granularity=0" "-ftime-trace-verbose"
-// LINK1: -cc1{{.*}} "-ftime-trace=e/x-b.json" "-ftime-trace-granularity=0" "-ftime-trace-verbose"
+// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 d/a.cpp d/b.c -o e/x 2>&1 | FileCheck %s --check-prefix=LINK1
+// LINK1: -cc1{{.*}} "-ftime-trace=e/x-a.json" "-ftime-trace-granularity=0"
+// LINK1: -cc1{{.*}} "-ftime-trace=e/x-b.json" "-ftime-trace-granularity=0"
 
 /// -dumpdir is f/g, not ending with a path separator. We create f/g${basename}.json.
-// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -ftime-trace-verbose d/a.cpp d/b.c -o e/x -dumpdir f/g 2>&1 | FileCheck %s --check-prefix=LINK2
-// LINK2: -cc1{{.*}} "-ftime-trace=f/ga.json" "-ftime-trace-granularity=0" "-ftime-trace-verbose"
-// LINK2: -cc1{{.*}} "-ftime-trace=f/gb.json" "-ftime-trace-granularity=0" "-ftime-trace-verbose"
+// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 d/a.cpp d/b.c -o e/x -dumpdir f/g 2>&1 | FileCheck %s --check-prefix=LINK2
+// LINK2: -cc1{{.*}} "-ftime-trace=f/ga.json" "-ftime-trace-granularity=0"
+// LINK2: -cc1{{.*}} "-ftime-trace=f/gb.json" "-ftime-trace-granularity=0"
 
-// RUN: %clang -### -ftime-trace=e -ftime-trace-granularity=0 -ftime-trace-verbose d/a.cpp d/b.c -o f/x -dumpdir f/ 2>&1 | FileCheck %s --check-prefix=LINK3
-// LINK3: -cc1{{.*}} "-ftime-trace=e{{/|\\\\}}a-{{[^.]*}}.json" "-ftime-trace-granularity=0" "-ftime-trace-verbose"
-// LINK3: -cc1{{.*}} "-ftime-trace=e{{/|\\\\}}b-{{[^.]*}}.json" "-ftime-trace-granularity=0" "-ftime-trace-verbose"
+// RUN: %clang -### -ftime-trace=e -ftime-trace-granularity=0 d/a.cpp d/b.c -o f/x -dumpdir f/ 2>&1 | FileCheck %s --check-prefix=LINK3
+// LINK3: -cc1{{.*}} "-ftime-trace=e{{/|\\\\}}a-{{[^.]*}}.json" "-ftime-trace-granularity=0"
+// LINK3: -cc1{{.*}} "-ftime-trace=e{{/|\\\\}}b-{{[^.]*}}.json" "-ftime-trace-granularity=0"
 
-// RUN: %clang -### -ftime-trace -ftime-trace=e -ftime-trace-granularity=1 -ftime-trace-verbose -xassembler d/a.cpp 2>&1 | \
+// RUN: %clang -### -ftime-trace -ftime-trace=e -ftime-trace-granularity=1 -xassembler d/a.cpp 2>&1 | \
 // RUN:   FileCheck %s --check-prefix=UNUSED
 // UNUSED:      warning: argument unused during compilation: '-ftime-trace'
 // UNUSED-NEXT: warning: argument unused during compilation: '-ftime-trace=e'
 // UNUSED-NEXT: warning: argument unused during compilation: '-ftime-trace-granularity=1'
-// UNUSED-NEXT: warning: argument unused during compilation: '-ftime-trace-verbose'
 // UNUSED-NOT:  warning:
 
 template <typename T>
diff --git a/clang/test/Driver/gpu-libc-headers.c b/clang/test/Driver/gpu-libc-headers.c
index 32a5edb175e61..53c016837dde6 100644
--- a/clang/test/Driver/gpu-libc-headers.c
+++ b/clang/test/Driver/gpu-libc-headers.c
@@ -4,15 +4,15 @@
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp --sysroot=./ \
 // RUN:     -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda --offload-arch=sm_70  \
 // RUN:     -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS
-// CHECK-HEADERS: "-cc1"{{.*}}"-internal-isystem" "{{.*}}include{{.*}}llvm_libc_wrappers"{{.*}}"-isysroot" "./"
-// CHECK-HEADERS: "-cc1"{{.*}}"-internal-isystem" "{{.*}}include{{.*}}llvm_libc_wrappers"{{.*}}"-isysroot" "./"
+// CHECK-HEADERS: "-cc1"{{.*}}"-isysroot" "./"{{.*}}"-internal-isystem" "{{.*}}include{{.*}}llvm_libc_wrappers"
+// CHECK-HEADERS: "-cc1"{{.*}}"-isysroot" "./"{{.*}}"-internal-isystem" "{{.*}}include{{.*}}llvm_libc_wrappers"
 
 // RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a --sysroot=./ \
 // RUN:     -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS-AMDGPU
 // RUN:   %clang -### --target=nvptx64-nvidia-cuda -march=sm_89 --sysroot=./ \
 // RUN:     -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS-NVPTX
-// CHECK-HEADERS-AMDGPU: "-cc1"{{.*}}"-internal-isystem" "{{.*}}include{{.*}}amdgcn-amd-amdhsa"{{.*}}"-isysroot" "./"
-// CHECK-HEADERS-NVPTX: "-cc1"{{.*}}"-internal-isystem" "{{.*}}include{{.*}}nvptx64-nvidia-cuda"{{.*}}"-isysroot" "./"
+// CHECK-HEADERS-AMDGPU: "-cc1"{{.*}}"-isysroot" "./"{{.*}}"-internal-isystem" "{{.*}}include{{.*}}amdgcn-amd-amdhsa"
+// CHECK-HEADERS-NVPTX: "-cc1"{{.*}}"-isysroot" "./"{{.*}}"-internal-isystem" "{{.*}}include{{.*}}nvptx64-nvidia-cuda"
 
 // RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib \
 // RUN:     -nogpuinc %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS-DISABLED
diff --git a/clang/test/SemaCXX/modules.cppm b/clang/test/SemaCXX/modules.cppm
index 41204be76eafa..267417bf5da2c 100644
--- a/clang/test/SemaCXX/modules.cppm
+++ b/clang/test/SemaCXX/modules.cppm
@@ -1,19 +1,17 @@
-// RUN:     %clang_cc1 -std=c++20 -emit-module-interface %s -o %t.0.pcm -verify -DTEST=0
-// RUN:     %clang_cc1 -std=c++20 -emit-module-interface %s -o %t.1.pcm -verify -DTEST=1
-// RUN:     %clang_cc1 -std=c++20 -emit-module-interface %s -fmodule-file=foo=%t.0.pcm -o %t.2.pcm -verify -DTEST=2
-// RUN:     %clang_cc1 -std=c++20 -emit-module-interface %s -fmodule-file=foo=%t.0.pcm -o %t.3.pcm -verify -Dfoo=bar -DTEST=3
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
 
-#if TEST == 0 || TEST == 2
-// expected-no-diagnostics
-#endif
+// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/A.cppm -o %t.0.pcm -verify
+// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/B.cppm -o %t.1.pcm -verify
+// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/C.cppm -fmodule-file=foo=%t.0.pcm -o %t.2.pcm -verify
+// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/D.cppm -fmodule-file=foo=%t.0.pcm -o %t.3.pcm -verify
+// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/E.cppm -fmodule-file=foo=%t.0.pcm -o %t.3.pcm -verify -Dfoo=bar
 
+//--- A.cppm
 export module foo;
-
 static int m;
-
 int n;
-
-#if TEST == 0
 export {
   int a;
   int b;
@@ -27,7 +25,43 @@ export void f() {}
 
 export struct T {
 } t;
-#elif TEST == 3
+// expected-no-diagnostics
+
+//--- B.cppm
+export module foo;
+static int m;
+int n;
+struct S {
+  export int n;        // expected-error {{expected member name or ';'}}
+  export static int n; // expected-error {{expected member name or ';'}}
+};
+
+// FIXME: Exports of declarations without external linkage are disallowed.
+// Exports of declarations with non-external-linkage types are disallowed.
+
+// Cannot export within another export. This isn't precisely covered by the
+// language rules right now, but (per personal correspondence between zygoloid
+// and gdr) is the intent.
+export { // expected-note {{export block begins here}}
+  extern "C++" {
+    namespace NestedExport {
+      export { // expected-error {{export declaration appears within another export declaration}}
+        int q;
+      }
+    } // namespace NestedExport
+  }
+}
+
+//--- C.cppm
+export module foo;
+static int m;
+int n;
+// expected-no-diagnostics
+
+//--- D.cppm
+export module foo;
+static int m;
+int n;
 int use_a = a; // expected-error {{use of undeclared identifier 'a'}}
 
 #undef foo
@@ -46,29 +80,12 @@ int use_n = n; // FIXME: this should not be visible, because it is not exported
 
 extern int n;
 static_assert(&n != p); // expected-error{{use of undeclared identifier 'p'}}
-#endif
 
-#if TEST == 1
-struct S {
-  export int n;        // expected-error {{expected member name or ';'}}
-  export static int n; // expected-error {{expected member name or ';'}}
-};
-#endif
-
-// FIXME: Exports of declarations without external linkage are disallowed.
-// Exports of declarations with non-external-linkage types are disallowed.
+//--- E.cppm
+export module foo; // expected-error {{the module name in a module declaration cannot contain an object-like macro 'foo'}}
+static int m;
+int n;
+int use_a = a; // expected-error {{use of undeclared identifier 'a'}}
 
-// Cannot export within another export. This isn't precisely covered by the
-// language rules right now, but (per personal correspondence between zygoloid
-// and gdr) is the intent.
-#if TEST == 1
-export { // expected-note {{export block begins here}}
-  extern "C++" {
-  namespace NestedExport {
-  export { // expected-error {{export declaration appears within another export declaration}}
-    int q;
-  }
-  } // namespace NestedExport
-  }
-}
-#endif
+#undef foo
+import foo; // expected-error {{imports must immediately follow the module declaration}}
diff --git a/clang/tools/driver/cc1_main.cpp b/clang/tools/driver/cc1_main.cpp
index f5e5fad36573e..c2ccb47a15bc8 100644
--- a/clang/tools/driver/cc1_main.cpp
+++ b/clang/tools/driver/cc1_main.cpp
@@ -241,8 +241,7 @@ int cc1_main(ArrayRef<const char *> Argv, const char *Argv0, void *MainAddr) {
 
   if (!Clang->getFrontendOpts().TimeTracePath.empty()) {
     llvm::timeTraceProfilerInitialize(
-        Clang->getFrontendOpts().TimeTraceGranularity, Argv0,
-        Clang->getFrontendOpts().TimeTraceVerbose);
+        Clang->getFrontendOpts().TimeTraceGranularity, Argv0);
   }
   // --print-supported-cpus takes priority over the actual compilation.
   if (Clang->getFrontendOpts().PrintSupportedCPUs)
diff --git a/clang/tools/driver/cc1as_main.cpp b/clang/tools/driver/cc1as_main.cpp
index ec93f092713f5..15d1e0c2f2f2d 100644
--- a/clang/tools/driver/cc1as_main.cpp
+++ b/clang/tools/driver/cc1as_main.cpp
@@ -531,6 +531,9 @@ static bool ExecuteAssemblerImpl(AssemblerInvocation &Opts,
   MCOptions.MCNoWarn = Opts.NoWarn;
   MCOptions.MCFatalWarnings = Opts.FatalWarnings;
   MCOptions.MCNoTypeCheck = Opts.NoTypeCheck;
+  MCOptions.ShowMCInst = Opts.ShowInst;
+  MCOptions.AsmVerbose = true;
+  MCOptions.MCUseDwarfDirectory = MCTargetOptions::EnableDwarfDirectory;
   MCOptions.ABIName = Opts.TargetABI;
 
   // FIXME: There is a bit of code duplication with addPassesToEmitFile.
@@ -571,9 +574,7 @@ static bool ExecuteAssemblerImpl(AssemblerInvocation &Opts,
 
     Triple T(Opts.Triple);
     Str.reset(TheTarget->createMCObjectStreamer(
-        T, Ctx, std::move(MAB), std::move(OW), std::move(CE), *STI,
-        Opts.RelaxAll, Opts.IncrementalLinkerCompatible,
-        /*DWARFMustBeAtTheEnd*/ true));
+        T, Ctx, std::move(MAB), std::move(OW), std::move(CE), *STI));
     Str.get()->initSections(Opts.NoExecStack, *STI);
   }
 
diff --git a/clang/unittests/AST/Interp/toAPValue.cpp b/clang/unittests/AST/Interp/toAPValue.cpp
index d6879d6e0bca3..5ec607a824349 100644
--- a/clang/unittests/AST/Interp/toAPValue.cpp
+++ b/clang/unittests/AST/Interp/toAPValue.cpp
@@ -27,6 +27,7 @@ TEST(ToAPValue, Pointers) {
   auto AST = tooling::buildASTFromCodeWithArgs(
       Code, {"-fexperimental-new-constant-interpreter"});
 
+  auto &ASTCtx = AST->getASTContext();
   auto &Ctx = AST->getASTContext().getInterpContext();
   Program &Prog = Ctx.getProgram();
 
@@ -47,7 +48,7 @@ TEST(ToAPValue, Pointers) {
     const Pointer &GP = getGlobalPtr("b");
     const Pointer &P = GP.deref<Pointer>();
     ASSERT_TRUE(P.isLive());
-    APValue A = P.toAPValue();
+    APValue A = P.toAPValue(ASTCtx);
     ASSERT_TRUE(A.isLValue());
     ASSERT_TRUE(A.hasLValuePath());
     const auto &Path = A.getLValuePath();
@@ -62,7 +63,7 @@ TEST(ToAPValue, Pointers) {
     const Pointer &GP = getGlobalPtr("p");
     const Pointer &P = GP.deref<Pointer>();
     ASSERT_TRUE(P.isIntegralPointer());
-    APValue A = P.toAPValue();
+    APValue A = P.toAPValue(ASTCtx);
     ASSERT_TRUE(A.isLValue());
     ASSERT_TRUE(A.getLValueBase().isNull());
     APSInt I;
@@ -77,7 +78,7 @@ TEST(ToAPValue, Pointers) {
     const Pointer &GP = getGlobalPtr("nullp");
     const Pointer &P = GP.deref<Pointer>();
     ASSERT_TRUE(P.isIntegralPointer());
-    APValue A = P.toAPValue();
+    APValue A = P.toAPValue(ASTCtx);
     ASSERT_TRUE(A.isLValue());
     ASSERT_TRUE(A.getLValueBase().isNull());
     ASSERT_TRUE(A.isNullPointer());
@@ -96,6 +97,7 @@ TEST(ToAPValue, FunctionPointers) {
   auto AST = tooling::buildASTFromCodeWithArgs(
       Code, {"-fexperimental-new-constant-interpreter"});
 
+  auto &ASTCtx = AST->getASTContext();
   auto &Ctx = AST->getASTContext().getInterpContext();
   Program &Prog = Ctx.getProgram();
 
@@ -117,7 +119,7 @@ TEST(ToAPValue, FunctionPointers) {
     const Pointer &GP = getGlobalPtr("func");
     const FunctionPointer &FP = GP.deref<FunctionPointer>();
     ASSERT_FALSE(FP.isZero());
-    APValue A = FP.toAPValue();
+    APValue A = FP.toAPValue(ASTCtx);
     ASSERT_TRUE(A.hasValue());
     ASSERT_TRUE(A.isLValue());
     ASSERT_TRUE(A.hasLValuePath());
@@ -132,7 +134,7 @@ TEST(ToAPValue, FunctionPointers) {
     ASSERT_NE(D, nullptr);
     const Pointer &GP = getGlobalPtr("nullp");
     const auto &P = GP.deref<FunctionPointer>();
-    APValue A = P.toAPValue();
+    APValue A = P.toAPValue(ASTCtx);
     ASSERT_TRUE(A.isLValue());
     ASSERT_TRUE(A.getLValueBase().isNull());
     ASSERT_TRUE(A.isNullPointer());
@@ -151,6 +153,7 @@ TEST(ToAPValue, FunctionPointersC) {
   auto AST = tooling::buildASTFromCodeWithArgs(
       Code, {"-x", "c", "-fexperimental-new-constant-interpreter"});
 
+  auto &ASTCtx = AST->getASTContext();
   auto &Ctx = AST->getASTContext().getInterpContext();
   Program &Prog = Ctx.getProgram();
 
@@ -174,7 +177,7 @@ TEST(ToAPValue, FunctionPointersC) {
     ASSERT_TRUE(GP.isLive());
     const FunctionPointer &FP = GP.deref<FunctionPointer>();
     ASSERT_FALSE(FP.isZero());
-    APValue A = FP.toAPValue();
+    APValue A = FP.toAPValue(ASTCtx);
     ASSERT_TRUE(A.hasValue());
     ASSERT_TRUE(A.isLValue());
     const auto &Path = A.getLValuePath();
@@ -197,6 +200,7 @@ TEST(ToAPValue, MemberPointers) {
   auto AST = tooling::buildASTFromCodeWithArgs(
       Code, {"-fexperimental-new-constant-interpreter"});
 
+  auto &ASTCtx = AST->getASTContext();
   auto &Ctx = AST->getASTContext().getInterpContext();
   Program &Prog = Ctx.getProgram();
 
@@ -218,7 +222,7 @@ TEST(ToAPValue, MemberPointers) {
     const Pointer &GP = getGlobalPtr("pm");
     ASSERT_TRUE(GP.isLive());
     const MemberPointer &FP = GP.deref<MemberPointer>();
-    APValue A = FP.toAPValue();
+    APValue A = FP.toAPValue(ASTCtx);
     ASSERT_EQ(A.getMemberPointerDecl(), getDecl("m"));
     ASSERT_EQ(A.getKind(), APValue::MemberPointer);
   }
@@ -228,7 +232,7 @@ TEST(ToAPValue, MemberPointers) {
     ASSERT_TRUE(GP.isLive());
     const MemberPointer &NP = GP.deref<MemberPointer>();
     ASSERT_TRUE(NP.isZero());
-    APValue A = NP.toAPValue();
+    APValue A = NP.toAPValue(ASTCtx);
     ASSERT_EQ(A.getKind(), APValue::MemberPointer);
   }
 }
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index c5e8aa72cd2cb..f70424c3ee060 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -75,6 +75,26 @@ TEST_F(TokenAnnotatorTest, UnderstandsUsesOfStarAndAmp) {
   EXPECT_TOKEN(Tokens[10], tok::r_paren, TT_TypeDeclarationParen);
   EXPECT_TOKEN(Tokens[11], tok::star, TT_PointerOrReference);
 
+  Tokens = annotate("#define FOO bar(a * b)");
+  ASSERT_EQ(Tokens.size(), 10u) << Tokens;
+  EXPECT_TOKEN(Tokens[6], tok::star, TT_BinaryOperator);
+
+  Tokens = annotate("#define FOO foo.bar(a & b)");
+  ASSERT_EQ(Tokens.size(), 12u) << Tokens;
+  EXPECT_TOKEN(Tokens[8], tok::amp, TT_BinaryOperator);
+
+  Tokens = annotate("#define FOO foo::bar(a && b)");
+  ASSERT_EQ(Tokens.size(), 12u) << Tokens;
+  EXPECT_TOKEN(Tokens[8], tok::ampamp, TT_BinaryOperator);
+
+  Tokens = annotate("#define FOO foo bar(a *b)");
+  ASSERT_EQ(Tokens.size(), 11u) << Tokens;
+  EXPECT_TOKEN(Tokens[7], tok::star, TT_PointerOrReference);
+
+  Tokens = annotate("#define FOO void foo::bar(a &b)");
+  ASSERT_EQ(Tokens.size(), 13u) << Tokens;
+  EXPECT_TOKEN(Tokens[9], tok::amp, TT_PointerOrReference);
+
   Tokens = annotate("void f() {\n"
                     "  while (p < a && *p == 'a')\n"
                     "    p++;\n"
diff --git a/clang/unittests/Support/TimeProfilerTest.cpp b/clang/unittests/Support/TimeProfilerTest.cpp
index 56d880cffde61..5f3950ff033f1 100644
--- a/clang/unittests/Support/TimeProfilerTest.cpp
+++ b/clang/unittests/Support/TimeProfilerTest.cpp
@@ -10,15 +10,11 @@
 #include "clang/Frontend/FrontendActions.h"
 #include "clang/Lex/PreprocessorOptions.h"
 
-#include "llvm/ADT/StringMap.h"
 #include "llvm/Support/JSON.h"
-#include "llvm/Support/Path.h"
 #include "llvm/Support/TimeProfiler.h"
-#include "llvm/Support/VirtualFileSystem.h"
 #include <stack>
 
 #include "gtest/gtest.h"
-#include <tuple>
 
 using namespace clang;
 using namespace llvm;
@@ -27,8 +23,7 @@ namespace {
 
 // Should be called before testing.
 void setupProfiler() {
-  timeTraceProfilerInitialize(/*TimeTraceGranularity=*/0, "test",
-                              /*TimeTraceVerbose=*/true);
+  timeTraceProfilerInitialize(/*TimeTraceGranularity=*/0, "test");
 }
 
 // Should be called after `compileFromString()`.
@@ -43,24 +38,14 @@ std::string teardownProfiler() {
 
 // Returns true if code compiles successfully.
 // We only parse AST here. This is enough for constexpr evaluation.
-bool compileFromString(StringRef Code, StringRef Standard, StringRef File,
-                       llvm::StringMap<std::string> Headers = {}) {
+bool compileFromString(StringRef Code, StringRef Standard, StringRef FileName) {
   CompilerInstance Compiler;
   Compiler.createDiagnostics();
 
-  llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS(
-      new llvm::vfs::InMemoryFileSystem());
-  FS->addFile(File, 0, MemoryBuffer::getMemBuffer(Code));
-  for (const auto &Header : Headers) {
-    FS->addFile(Header.getKey(), 0,
-                MemoryBuffer::getMemBuffer(Header.getValue()));
-  }
-  llvm::IntrusiveRefCntPtr<FileManager> Files(
-      new FileManager(FileSystemOptions(), FS));
-  Compiler.setFileManager(Files.get());
-
   auto Invocation = std::make_shared<CompilerInvocation>();
-  std::vector<const char *> Args = {Standard.data(), File.data()};
+  Invocation->getPreprocessorOpts().addRemappedFile(
+      FileName, MemoryBuffer::getMemBuffer(Code).release());
+  const char *Args[] = {Standard.data(), FileName.data()};
   CompilerInvocation::CreateFromArgs(*Invocation, Args,
                                      Compiler.getDiagnostics());
   Compiler.setInvocation(std::move(Invocation));
@@ -75,28 +60,13 @@ bool compileFromString(StringRef Code, StringRef Standard, StringRef File,
   return Compiler.ExecuteAction(Action);
 }
 
-std::string GetMetadata(json::Object *Event) {
-  std::string Metadata;
-  llvm::raw_string_ostream OS(Metadata);
-  if (json::Object *Args = Event->getObject("args")) {
-    if (auto Detail = Args->getString("detail"))
-      OS << Detail;
-    // Use only filename to not include os-specific path separators.
-    if (auto File = Args->getString("file"))
-      OS << ", " << llvm::sys::path::filename(*File);
-    if (auto Line = Args->getInteger("line"))
-      OS << ":" << *Line;
-  }
-  return Metadata;
-}
-
 // Returns pretty-printed trace graph.
 std::string buildTraceGraph(StringRef Json) {
   struct EventRecord {
     int64_t TimestampBegin;
     int64_t TimestampEnd;
-    std::string Name;
-    std::string Metadata;
+    StringRef Name;
+    StringRef Detail;
   };
   std::vector<EventRecord> Events;
 
@@ -111,13 +81,10 @@ std::string buildTraceGraph(StringRef Json) {
     int64_t TimestampBegin = TraceEventObj->getInteger("ts").value_or(0);
     int64_t TimestampEnd =
         TimestampBegin + TraceEventObj->getInteger("dur").value_or(0);
-    std::string Name = TraceEventObj->getString("name").value_or("").str();
-    std::string Metadata = GetMetadata(TraceEventObj);
-
-    // Source events are asynchronous events and may not perfectly nest the
-    // synchronous events. Skip testing them.
-    if (Name == "Source")
-      continue;
+    StringRef Name = TraceEventObj->getString("name").value_or("");
+    StringRef Detail = "";
+    if (json::Object *Args = TraceEventObj->getObject("args"))
+      Detail = Args->getString("detail").value_or("");
 
     // This is a "summary" event, like "Total PerformPendingInstantiations",
     // skip it
@@ -125,7 +92,7 @@ std::string buildTraceGraph(StringRef Json) {
       continue;
 
     Events.emplace_back(
-        EventRecord{TimestampBegin, TimestampEnd, Name, Metadata});
+        EventRecord{TimestampBegin, TimestampEnd, Name, Detail});
   }
 
   // There can be nested events that are very fast, for example:
@@ -165,9 +132,9 @@ std::string buildTraceGraph(StringRef Json) {
       Stream << "| ";
     }
     Stream.write(Event.Name.data(), Event.Name.size());
-    if (!Event.Metadata.empty()) {
+    if (!Event.Detail.empty()) {
       Stream << " (";
-      Stream.write(Event.Metadata.data(), Event.Metadata.size());
+      Stream.write(Event.Detail.data(), Event.Detail.size());
       Stream << ")";
     }
     Stream << "\n";
@@ -178,7 +145,7 @@ std::string buildTraceGraph(StringRef Json) {
 } // namespace
 
 TEST(TimeProfilerTest, ConstantEvaluationCxx20) {
-  std::string Code = R"(
+  constexpr StringRef Code = R"(
 void print(double value);
 
 namespace slow_namespace {
@@ -208,7 +175,8 @@ constexpr int slow_init_list[] = {1, 1, 2, 3, 5, 8, 13, 21}; // 25th line
   setupProfiler();
   ASSERT_TRUE(compileFromString(Code, "-std=c++20", "test.cc"));
   std::string Json = teardownProfiler();
-  ASSERT_EQ(R"(
+  std::string TraceGraph = buildTraceGraph(Json);
+  ASSERT_TRUE(TraceGraph == R"(
 Frontend
 | ParseDeclarationOrFunctionDefinition (test.cc:2:1)
 | ParseDeclarationOrFunctionDefinition (test.cc:6:1)
@@ -234,54 +202,14 @@ Frontend
 | ParseDeclarationOrFunctionDefinition (test.cc:25:1)
 | | EvaluateAsInitializer (slow_init_list)
 | PerformPendingInstantiations
-)",
-            buildTraceGraph(Json));
-}
-
-TEST(TimeProfilerTest, TemplateInstantiations) {
-  std::string B_H = R"(
-    template <typename T>
-    T fooB(T t) {
-      return T();
-    }
+)");
 
-    #define MacroTemp(x) template <typename T> void foo##x(T) { T(); }
-  )";
-
-  std::string A_H = R"(
-    #include "b.h"
-
-    MacroTemp(MTA)
-
-    template <typename T>
-    void fooA(T t) { fooB(t); fooMTA(t); }
-  )";
-  std::string Code = R"(
-    #include "a.h"
-    void user() { fooA(0); }
-  )";
-
-  setupProfiler();
-  ASSERT_TRUE(compileFromString(Code, "-std=c++20", "test.cc",
-                                /*Headers=*/{{"a.h", A_H}, {"b.h", B_H}}));
-  std::string Json = teardownProfiler();
-  ASSERT_EQ(R"(
-Frontend
-| ParseFunctionDefinition (fooB)
-| ParseFunctionDefinition (fooMTA)
-| ParseFunctionDefinition (fooA)
-| ParseDeclarationOrFunctionDefinition (test.cc:3:5)
-| | ParseFunctionDefinition (user)
-| PerformPendingInstantiations
-| | InstantiateFunction (fooA<int>, a.h:7)
-| | | InstantiateFunction (fooB<int>, b.h:3)
-| | | InstantiateFunction (fooMTA<int>, a.h:4)
-)",
-            buildTraceGraph(Json));
+  // NOTE: If this test is failing, run this test with
+  // `llvm::errs() << TraceGraph;` and change the assert above.
 }
 
 TEST(TimeProfilerTest, ConstantEvaluationC99) {
-  std::string Code = R"(
+  constexpr StringRef Code = R"(
 struct {
   short quantval[4]; // 3rd line
 } value;
@@ -290,12 +218,15 @@ struct {
   setupProfiler();
   ASSERT_TRUE(compileFromString(Code, "-std=c99", "test.c"));
   std::string Json = teardownProfiler();
-  ASSERT_EQ(R"(
+  std::string TraceGraph = buildTraceGraph(Json);
+  ASSERT_TRUE(TraceGraph == R"(
 Frontend
 | ParseDeclarationOrFunctionDefinition (test.c:2:1)
 | | isIntegerConstantExpr (<test.c:3:18>)
 | | EvaluateKnownConstIntCheckOverflow (<test.c:3:18>)
 | PerformPendingInstantiations
-)",
-            buildTraceGraph(Json));
+)");
+
+  // NOTE: If this test is failing, run this test with
+  // `llvm::errs() << TraceGraph;` and change the assert above.
 }
diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html
index a6ded8be3ae9e..1f69a4e8a5620 100755
--- a/clang/www/cxx_status.html
+++ b/clang/www/cxx_status.html
@@ -182,7 +182,7 @@ <h2 id="cxx26">C++2c implementation status</h2>
  <tr>
   <td>Module Declarations Shouldn’t be Macros</td>
   <td><a href="https://wg21.link/P3034R1">P3034R1</a> (<a href="#dr">DR</a>)</td>
-  <td class="none" align="center">No</td>
+  <td class="unreleased" align="center">Clang 19</td>
  </tr>
  <tr>
   <td>Trivial infinite loops are not Undefined Behavior</td>
diff --git a/compiler-rt/lib/asan/asan_interceptors.cpp b/compiler-rt/lib/asan/asan_interceptors.cpp
index f8f86a766b204..74af2e65e9bfa 100644
--- a/compiler-rt/lib/asan/asan_interceptors.cpp
+++ b/compiler-rt/lib/asan/asan_interceptors.cpp
@@ -747,7 +747,7 @@ INTERCEPTOR(int, atexit, void (*func)()) {
 extern "C" {
 extern int _pthread_atfork(void (*prepare)(), void (*parent)(),
                            void (*child)());
-};
+}
 
 INTERCEPTOR(int, pthread_atfork, void (*prepare)(), void (*parent)(),
             void (*child)()) {
diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c
index ab2b685e67ef8..867ed97e57bf2 100644
--- a/compiler-rt/lib/builtins/cpu_model/x86.c
+++ b/compiler-rt/lib/builtins/cpu_model/x86.c
@@ -141,7 +141,7 @@ enum ProcessorFeatures {
   FEATURE_AVX512VP2INTERSECT,
   // FIXME: Below Features has some missings comparing to gcc, it's because gcc
   // has some not one-to-one mapped in llvm.
-  FEATURE_3DNOW,
+  // FEATURE_3DNOW,
   // FEATURE_3DNOWP,
   FEATURE_ADX = 40,
   // FEATURE_ABM,
diff --git a/compiler-rt/lib/lsan/lsan_interceptors.cpp b/compiler-rt/lib/lsan/lsan_interceptors.cpp
index 6df4b6865b379..b569c337e9764 100644
--- a/compiler-rt/lib/lsan/lsan_interceptors.cpp
+++ b/compiler-rt/lib/lsan/lsan_interceptors.cpp
@@ -389,7 +389,7 @@ INTERCEPTOR(int, atexit, void (*f)()) {
 extern "C" {
 extern int _pthread_atfork(void (*prepare)(), void (*parent)(),
                            void (*child)());
-};
+}
 
 INTERCEPTOR(int, pthread_atfork, void (*prepare)(), void (*parent)(),
             void (*child)()) {
diff --git a/cross-project-tests/lit.cfg.py b/cross-project-tests/lit.cfg.py
index 774c4eaf4d976..619634578dfe6 100644
--- a/cross-project-tests/lit.cfg.py
+++ b/cross-project-tests/lit.cfg.py
@@ -84,7 +84,13 @@ def get_required_attr(config, attr_name):
 # use_clang() and use_lld() respectively, so set them to "", if needed.
 if not hasattr(config, "clang_src_dir"):
     config.clang_src_dir = ""
-llvm_config.use_clang(required=("clang" in config.llvm_enabled_projects))
+# Facebook T92898286
+should_test_bolt = get_required_attr(config, "llvm_test_bolt")
+if should_test_bolt:
+    llvm_config.use_clang(required=("clang" in config.llvm_enabled_projects), additional_flags=["--post-link-optimize"])
+else:
+    llvm_config.use_clang(required=("clang" in config.llvm_enabled_projects))
+# End Facebook T92898286
 
 if not hasattr(config, "lld_src_dir"):
     config.lld_src_dir = ""
@@ -293,3 +299,9 @@ def get_clang_default_dwarf_version_string(triple):
 # Allow 'REQUIRES: XXX-registered-target' in tests.
 for arch in config.targets_to_build:
     config.available_features.add(arch.lower() + "-registered-target")
+
+# Facebook T92898286
+# Ensure the user's PYTHONPATH is included.
+if "PYTHONPATH" in os.environ:
+    config.environment["PYTHONPATH"] = os.environ["PYTHONPATH"]
+# End Facebook T92898286
diff --git a/cross-project-tests/lit.site.cfg.py.in b/cross-project-tests/lit.site.cfg.py.in
index 39458dfc79afd..2d53cd377f033 100644
--- a/cross-project-tests/lit.site.cfg.py.in
+++ b/cross-project-tests/lit.site.cfg.py.in
@@ -21,6 +21,10 @@ config.mlir_src_root = "@MLIR_SOURCE_DIR@"
 
 config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
 
+# Facebook T92898286
+config.llvm_test_bolt = lit.util.pythonize_bool("@LLVM_TEST_BOLT@")
+# End Facebook T92898286
+
 import lit.llvm
 lit.llvm.initialize(lit_config, config)
 
diff --git a/libc/config/config.json b/libc/config/config.json
index 94bfed894c173..2005f4297bfc1 100644
--- a/libc/config/config.json
+++ b/libc/config/config.json
@@ -77,6 +77,16 @@
       "doc": "Default size for the constinit freelist buffer used for the freelist malloc implementation (default 1o 1GB)."
     }
   },
+  "unistd": {
+    "LIBC_CONF_ENABLE_TID_CACHE": {
+      "value": true,
+      "doc": "Enable caching mechanism for gettid to avoid syscall (only effective in fullbuild mode, default to true). Please refer to Undefined Behavior documentation for implications."
+    },
+    "LIBC_CONF_ENABLE_PID_CACHE": {
+      "value": true,
+      "doc": "Enable caching mechanism for getpid to avoid syscall (default to true). Please refer to Undefined Behavior documentation for implications."
+    }
+  },
   "math": {
     "LIBC_CONF_MATH_OPTIMIZATIONS": {
       "value": 0,
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index e2f6bd74bb694..8afd3fb67197e 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -297,6 +297,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.unistd.geteuid
     libc.src.unistd.getpid
     libc.src.unistd.getppid
+    libc.src.unistd.gettid
     libc.src.unistd.getuid
     libc.src.unistd.isatty
     libc.src.unistd.link
diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt
index 33dd8d06173b2..54a382eccb546 100644
--- a/libc/config/linux/riscv/entrypoints.txt
+++ b/libc/config/linux/riscv/entrypoints.txt
@@ -17,6 +17,12 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.ctype.tolower
     libc.src.ctype.toupper
 
+    # dlfcn.h entrypoints
+    libc.src.dlfcn.dlclose
+    libc.src.dlfcn.dlerror
+    libc.src.dlfcn.dlopen
+    libc.src.dlfcn.dlsym
+
     # errno.h entrypoints
     libc.src.errno.errno
 
@@ -52,6 +58,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.string.mempcpy
     libc.src.string.memrchr
     libc.src.string.memset
+    libc.src.string.memset_explicit
     libc.src.string.rindex
     libc.src.string.stpcpy
     libc.src.string.stpncpy
@@ -180,6 +187,9 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.stdlib.qsort_r
     libc.src.stdlib.rand
     libc.src.stdlib.srand
+    libc.src.stdlib.strfromd
+    libc.src.stdlib.strfromf
+    libc.src.stdlib.strfroml
     libc.src.stdlib.strtod
     libc.src.stdlib.strtof
     libc.src.stdlib.strtol
@@ -197,6 +207,7 @@ set(TARGET_LIBC_ENTRYPOINTS
 
     # stdio.h entrypoints
     libc.src.stdio.fdopen
+    libc.src.stdio.fileno
     libc.src.stdio.fprintf
     libc.src.stdio.fscanf
     libc.src.stdio.printf
@@ -211,6 +222,14 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.stdio.vsnprintf
     libc.src.stdio.vsprintf
 
+    # sys/epoll.h entrypoints
+    libc.src.sys.epoll.epoll_create
+    libc.src.sys.epoll.epoll_create1
+    libc.src.sys.epoll.epoll_ctl
+    libc.src.sys.epoll.epoll_pwait
+    libc.src.sys.epoll.epoll_wait
+    libc.src.sys.epoll.epoll_pwait2
+
     # sys/mman.h entrypoints
     libc.src.sys.mman.madvise
     libc.src.sys.mman.mincore
@@ -261,12 +280,6 @@ set(TARGET_LIBC_ENTRYPOINTS
     # sys/auxv.h entrypoints
     libc.src.sys.auxv.getauxval
 
-    # sys/epoll.h entrypoints
-    # Disabled due to epoll_wait syscalls not being available on this platform.
-    # libc.src.sys.epoll.epoll_wait
-    # libc.src.sys.epoll.epoll_pwait
-    # libc.src.sys.epoll.epoll_pwait2
-
     # termios.h entrypoints
     libc.src.termios.cfgetispeed
     libc.src.termios.cfgetospeed
@@ -296,12 +309,14 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.unistd.geteuid
     libc.src.unistd.getpid
     libc.src.unistd.getppid
+    libc.src.unistd.gettid
     libc.src.unistd.getuid
     libc.src.unistd.isatty
     libc.src.unistd.link
     libc.src.unistd.linkat
     libc.src.unistd.lseek
     libc.src.unistd.pathconf
+    libc.src.unistd.pipe
     libc.src.unistd.pread
     libc.src.unistd.pwrite
     libc.src.unistd.read
@@ -347,6 +362,9 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.atan2f
     libc.src.math.atanf
     libc.src.math.atanhf
+    libc.src.math.canonicalize
+    libc.src.math.canonicalizef
+    libc.src.math.canonicalizel
     libc.src.math.cbrt
     libc.src.math.cbrtf
     libc.src.math.ceil
@@ -365,6 +383,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.exp10f
     libc.src.math.exp2
     libc.src.math.exp2f
+    libc.src.math.exp2m1f
     libc.src.math.expf
     libc.src.math.expm1
     libc.src.math.expm1f
@@ -492,6 +511,9 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.rintf
     libc.src.math.rintl
     libc.src.math.round
+    libc.src.math.roundeven
+    libc.src.math.roundevenf
+    libc.src.math.roundevenl
     libc.src.math.roundf
     libc.src.math.roundl
     libc.src.math.scalbn
@@ -523,8 +545,10 @@ set(TARGET_LIBM_ENTRYPOINTS
 if(LIBC_TYPES_HAS_FLOAT128)
   list(APPEND TARGET_LIBM_ENTRYPOINTS
     # math.h C23 _Float128 entrypoints
+    libc.src.math.canonicalizef128
     libc.src.math.ceilf128
     libc.src.math.copysignf128
+    libc.src.math.dmulf128
     libc.src.math.fabsf128
     libc.src.math.fdimf128
     libc.src.math.floorf128
@@ -539,6 +563,7 @@ if(LIBC_TYPES_HAS_FLOAT128)
     libc.src.math.fminimum_numf128
     libc.src.math.fminimumf128
     libc.src.math.fmodf128
+    libc.src.math.fmulf128
     libc.src.math.frexpf128
     libc.src.math.fromfpf128
     libc.src.math.fromfpxf128
@@ -556,7 +581,9 @@ if(LIBC_TYPES_HAS_FLOAT128)
     libc.src.math.nextafterf128
     libc.src.math.nextdownf128
     libc.src.math.nextupf128
+    libc.src.math.remquof128
     libc.src.math.rintf128
+    libc.src.math.roundevenf128
     libc.src.math.roundf128
     libc.src.math.scalbnf128
     libc.src.math.sqrtf128
@@ -566,14 +593,47 @@ if(LIBC_TYPES_HAS_FLOAT128)
   )
 endif()
 
+if(LIBC_COMPILER_HAS_FIXED_POINT)
+  list(APPEND TARGET_LIBM_ENTRYPOINTS
+    # stdfix.h _Fract and _Accum entrypoints
+    libc.src.stdfix.abshk
+    libc.src.stdfix.abshr
+    libc.src.stdfix.absk
+    libc.src.stdfix.abslk
+    libc.src.stdfix.abslr
+    libc.src.stdfix.absr
+    libc.src.stdfix.exphk
+    libc.src.stdfix.expk
+    libc.src.stdfix.roundhk
+    libc.src.stdfix.roundhr
+    libc.src.stdfix.roundk
+    libc.src.stdfix.roundlk
+    libc.src.stdfix.roundlr
+    libc.src.stdfix.roundr
+    libc.src.stdfix.rounduhk
+    libc.src.stdfix.rounduhr
+    libc.src.stdfix.rounduk
+    libc.src.stdfix.roundulk
+    libc.src.stdfix.roundulr
+    libc.src.stdfix.roundur
+    libc.src.stdfix.sqrtuhk
+    libc.src.stdfix.sqrtuhr
+    libc.src.stdfix.sqrtuk
+    libc.src.stdfix.sqrtur
+    libc.src.stdfix.sqrtulr
+    libc.src.stdfix.uhksqrtus
+    libc.src.stdfix.uksqrtui
+  )
+endif()
+
 if(LLVM_LIBC_FULL_BUILD)
   list(APPEND TARGET_LIBC_ENTRYPOINTS
-    # compiler entrypoints (no corresponding header)
-    libc.src.compiler.__stack_chk_fail
-
     # assert.h entrypoints
     libc.src.assert.__assert_fail
 
+    # compiler entrypoints (no corresponding header)
+    libc.src.compiler.__stack_chk_fail
+
     # dirent.h entrypoints
     libc.src.dirent.closedir
     libc.src.dirent.dirfd
@@ -598,6 +658,12 @@ if(LLVM_LIBC_FULL_BUILD)
     libc.src.pthread.pthread_attr_setguardsize
     libc.src.pthread.pthread_attr_setstack
     libc.src.pthread.pthread_attr_setstacksize
+    libc.src.pthread.pthread_condattr_destroy
+    libc.src.pthread.pthread_condattr_getclock
+    libc.src.pthread.pthread_condattr_getpshared
+    libc.src.pthread.pthread_condattr_init
+    libc.src.pthread.pthread_condattr_setclock
+    libc.src.pthread.pthread_condattr_setpshared
     libc.src.pthread.pthread_create
     libc.src.pthread.pthread_detach
     libc.src.pthread.pthread_equal
@@ -620,6 +686,21 @@ if(LLVM_LIBC_FULL_BUILD)
     libc.src.pthread.pthread_mutexattr_setrobust
     libc.src.pthread.pthread_mutexattr_settype
     libc.src.pthread.pthread_once
+    libc.src.pthread.pthread_rwlock_destroy
+    libc.src.pthread.pthread_rwlock_init
+    libc.src.pthread.pthread_rwlock_rdlock
+    libc.src.pthread.pthread_rwlock_timedrdlock
+    libc.src.pthread.pthread_rwlock_timedwrlock
+    libc.src.pthread.pthread_rwlock_tryrdlock
+    libc.src.pthread.pthread_rwlock_trywrlock
+    libc.src.pthread.pthread_rwlock_unlock
+    libc.src.pthread.pthread_rwlock_wrlock
+    libc.src.pthread.pthread_rwlockattr_destroy
+    libc.src.pthread.pthread_rwlockattr_getkind_np
+    libc.src.pthread.pthread_rwlockattr_getpshared
+    libc.src.pthread.pthread_rwlockattr_init
+    libc.src.pthread.pthread_rwlockattr_setkind_np
+    libc.src.pthread.pthread_rwlockattr_setpshared
     libc.src.pthread.pthread_self
     libc.src.pthread.pthread_setname_np
     libc.src.pthread.pthread_setspecific
@@ -643,7 +724,6 @@ if(LLVM_LIBC_FULL_BUILD)
     libc.src.stdio.fgetc
     libc.src.stdio.fgetc_unlocked
     libc.src.stdio.fgets
-    libc.src.stdio.fileno
     libc.src.stdio.flockfile
     libc.src.stdio.fopen
     libc.src.stdio.fopencookie
@@ -652,7 +732,9 @@ if(LLVM_LIBC_FULL_BUILD)
     libc.src.stdio.fread
     libc.src.stdio.fread_unlocked
     libc.src.stdio.fseek
+    libc.src.stdio.fseeko
     libc.src.stdio.ftell
+    libc.src.stdio.ftello
     libc.src.stdio.funlockfile
     libc.src.stdio.fwrite
     libc.src.stdio.fwrite_unlocked
@@ -673,9 +755,11 @@ if(LLVM_LIBC_FULL_BUILD)
     # stdlib.h entrypoints
     libc.src.stdlib._Exit
     libc.src.stdlib.abort
+    libc.src.stdlib.at_quick_exit
     libc.src.stdlib.atexit
     libc.src.stdlib.exit
     libc.src.stdlib.getenv
+    libc.src.stdlib.quick_exit
 
     # signal.h entrypoints
     libc.src.signal.kill
@@ -757,6 +841,9 @@ if(LLVM_LIBC_FULL_BUILD)
 
     # sys/select.h entrypoints
     libc.src.sys.select.select
+
+    # sys/socket.h entrypoints
+    libc.src.sys.socket.socket
   )
 endif()
 
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 7309e95644c74..d4e246efb3f8a 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -228,9 +228,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.sys.epoll.epoll_ctl
     libc.src.sys.epoll.epoll_pwait
     libc.src.sys.epoll.epoll_wait
-    # TODO: Need to check if pwait2 is available before providing.
-    # https://github.com/llvm/llvm-project/issues/80060
-    # libc.src.sys.epoll.epoll_pwait2
+    libc.src.sys.epoll.epoll_pwait2
 
     # sys/mman.h entrypoints
     libc.src.sys.mman.madvise
@@ -315,6 +313,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.unistd.geteuid
     libc.src.unistd.getpid
     libc.src.unistd.getppid
+    libc.src.unistd.gettid
     libc.src.unistd.getuid
     libc.src.unistd.isatty
     libc.src.unistd.link
diff --git a/libc/docs/configure.rst b/libc/docs/configure.rst
index dfb35f6a6611a..5c55e4ab0f181 100644
--- a/libc/docs/configure.rst
+++ b/libc/docs/configure.rst
@@ -52,3 +52,6 @@ to learn about the defaults for your platform and target.
 * **"string" options**
     - ``LIBC_CONF_MEMSET_X86_USE_SOFTWARE_PREFETCHING``: Inserts prefetch for write instructions (PREFETCHW) for memset on x86 to recover performance when hardware prefetcher is disabled.
     - ``LIBC_CONF_STRING_UNSAFE_WIDE_READ``: Read more than a byte at a time to perform byte-string operations like strlen.
+* **"unistd" options**
+    - ``LIBC_CONF_ENABLE_PID_CACHE``: Enable caching mechanism for getpid to avoid syscall (default to true). Please refer to Undefined Behavior documentation for implications.
+    - ``LIBC_CONF_ENABLE_TID_CACHE``: Enable caching mechanism for gettid to avoid syscall (only effective in fullbuild mode, default to true). Please refer to Undefined Behavior documentation for implications.
diff --git a/libc/docs/dev/undefined_behavior.rst b/libc/docs/dev/undefined_behavior.rst
index 3faae3134ce2a..b712780222aa3 100644
--- a/libc/docs/dev/undefined_behavior.rst
+++ b/libc/docs/dev/undefined_behavior.rst
@@ -93,3 +93,26 @@ direction in this case.
 Non-const Constant Return Values
 --------------------------------
 Some libc functions, like ``dlerror()``, return ``char *`` instead of ``const char *`` and then tell the caller they promise not to to modify this value. Any modification of this value is undefined behavior.
+
+Cached ``getpid/gettid``
+------------------------
+Since version ``2.25``, glibc removed its cache mechanism for ``getpid/gettid``
+(see the history section in https://man7.org/linux/man-pages/man2/getpid.2.html).
+LLVM's libc still implements the cache as it is useful for fast deadlock detection.
+The cache mechanism is also implemented in MUSL and bionic. The tid/pid cache can
+be disabled by setting ``LIBC_CONF_ENABLE_TID_CACHE`` and ``LIBC_CONF_ENABLE_PID_CACHE``
+to ``false`` respectively.
+
+Unwrapped ``SYS_clone/SYS_fork/SYS_vfork``
+------------------------------------------
+It is highly discouraged to use unwrapped ``SYS_clone/SYS_fork/SYS_vfork``.
+First, calling such syscalls without the provided libc wrappers ignores
+all the ``pthread_atfork`` entries as libc can no longer detect the ``fork``.
+Second, libc relies on the ``fork/clone`` wrappers to correctly maintain the cache for
+process id and thread id, and other important process-specific states such as the list
+of robust mutexes. Third, even if the user is to call ``exec*`` functions immediately,
+there can still be other unexpected issues. For instance, there can be signal handlers
+inherited from the parent process triggered inside the instruction window between ``fork``
+and ``exec*``. As libc fails to maintain its internal states correctly, even though the
+functions used inside the signal handlers are marked as ``async-signal-safe`` (such as
+``getpid``), they will still return wrong values or lead to other even worse situations.
diff --git a/libc/include/assert.h.def b/libc/include/assert.h.def
index 9c924c7f58545..d5ae14a1cd810 100644
--- a/libc/include/assert.h.def
+++ b/libc/include/assert.h.def
@@ -12,22 +12,19 @@
 // This file may be usefully included multiple times to change assert()'s
 // definition based on NDEBUG.
 
-
-#undef assert
-#ifdef NDEBUG
-#define assert(e) (void)0
-#else
-
 #ifndef __cplusplus
 #undef static_assert
 #define static_assert _Static_assert
 #endif
 
+#undef assert
+#ifdef NDEBUG
+#define assert(e) (void)0
+#else
 #ifdef __cplusplus
 extern "C"
 #endif
 _Noreturn void __assert_fail(const char *, const char *, unsigned, const char *) __NOEXCEPT;
-
 #define assert(e)  \
   ((e) ? (void)0 : __assert_fail(#e, __FILE__, __LINE__, __PRETTY_FUNCTION__))
 #endif
diff --git a/libc/spec/posix.td b/libc/spec/posix.td
index 1878b1ee2ae41..48f743dff4e6f 100644
--- a/libc/spec/posix.td
+++ b/libc/spec/posix.td
@@ -546,6 +546,11 @@ def POSIX : StandardSpec<"POSIX"> {
           RetValSpec<PidT>,
           [ArgSpec<VoidType>]
         >,
+        FunctionSpec<
+          "gettid",
+          RetValSpec<PidT>,
+          [ArgSpec<VoidType>]
+        >,
         FunctionSpec<
           "getuid",
           RetValSpec<UidT>,
@@ -601,16 +606,6 @@ def POSIX : StandardSpec<"POSIX"> {
           RetValSpec<IntType>,
           [ArgSpec<ConstCharPtr>]
         >,
-        FunctionSpec<
-          "getpid",
-          RetValSpec<IntType>,
-          [ArgSpec<VoidType>]
-        >,
-        FunctionSpec<
-          "getppid",
-          RetValSpec<IntType>,
-          [ArgSpec<VoidType>]
-        >,
         FunctionSpec<
           "link",
           RetValSpec<IntType>,
diff --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp
index 1b545c5096936..51811a27c1acd 100644
--- a/libc/src/__support/File/file.cpp
+++ b/libc/src/__support/File/file.cpp
@@ -282,7 +282,7 @@ int File::ungetc_unlocked(int c) {
   return c;
 }
 
-ErrorOr<int> File::seek(long offset, int whence) {
+ErrorOr<int> File::seek(off_t offset, int whence) {
   FileLock lock(this);
   if (prev_op == FileOp::WRITE && pos > 0) {
 
diff --git a/libc/src/__support/File/file.h b/libc/src/__support/File/file.h
index 0cedf866519d6..42e1d11b4ab1a 100644
--- a/libc/src/__support/File/file.h
+++ b/libc/src/__support/File/file.h
@@ -183,7 +183,7 @@ class File {
     return read_unlocked(data, len);
   }
 
-  ErrorOr<int> seek(long offset, int whence);
+  ErrorOr<int> seek(off_t offset, int whence);
 
   ErrorOr<off_t> tell();
 
diff --git a/libc/src/__support/HashTable/randomness.h b/libc/src/__support/HashTable/randomness.h
index 06d3e84a710e7..244dd41be3eec 100644
--- a/libc/src/__support/HashTable/randomness.h
+++ b/libc/src/__support/HashTable/randomness.h
@@ -36,7 +36,7 @@ LIBC_INLINE uint64_t next_random_seed() {
     entropy[1] = reinterpret_cast<uint64_t>(&state);
 #if defined(LIBC_HASHTABLE_USE_GETRANDOM)
     int errno_backup = libc_errno;
-    ssize_t count = sizeof(entropy);
+    size_t count = sizeof(entropy);
     uint8_t *buffer = reinterpret_cast<uint8_t *>(entropy);
     while (count > 0) {
       ssize_t len = getrandom(buffer, count, 0);
diff --git a/libc/src/__support/OSUtil/CMakeLists.txt b/libc/src/__support/OSUtil/CMakeLists.txt
index 94d1042ccbb4a..517f888178718 100644
--- a/libc/src/__support/OSUtil/CMakeLists.txt
+++ b/libc/src/__support/OSUtil/CMakeLists.txt
@@ -15,3 +15,20 @@ add_object_library(
   DEPENDS
     ${target_os_util}
 )
+
+if (LIBC_CONF_ENABLE_PID_CACHE)
+  set(libc_copt_enable_pid_cache 1)
+else()
+  set(libc_copt_enable_pid_cache 0)
+endif()
+
+if(TARGET libc.src.__support.OSUtil.${LIBC_TARGET_OS}.pid)
+  add_object_library(
+    pid
+    ALIAS
+    DEPENDS
+      .${LIBC_TARGET_OS}.pid
+    COMPILE_OPTIONS
+      -DLIBC_COPT_ENABLE_PID_CACHE=${libc_copt_enable_pid_cache}
+  )
+endif()
diff --git a/libc/src/__support/OSUtil/linux/CMakeLists.txt b/libc/src/__support/OSUtil/linux/CMakeLists.txt
index 089cad454d534..95a83d77d0257 100644
--- a/libc/src/__support/OSUtil/linux/CMakeLists.txt
+++ b/libc/src/__support/OSUtil/linux/CMakeLists.txt
@@ -23,3 +23,16 @@ add_object_library(
     libc.hdr.types.struct_f_owner_ex
     libc.hdr.types.off_t
 )
+
+add_object_library(
+  pid
+  SRCS
+    pid.cpp
+  HDRS
+    ../pid.h
+  DEPENDS
+    libc.src.__support.OSUtil.osutil
+    libc.src.__support.common
+    libc.hdr.types.pid_t
+    libc.include.sys_syscall
+)
diff --git a/libc/src/__support/OSUtil/linux/pid.cpp b/libc/src/__support/OSUtil/linux/pid.cpp
new file mode 100644
index 0000000000000..a8499af596229
--- /dev/null
+++ b/libc/src/__support/OSUtil/linux/pid.cpp
@@ -0,0 +1,20 @@
+//===------------ pid_t utilities implementation ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/OSUtil/pid.h"
+#include "src/__support/OSUtil/syscall.h"
+#include <sys/syscall.h>
+
+namespace LIBC_NAMESPACE_DECL {
+
+pid_t ProcessIdentity::cache = -1;
+pid_t ProcessIdentity::get_uncached() {
+  return syscall_impl<pid_t>(SYS_getpid);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/OSUtil/pid.h b/libc/src/__support/OSUtil/pid.h
new file mode 100644
index 0000000000000..d723abe728569
--- /dev/null
+++ b/libc/src/__support/OSUtil/pid.h
@@ -0,0 +1,41 @@
+//===------------ pid_t utilities -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_OSUTIL_PID_H
+#define LLVM_LIBC_SRC___SUPPORT_OSUTIL_PID_H
+#include "hdr/types/pid_t.h"
+#include "src/__support/macros/attributes.h"
+#include "src/__support/macros/optimization.h"
+
+#ifndef LIBC_COPT_ENABLE_PID_CACHE
+#define LIBC_COPT_ENABLE_PID_CACHE 1
+#endif
+
+namespace LIBC_NAMESPACE_DECL {
+
+class ProcessIdentity {
+  static LIBC_INLINE_VAR thread_local bool fork_inflight = true;
+  static pid_t cache;
+  static pid_t get_uncached();
+
+public:
+  LIBC_INLINE static void start_fork() { fork_inflight = true; }
+  LIBC_INLINE static void end_fork() { fork_inflight = false; }
+  LIBC_INLINE static void refresh_cache() { cache = get_uncached(); }
+  LIBC_INLINE static pid_t get() {
+#if LIBC_COPT_ENABLE_PID_CACHE
+    if (LIBC_LIKELY(!fork_inflight))
+      return cache;
+#endif
+    return get_uncached();
+  }
+};
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_OSUTIL_PID_H
diff --git a/libc/src/__support/threads/CMakeLists.txt b/libc/src/__support/threads/CMakeLists.txt
index d2e46b8e2574e..f1a2f162acfc2 100644
--- a/libc/src/__support/threads/CMakeLists.txt
+++ b/libc/src/__support/threads/CMakeLists.txt
@@ -44,6 +44,12 @@ if(TARGET libc.src.__support.threads.${LIBC_TARGET_OS}.mutex)
   )
 endif()
 
+if (LIBC_CONF_ENABLE_TID_CACHE)
+  set(libc_copt_enable_tid_cache 1)
+else()
+  set(libc_copt_enable_tid_cache 0)
+endif()
+
 add_header_library(
   thread_common
   HDRS
@@ -54,6 +60,9 @@ add_header_library(
     libc.src.__support.CPP.optional
     libc.src.__support.CPP.string_view
     libc.src.__support.CPP.stringstream
+    libc.hdr.types.pid_t
+  COMPILE_OPTIONS
+    -DLIBC_COPT_ENABLE_TID_CACHE=${libc_copt_enable_tid_cache}
 )
 
 if(TARGET libc.src.__support.threads.${LIBC_TARGET_OS}.thread)
@@ -89,3 +98,21 @@ if(TARGET libc.src.__support.threads.${LIBC_TARGET_OS}.CndVar)
     .${LIBC_TARGET_OS}.CndVar
   )
 endif()
+
+set(tid_dep)
+if (LLVM_LIBC_FULL_BUILD)
+  list(APPEND tid_dep libc.src.__support.thread)
+else()
+  list(APPEND tid_dep libc.src.__support.OSUtil.osutil)
+  list(APPEND tid_dep libc.include.sys_syscall)
+endif()
+
+add_header_library(
+  tid
+  HDRS
+    tid.h
+  DEPENDS
+    libc.src.__support.common
+    libc.hdr.types.pid_t
+    ${tid_dep}
+)
diff --git a/libc/src/__support/threads/linux/CMakeLists.txt b/libc/src/__support/threads/linux/CMakeLists.txt
index 8b7971584e77e..d86441dd67cd7 100644
--- a/libc/src/__support/threads/linux/CMakeLists.txt
+++ b/libc/src/__support/threads/linux/CMakeLists.txt
@@ -55,6 +55,7 @@ add_header_library(
     libc.src.__support.common
     libc.src.__support.OSUtil.osutil
     libc.src.__support.CPP.limits
+    libc.src.__support.threads.tid
   COMPILE_OPTIONS
     -DLIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT=${LIBC_CONF_RWLOCK_DEFAULT_SPIN_COUNT}
     ${monotonicity_flags}
diff --git a/libc/src/__support/threads/linux/rwlock.h b/libc/src/__support/threads/linux/rwlock.h
index d2fb0ce1a3c08..cae8aa6410686 100644
--- a/libc/src/__support/threads/linux/rwlock.h
+++ b/libc/src/__support/threads/linux/rwlock.h
@@ -23,6 +23,7 @@
 #include "src/__support/threads/linux/futex_word.h"
 #include "src/__support/threads/linux/raw_mutex.h"
 #include "src/__support/threads/sleep.h"
+#include "src/__support/threads/tid.h"
 
 #ifndef LIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT
 #define LIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT 100
@@ -336,8 +337,6 @@ class RwLock {
   LIBC_INLINE Role get_preference() const {
     return static_cast<Role>(preference);
   }
-  // TODO: use cached thread id once implemented.
-  LIBC_INLINE static pid_t gettid() { return syscall_impl<pid_t>(SYS_gettid); }
 
   template <Role role> LIBC_INLINE LockResult try_lock(RwState &old) {
     if constexpr (role == Role::Reader) {
@@ -359,7 +358,7 @@ class RwLock {
         if (LIBC_LIKELY(old.compare_exchange_weak_with(
                 state, old.set_writer_bit(), cpp::MemoryOrder::ACQUIRE,
                 cpp::MemoryOrder::RELAXED))) {
-          writer_tid.store(gettid(), cpp::MemoryOrder::RELAXED);
+          writer_tid.store(gettid_inline(), cpp::MemoryOrder::RELAXED);
           return LockResult::Success;
         }
         // Notice that old is updated by the compare_exchange_weak_with
@@ -394,7 +393,7 @@ class RwLock {
             unsigned spin_count = LIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT) {
     // Phase 1: deadlock detection.
     // A deadlock happens if this is a RAW/WAW lock in the same thread.
-    if (writer_tid.load(cpp::MemoryOrder::RELAXED) == gettid())
+    if (writer_tid.load(cpp::MemoryOrder::RELAXED) == gettid_inline())
       return LockResult::Deadlock;
 
 #if LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY
@@ -520,7 +519,7 @@ class RwLock {
     if (old.has_active_writer()) {
       // The lock is held by a writer.
       // Check if we are the owner of the lock.
-      if (writer_tid.load(cpp::MemoryOrder::RELAXED) != gettid())
+      if (writer_tid.load(cpp::MemoryOrder::RELAXED) != gettid_inline())
         return LockResult::PermissionDenied;
       // clear writer tid.
       writer_tid.store(0, cpp::MemoryOrder::RELAXED);
diff --git a/libc/src/__support/threads/linux/thread.cpp b/libc/src/__support/threads/linux/thread.cpp
index 36b4a88eba9b4..c8ad086f3d1cb 100644
--- a/libc/src/__support/threads/linux/thread.cpp
+++ b/libc/src/__support/threads/linux/thread.cpp
@@ -518,4 +518,6 @@ void thread_exit(ThreadReturnValue retval, ThreadStyle style) {
   __builtin_unreachable();
 }
 
+pid_t Thread::get_uncached_tid() { return syscall_impl<pid_t>(SYS_gettid); }
+
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/threads/thread.h b/libc/src/__support/threads/thread.h
index ce23a880e048a..931745299ffac 100644
--- a/libc/src/__support/threads/thread.h
+++ b/libc/src/__support/threads/thread.h
@@ -9,6 +9,11 @@
 #ifndef LLVM_LIBC_SRC___SUPPORT_THREADS_THREAD_H
 #define LLVM_LIBC_SRC___SUPPORT_THREADS_THREAD_H
 
+#ifndef LIBC_COPT_ENABLE_TID_CACHE
+#define LIBC_COPT_ENABLE_TID_CACHE 1
+#endif
+
+#include "hdr/types/pid_t.h"
 #include "src/__support/CPP/atomic.h"
 #include "src/__support/CPP/optional.h"
 #include "src/__support/CPP/string_view.h"
@@ -97,13 +102,13 @@ struct alignas(STACK_ALIGNMENT) ThreadAttributes {
   //          exits. It will clean up the thread resources once the thread
   //          exits.
   cpp::Atomic<uint32_t> detach_state;
-  void *stack;                  // Pointer to the thread stack
-  unsigned long long stacksize; // Size of the stack
-  unsigned long long guardsize; // Guard size on stack
-  uintptr_t tls;                // Address to the thread TLS memory
-  uintptr_t tls_size;           // The size of area pointed to by |tls|.
+  void *stack;               // Pointer to the thread stack
+  size_t stacksize;          // Size of the stack
+  size_t guardsize;          // Guard size on stack
+  uintptr_t tls;             // Address to the thread TLS memory
+  uintptr_t tls_size;        // The size of area pointed to by |tls|.
   unsigned char owned_stack; // Indicates if the thread owns this stack memory
-  int tid;
+  pid_t tid;
   ThreadStyle style;
   ThreadReturnValue retval;
   ThreadAtExitCallbackMgr *atexit_callback_mgr;
@@ -228,6 +233,26 @@ struct Thread {
 
   // Return the name of the thread in |name|. Return the error number of error.
   int get_name(cpp::StringStream &name) const;
+
+  static pid_t get_uncached_tid();
+
+  LIBC_INLINE void refresh_tid(pid_t cached = -1) {
+    if (cached >= 0)
+      this->attrib->tid = cached;
+    else
+      this->attrib->tid = get_uncached_tid();
+  }
+  LIBC_INLINE void invalidate_tid() { this->attrib->tid = -1; }
+
+  LIBC_INLINE pid_t get_tid() {
+#if LIBC_COPT_ENABLE_TID_CACHE
+    if (LIBC_UNLIKELY(this->attrib->tid < 0))
+      return get_uncached_tid();
+    return this->attrib->tid;
+#else
+    return get_uncached_tid();
+#endif
+  }
 };
 
 extern LIBC_THREAD_LOCAL Thread self;
diff --git a/libc/src/__support/threads/tid.h b/libc/src/__support/threads/tid.h
new file mode 100644
index 0000000000000..a575cff508a0f
--- /dev/null
+++ b/libc/src/__support/threads/tid.h
@@ -0,0 +1,34 @@
+//===--- Tid wrapper --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_THREADS_TID_H
+#define LLVM_LIBC_SRC___SUPPORT_THREADS_TID_H
+
+// This header is for internal use only. It automatically dispatches between
+// the full build and overlay build behaviors.
+
+#include "hdr/types/pid_t.h"
+#include "src/__support/common.h"
+#ifdef LIBC_FULL_BUILD
+#include "src/__support/threads/thread.h"
+#else
+#include "src/__support/OSUtil/syscall.h"
+#include <sys/syscall.h>
+#endif // LIBC_FULL_BUILD
+
+namespace LIBC_NAMESPACE_DECL {
+LIBC_INLINE pid_t gettid_inline() {
+#ifdef LIBC_FULL_BUILD
+  return self.get_tid();
+#else
+  return syscall_impl<pid_t>(SYS_gettid);
+#endif
+}
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_THREADS_TID_H
diff --git a/libc/src/math/docs/add_math_function.md b/libc/src/math/docs/add_math_function.md
index 9c23b8ca789bc..e9a6aadc6c44f 100644
--- a/libc/src/math/docs/add_math_function.md
+++ b/libc/src/math/docs/add_math_function.md
@@ -18,7 +18,7 @@ together with its specifications:
 ```
 - Add function specs to the file:
 ```
-  libc/spec/stdc.td
+  libc/newhdrgen/yaml/math.yaml
 ```
 
 ## Implementation
diff --git a/libc/src/stdio/fopencookie.cpp b/libc/src/stdio/fopencookie.cpp
index 07be9a5635a92..9f5694e8e0581 100644
--- a/libc/src/stdio/fopencookie.cpp
+++ b/libc/src/stdio/fopencookie.cpp
@@ -43,16 +43,16 @@ FileIOResult CookieFile::cookie_write(File *f, const void *data, size_t size) {
   auto cookie_file = reinterpret_cast<CookieFile *>(f);
   if (cookie_file->ops.write == nullptr)
     return 0;
-  return cookie_file->ops.write(cookie_file->cookie,
-                                reinterpret_cast<const char *>(data), size);
+  return static_cast<size_t>(cookie_file->ops.write(
+      cookie_file->cookie, reinterpret_cast<const char *>(data), size));
 }
 
 FileIOResult CookieFile::cookie_read(File *f, void *data, size_t size) {
   auto cookie_file = reinterpret_cast<CookieFile *>(f);
   if (cookie_file->ops.read == nullptr)
     return 0;
-  return cookie_file->ops.read(cookie_file->cookie,
-                               reinterpret_cast<char *>(data), size);
+  return static_cast<size_t>(cookie_file->ops.read(
+      cookie_file->cookie, reinterpret_cast<char *>(data), size));
 }
 
 ErrorOr<off_t> CookieFile::cookie_seek(File *f, off_t offset, int whence) {
diff --git a/libc/src/sys/auxv/linux/getauxval.cpp b/libc/src/sys/auxv/linux/getauxval.cpp
index 2ca894d091532..bfa6b23b5ef91 100644
--- a/libc/src/sys/auxv/linux/getauxval.cpp
+++ b/libc/src/sys/auxv/linux/getauxval.cpp
@@ -155,7 +155,7 @@ static void initialize_auxv_once(void) {
 
 static AuxEntry read_entry(int fd) {
   AuxEntry buf;
-  ssize_t size = sizeof(AuxEntry);
+  size_t size = sizeof(AuxEntry);
   char *ptr = reinterpret_cast<char *>(&buf);
   while (size > 0) {
     ssize_t ret = read(fd, ptr, size);
diff --git a/libc/src/sys/epoll/linux/epoll_pwait2.cpp b/libc/src/sys/epoll/linux/epoll_pwait2.cpp
index 14b419399fe9b..4123157d29fff 100644
--- a/libc/src/sys/epoll/linux/epoll_pwait2.cpp
+++ b/libc/src/sys/epoll/linux/epoll_pwait2.cpp
@@ -25,10 +25,22 @@ namespace LIBC_NAMESPACE_DECL {
 LLVM_LIBC_FUNCTION(int, epoll_pwait2,
                    (int epfd, struct epoll_event *events, int maxevents,
                     const struct timespec *timeout, const sigset_t *sigmask)) {
+#ifdef SYS_epoll_pwait2
   int ret = LIBC_NAMESPACE::syscall_impl<int>(
       SYS_epoll_pwait2, epfd, reinterpret_cast<long>(events), maxevents,
       reinterpret_cast<long>(timeout), reinterpret_cast<long>(sigmask),
       NSIG / 8);
+#elif defined(SYS_epoll_pwait)
+  // Convert the timespec to milliseconds, rounding up any partial millisecond.
+  // NOTE(review): timeout is dereferenced unchecked; confirm it cannot be null.
+  long timeout_ms = static_cast<long>(timeout->tv_sec * 1000 +
+                                      (timeout->tv_nsec + 999999) / 1000000);
+  int ret = LIBC_NAMESPACE::syscall_impl<int>(
+      SYS_epoll_pwait, epfd, reinterpret_cast<long>(events), maxevents,
+      timeout_ms, reinterpret_cast<long>(sigmask), NSIG / 8);
+#else
+#error "epoll_pwait and epoll_pwait2 syscalls not available."
+#endif
 
   // A negative return value indicates an error with the magnitude of the
   // value being the error code.
diff --git a/libc/src/sys/stat/linux/kernel_statx.h b/libc/src/sys/stat/linux/kernel_statx.h
index f26f0b826ac1e..d0e223aec3e1e 100644
--- a/libc/src/sys/stat/linux/kernel_statx.h
+++ b/libc/src/sys/stat/linux/kernel_statx.h
@@ -80,7 +80,7 @@ LIBC_INLINE int statx(int dirfd, const char *__restrict path, int flags,
     return -ret;
 
   statbuf->st_dev = MKDEV(xbuf.stx_dev_major, xbuf.stx_dev_minor);
-  statbuf->st_ino = xbuf.stx_ino;
+  statbuf->st_ino = static_cast<decltype(statbuf->st_ino)>(xbuf.stx_ino);
   statbuf->st_mode = xbuf.stx_mode;
   statbuf->st_nlink = xbuf.stx_nlink;
   statbuf->st_uid = xbuf.stx_uid;
@@ -94,7 +94,8 @@ LIBC_INLINE int statx(int dirfd, const char *__restrict path, int flags,
   statbuf->st_ctim.tv_sec = xbuf.stx_ctime.tv_sec;
   statbuf->st_ctim.tv_nsec = xbuf.stx_ctime.tv_nsec;
   statbuf->st_blksize = xbuf.stx_blksize;
-  statbuf->st_blocks = xbuf.stx_blocks;
+  statbuf->st_blocks =
+      static_cast<decltype(statbuf->st_blocks)>(xbuf.stx_blocks);
 
   return 0;
 }
diff --git a/libc/src/unistd/CMakeLists.txt b/libc/src/unistd/CMakeLists.txt
index ddafcd7c92f21..ec767128588f6 100644
--- a/libc/src/unistd/CMakeLists.txt
+++ b/libc/src/unistd/CMakeLists.txt
@@ -333,3 +333,13 @@ add_entrypoint_external(
 add_entrypoint_external(
   opterr
 )
+
+add_entrypoint_object(
+  gettid
+  SRCS
+    gettid.cpp
+  HDRS
+    gettid.h
+  DEPENDS
+    libc.src.__support.threads.tid
+)
diff --git a/libc/src/unistd/getpid.h b/libc/src/unistd/getpid.h
index c3c55b0c06b10..5812df0dfecd6 100644
--- a/libc/src/unistd/getpid.h
+++ b/libc/src/unistd/getpid.h
@@ -9,12 +9,12 @@
 #ifndef LLVM_LIBC_SRC_UNISTD_GETPID_H
 #define LLVM_LIBC_SRC_UNISTD_GETPID_H
 
+#include "hdr/types/pid_t.h"
 #include "src/__support/macros/config.h"
-#include <unistd.h>
 
 namespace LIBC_NAMESPACE_DECL {
 
-pid_t getpid();
+pid_t getpid(void);
 
 } // namespace LIBC_NAMESPACE_DECL
 
diff --git a/libc/src/unistd/gettid.cpp b/libc/src/unistd/gettid.cpp
new file mode 100644
index 0000000000000..6d8ed65fb753d
--- /dev/null
+++ b/libc/src/unistd/gettid.cpp
@@ -0,0 +1,17 @@
+//===-- Implementation file for gettid --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/unistd/gettid.h"
+#include "src/__support/common.h"
+#include "src/__support/threads/tid.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(pid_t, gettid, (void)) { return gettid_inline(); }
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/unistd/gettid.h b/libc/src/unistd/gettid.h
new file mode 100644
index 0000000000000..42283191be49b
--- /dev/null
+++ b/libc/src/unistd/gettid.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for gettid ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_UNISTD_GETTID_H
+#define LLVM_LIBC_SRC_UNISTD_GETTID_H
+
+#include "hdr/types/pid_t.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+pid_t gettid(void);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_UNISTD_GETTID_H
diff --git a/libc/src/unistd/linux/CMakeLists.txt b/libc/src/unistd/linux/CMakeLists.txt
index 7e733d7f002c3..651ea60d07a30 100644
--- a/libc/src/unistd/linux/CMakeLists.txt
+++ b/libc/src/unistd/linux/CMakeLists.txt
@@ -101,6 +101,7 @@ add_entrypoint_object(
     libc.include.sys_syscall
     libc.src.__support.threads.fork_callbacks
     libc.src.__support.OSUtil.osutil
+    libc.src.__support.OSUtil.pid
     libc.src.__support.threads.thread
     libc.src.errno.errno
 )
@@ -204,8 +205,7 @@ add_entrypoint_object(
     ../getpid.h
   DEPENDS
     libc.include.unistd
-    libc.include.sys_syscall
-    libc.src.__support.OSUtil.osutil
+    libc.src.__support.OSUtil.pid
 )
 
 add_entrypoint_object(
diff --git a/libc/src/unistd/linux/fork.cpp b/libc/src/unistd/linux/fork.cpp
index 7d47665b16d3f..8fe1881733f34 100644
--- a/libc/src/unistd/linux/fork.cpp
+++ b/libc/src/unistd/linux/fork.cpp
@@ -8,13 +8,14 @@
 
 #include "src/unistd/fork.h"
 
+#include "src/__support/OSUtil/pid.h"
 #include "src/__support/OSUtil/syscall.h" // For internal syscall function.
 #include "src/__support/common.h"
 #include "src/__support/macros/config.h"
 #include "src/__support/threads/fork_callbacks.h"
 #include "src/__support/threads/thread.h" // For thread self object
-
 #include "src/errno/libc_errno.h"
+
 #include <signal.h>      // For SIGCHLD
 #include <sys/syscall.h> // For syscall numbers.
 
@@ -25,6 +26,14 @@ namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(pid_t, fork, (void)) {
   invoke_prepare_callbacks();
+
+  // Invalidate the tid/pid cache before fork to prevent a post-fork signal
+  // handler from getting wrong values. gettid() is not async-signal-safe, but
+  // we make a best effort here.
+  pid_t parent_tid = self.get_tid();
+  self.invalidate_tid();
+  ProcessIdentity::start_fork();
+
 #ifdef SYS_fork
   pid_t ret = LIBC_NAMESPACE::syscall_impl<pid_t>(SYS_fork);
 #elif defined(SYS_clone)
@@ -32,15 +41,6 @@ LLVM_LIBC_FUNCTION(pid_t, fork, (void)) {
 #else
 #error "fork and clone syscalls not available."
 #endif
-  if (ret == 0) {
-    // Return value is 0 in the child process.
-    // The child is created with a single thread whose self object will be a
-    // copy of parent process' thread which called fork. So, we have to fix up
-    // the child process' self object with the new process' tid.
-    self.attrib->tid = LIBC_NAMESPACE::syscall_impl<pid_t>(SYS_gettid);
-    invoke_child_callbacks();
-    return 0;
-  }
 
   if (ret < 0) {
     // Error case, a child process was not created.
@@ -48,6 +48,18 @@ LLVM_LIBC_FUNCTION(pid_t, fork, (void)) {
     return -1;
   }
 
+  // Child process
+  if (ret == 0) {
+    self.refresh_tid();
+    ProcessIdentity::refresh_cache();
+    ProcessIdentity::end_fork();
+    invoke_child_callbacks();
+    return 0;
+  }
+
+  // Parent process
+  self.refresh_tid(parent_tid);
+  ProcessIdentity::end_fork();
   invoke_parent_callbacks();
   return ret;
 }
diff --git a/libc/src/unistd/linux/getpid.cpp b/libc/src/unistd/linux/getpid.cpp
index b24c86a15990f..65d6c8a3bea95 100644
--- a/libc/src/unistd/linux/getpid.cpp
+++ b/libc/src/unistd/linux/getpid.cpp
@@ -7,17 +7,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/unistd/getpid.h"
-
-#include "src/__support/OSUtil/syscall.h" // For internal syscall function.
+#include "src/__support/OSUtil/pid.h"
 #include "src/__support/common.h"
-#include "src/__support/macros/config.h"
-
-#include <sys/syscall.h> // For syscall numbers.
-
 namespace LIBC_NAMESPACE_DECL {
 
-LLVM_LIBC_FUNCTION(pid_t, getpid, ()) {
-  return LIBC_NAMESPACE::syscall_impl<pid_t>(SYS_getpid);
-}
+LLVM_LIBC_FUNCTION(pid_t, getpid, (void)) { return ProcessIdentity::get(); }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/startup/linux/CMakeLists.txt b/libc/startup/linux/CMakeLists.txt
index 336c5d0f6bfa2..585edf20f65be 100644
--- a/libc/startup/linux/CMakeLists.txt
+++ b/libc/startup/linux/CMakeLists.txt
@@ -101,6 +101,7 @@ add_object_library(
     libc.include.llvm-libc-macros.link_macros
     libc.src.__support.threads.thread
     libc.src.__support.OSUtil.osutil
+    libc.src.__support.OSUtil.pid
     libc.src.stdlib.exit
     libc.src.stdlib.atexit
     libc.src.unistd.environ
diff --git a/libc/startup/linux/do_start.cpp b/libc/startup/linux/do_start.cpp
index 824c0e1cf8f26..4047c06ff25c1 100644
--- a/libc/startup/linux/do_start.cpp
+++ b/libc/startup/linux/do_start.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 #include "startup/linux/do_start.h"
 #include "include/llvm-libc-macros/link-macros.h"
+#include "src/__support/OSUtil/pid.h"
 #include "src/__support/OSUtil/syscall.h"
 #include "src/__support/macros/config.h"
 #include "src/__support/threads/thread.h"
@@ -127,6 +128,10 @@ static ThreadAttributes main_thread_attrib;
   if (tls.size != 0 && !set_thread_ptr(tls.tp))
     syscall_impl<long>(SYS_exit, 1);
 
+  // Validate process identity cache (TLS needed).
+  ProcessIdentity::refresh_cache();
+  ProcessIdentity::end_fork();
+
   self.attrib = &main_thread_attrib;
   main_thread_attrib.atexit_callback_mgr =
       internal::get_thread_atexit_callback_mgr();
diff --git a/libc/test/integration/src/unistd/CMakeLists.txt b/libc/test/integration/src/unistd/CMakeLists.txt
index 3f18231209512..f50405d0925e2 100644
--- a/libc/test/integration/src/unistd/CMakeLists.txt
+++ b/libc/test/integration/src/unistd/CMakeLists.txt
@@ -31,6 +31,10 @@ add_integration_test(
     libc.src.sys.wait.wait4
     libc.src.sys.wait.waitpid
     libc.src.unistd.fork
+    libc.src.unistd.getpid
+    libc.src.unistd.gettid
+    libc.src.stdlib.exit
+    libc.include.sys_syscall
 )
 
 if((${LIBC_TARGET_OS} STREQUAL "linux") AND (${LIBC_TARGET_ARCHITECTURE_IS_X86}))
diff --git a/libc/test/integration/src/unistd/fork_test.cpp b/libc/test/integration/src/unistd/fork_test.cpp
index 9c9213ed46316..4b82d5f195627 100644
--- a/libc/test/integration/src/unistd/fork_test.cpp
+++ b/libc/test/integration/src/unistd/fork_test.cpp
@@ -6,17 +6,21 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "src/__support/OSUtil/syscall.h"
 #include "src/pthread/pthread_atfork.h"
 #include "src/signal/raise.h"
+#include "src/stdlib/exit.h"
 #include "src/sys/wait/wait.h"
 #include "src/sys/wait/wait4.h"
 #include "src/sys/wait/waitpid.h"
 #include "src/unistd/fork.h"
-
+#include "src/unistd/getpid.h"
+#include "src/unistd/gettid.h"
 #include "test/IntegrationTest/test.h"
 
 #include <errno.h>
 #include <signal.h>
+#include <sys/syscall.h>
 #include <sys/wait.h>
 #include <unistd.h>
 
@@ -140,7 +144,25 @@ void fork_with_atfork_callbacks() {
   ASSERT_NE(child, DONE);
 }
 
+void fork_pid_tid_test() {
+  pid_t pid = fork();
+  ASSERT_TRUE(pid >= 0);
+  ASSERT_EQ(LIBC_NAMESPACE::gettid(),
+            LIBC_NAMESPACE::syscall_impl<pid_t>(SYS_gettid));
+  ASSERT_EQ(LIBC_NAMESPACE::getpid(),
+            LIBC_NAMESPACE::syscall_impl<pid_t>(SYS_getpid));
+
+  if (pid == 0) {
+    LIBC_NAMESPACE::exit(0);
+  } else {
+    int status;
+    LIBC_NAMESPACE::waitpid(pid, &status, 0);
+    ASSERT_EQ(status, 0);
+  }
+}
+
 TEST_MAIN(int argc, char **argv, char **envp) {
+  fork_pid_tid_test();
   fork_and_wait_normal_exit();
   fork_and_wait4_normal_exit();
   fork_and_waitpid_normal_exit();
diff --git a/libc/test/src/__support/File/platform_file_test.cpp b/libc/test/src/__support/File/platform_file_test.cpp
index 469d7500032b9..8aa07219a6527 100644
--- a/libc/test/src/__support/File/platform_file_test.cpp
+++ b/libc/test/src/__support/File/platform_file_test.cpp
@@ -103,7 +103,8 @@ TEST(LlvmLibcPlatformFileTest, CreateAppendSeekAndReadBack) {
   constexpr size_t APPEND_TEXT_SIZE = sizeof(APPEND_TEXT) - 1;
   ASSERT_EQ(file->write(APPEND_TEXT, APPEND_TEXT_SIZE).value, APPEND_TEXT_SIZE);
 
-  ASSERT_EQ(file->seek(-APPEND_TEXT_SIZE, SEEK_END).value(), 0);
+  ASSERT_EQ(file->seek(-static_cast<off_t>(APPEND_TEXT_SIZE), SEEK_END).value(),
+            0);
   char data[APPEND_TEXT_SIZE + 1];
   ASSERT_EQ(file->read(data, APPEND_TEXT_SIZE).value, APPEND_TEXT_SIZE);
   data[APPEND_TEXT_SIZE] = '\0';
diff --git a/libc/test/src/sys/epoll/linux/epoll_create_test.cpp b/libc/test/src/sys/epoll/linux/epoll_create_test.cpp
index fdcdcf8eb4271..9c4bad10c8384 100644
--- a/libc/test/src/sys/epoll/linux/epoll_create_test.cpp
+++ b/libc/test/src/sys/epoll/linux/epoll_create_test.cpp
@@ -10,6 +10,7 @@
 #include "src/unistd/close.h"
 #include "test/UnitTest/ErrnoSetterMatcher.h"
 #include "test/UnitTest/Test.h"
+#include <sys/syscall.h> // For syscall numbers.
 
 using namespace LIBC_NAMESPACE::testing::ErrnoSetterMatcher;
 
@@ -21,6 +22,8 @@ TEST(LlvmLibcEpollCreateTest, Basic) {
   ASSERT_THAT(LIBC_NAMESPACE::close(fd), Succeeds());
 }
 
+#ifdef SYS_epoll_create
 TEST(LlvmLibcEpollCreateTest, Fails) {
   ASSERT_THAT(LIBC_NAMESPACE::epoll_create(0), Fails(EINVAL));
 }
+#endif
diff --git a/libc/test/src/unistd/CMakeLists.txt b/libc/test/src/unistd/CMakeLists.txt
index 332455b791aee..f8292653081f1 100644
--- a/libc/test/src/unistd/CMakeLists.txt
+++ b/libc/test/src/unistd/CMakeLists.txt
@@ -378,6 +378,16 @@ add_libc_unittest(
     libc.src.unistd.getpid
 )
 
+add_libc_unittest(
+  gettid_test
+  SUITE
+    libc_unistd_unittests
+  SRCS
+    gettid_test.cpp
+  DEPENDS
+    libc.src.unistd.gettid
+)
+
 add_libc_unittest(
   getppid_test
   SUITE
diff --git a/libc/test/src/unistd/gettid_test.cpp b/libc/test/src/unistd/gettid_test.cpp
new file mode 100644
index 0000000000000..c2330f4002279
--- /dev/null
+++ b/libc/test/src/unistd/gettid_test.cpp
@@ -0,0 +1,15 @@
+//===-- Unittests for gettid ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/unistd/gettid.h"
+#include "test/UnitTest/Test.h"
+
+TEST(LlvmLibcGetTidTest, SmokeTest) {
+  // gettid always succeeds. So, we just call it as a smoke test.
+  ASSERT_GT(LIBC_NAMESPACE::gettid(), 0);
+}
diff --git a/libc/utils/gpu/server/rpc_server.cpp b/libc/utils/gpu/server/rpc_server.cpp
index 119539e3cad44..ed23d22f0bc36 100644
--- a/libc/utils/gpu/server/rpc_server.cpp
+++ b/libc/utils/gpu/server/rpc_server.cpp
@@ -108,6 +108,10 @@ void handle_printf(rpc::Server::Port &port) {
       if (cur_section.has_conv && cur_section.conv_name == 's' &&
           cur_section.conv_val_ptr) {
         strs_to_copy[lane].emplace_back(cur_section.conv_val_ptr);
+        // Get the minimum size of the string in the case of padding.
+        char c = '\0';
+        cur_section.conv_val_ptr = &c;
+        convert(&writer, cur_section);
       } else if (cur_section.has_conv) {
         // Ignore conversion errors for the first pass.
         convert(&writer, cur_section);
diff --git a/libcxx/docs/ImplementationDefinedBehavior.rst b/libcxx/docs/ImplementationDefinedBehavior.rst
index 3000bb7cfa468..f0ef733fc2c55 100644
--- a/libcxx/docs/ImplementationDefinedBehavior.rst
+++ b/libcxx/docs/ImplementationDefinedBehavior.rst
@@ -51,6 +51,17 @@ Libc++ determines that a stream is Unicode-capable terminal by:
   <http://eel.is/c++draft/print.fun#7>`_. This function is used for other
   ``std::print`` overloads that don't take an ``ostream&`` argument.
 
+`[sf.cmath] <https://wg21.link/sf.cmath>`_ Mathematical Special Functions: Large indices
+----------------------------------------------------------------------------------------
+
+Most functions within the Mathematical Special Functions section contain integral indices.
+The Standard specifies the result for larger indices as implementation-defined.
+Libc++ pursues reasonable results by choosing the same formulas as for indices below that threshold.
+E.g.
+
+- ``std::hermite(unsigned n, T x)`` for ``n >= 128``
+
+
 Listed in the index of implementation-defined behavior
 ======================================================
 
diff --git a/libcxx/docs/Status/Cxx17.rst b/libcxx/docs/Status/Cxx17.rst
index d4426afa81638..ad4f8576f03db 100644
--- a/libcxx/docs/Status/Cxx17.rst
+++ b/libcxx/docs/Status/Cxx17.rst
@@ -41,6 +41,7 @@ Paper Status
 .. note::
 
    .. [#note-P0067] P0067: ``std::(to|from)_chars`` for integrals has been available since version 7.0. ``std::to_chars`` for ``float`` and ``double`` since version 14.0 ``std::to_chars`` for ``long double`` uses the implementation for ``double``.
+   .. [#note-P0226] P0226: Progress is tracked `here <https://libcxx.llvm.org/Status/SpecialMath.html>`_.
    .. [#note-P0607] P0607: The parts of P0607 that are not done are the ``<regex>`` bits.
    .. [#note-P0154] P0154: The required macros are only implemented as of clang 19.
    .. [#note-P0452] P0452: The changes to ``std::transform_inclusive_scan`` and ``std::transform_exclusive_scan`` have not yet been implemented.
diff --git a/libcxx/docs/Status/Cxx17Papers.csv b/libcxx/docs/Status/Cxx17Papers.csv
index 2e560cfe0d576..6c657d51f5c7e 100644
--- a/libcxx/docs/Status/Cxx17Papers.csv
+++ b/libcxx/docs/Status/Cxx17Papers.csv
@@ -26,7 +26,7 @@
 "`P0013R1 <https://wg21.link/p0013r1>`__","LWG","Logical type traits rev 2","Kona","|Complete|","3.8"
 "","","","","",""
 "`P0024R2 <https://wg21.link/P0024R2>`__","LWG","The Parallelism TS Should be Standardized","Jacksonville","|Partial|",""
-"`P0226R1 <https://wg21.link/P0226R1>`__","LWG","Mathematical Special Functions for C++17","Jacksonville","",""
+"`P0226R1 <https://wg21.link/P0226R1>`__","LWG","Mathematical Special Functions for C++17","Jacksonville","|In Progress| [#note-P0226]_",""
 "`P0220R1 <https://wg21.link/P0220R1>`__","LWG","Adopt Library Fundamentals V1 TS Components for C++17","Jacksonville","|Complete|","16.0"
 "`P0218R1 <https://wg21.link/P0218R1>`__","LWG","Adopt the File System TS for C++17","Jacksonville","|Complete|","7.0"
 "`P0033R1 <https://wg21.link/P0033R1>`__","LWG","Re-enabling shared_from_this","Jacksonville","|Complete|","3.9"
diff --git a/libcxx/docs/Status/FormatPaper.csv b/libcxx/docs/Status/FormatPaper.csv
index f29f1f7ca7487..fb96b1fff30ad 100644
--- a/libcxx/docs/Status/FormatPaper.csv
+++ b/libcxx/docs/Status/FormatPaper.csv
@@ -7,7 +7,7 @@ Section,Description,Dependencies,Assignee,Status,First released version
 `[time.syn] <https://wg21.link/time.syn>`_,"Formatter ``chrono::gps_time<Duration>``",A ``<chrono>`` implementation,Mark de Wever,,,
 `[time.syn] <https://wg21.link/time.syn>`_,"Formatter ``chrono::file_time<Duration>``",,Mark de Wever,|Complete|,17.0
 `[time.syn] <https://wg21.link/time.syn>`_,"Formatter ``chrono::local_time<Duration>``",,Mark de Wever,|Complete|,17.0
-`[time.syn] <https://wg21.link/time.syn>`_,"Formatter ``chrono::local-time-format-t<Duration>``",A ``<chrono>`` implementation,Mark de Wever,,,
+`[time.syn] <https://wg21.link/time.syn>`_,"Formatter ``chrono::local-time-format-t<Duration>``",,,|Nothing To Do|,
 `[time.syn] <https://wg21.link/time.syn>`_,"Formatter ``chrono::day``",,Mark de Wever,|Complete|,16.0
 `[time.syn] <https://wg21.link/time.syn>`_,"Formatter ``chrono::month``",,Mark de Wever,|Complete|,16.0
 `[time.syn] <https://wg21.link/time.syn>`_,"Formatter ``chrono::year``",,Mark de Wever,|Complete|,16.0
@@ -26,7 +26,7 @@ Section,Description,Dependencies,Assignee,Status,First released version
 `[time.syn] <https://wg21.link/time.syn>`_,"Formatter ``chrono::hh_mm_ss<duration<Rep, Period>>``",,Mark de Wever,|Complete|,17.0
 `[time.syn] <https://wg21.link/time.syn>`_,"Formatter ``chrono::sys_info``",,Mark de Wever,|Complete|,19.0
 `[time.syn] <https://wg21.link/time.syn>`_,"Formatter ``chrono::local_info``",,Mark de Wever,|Complete|,19.0
-`[time.syn] <https://wg21.link/time.syn>`_,"Formatter ``chrono::zoned_time<Duration, TimeZonePtr>``",A ``<chrono>`` implementation,Mark de Wever,,
+`[time.syn] <https://wg21.link/time.syn>`_,"Formatter ``chrono::zoned_time<Duration, TimeZonePtr>``",,Mark de Wever,|Complete|,19.0
 
 "`P2693R1 <https://wg21.link/P2693R1>`__","Formatting ``thread::id`` and ``stacktrace``"
 `[thread.thread.id] <https://wg21.link/thread.thread.id>`_,"Formatting ``thread::id``",,Mark de Wever,|Complete|,17.0
diff --git a/libcxx/docs/Status/SpecialMath.rst b/libcxx/docs/Status/SpecialMath.rst
new file mode 100644
index 0000000000000..fcc9f03e3ae64
--- /dev/null
+++ b/libcxx/docs/Status/SpecialMath.rst
@@ -0,0 +1,35 @@
+.. _special-math-status:
+
+======================================================
+libc++ Mathematical Special Functions Status (P0226R1)
+======================================================
+
+.. include:: ../Helpers/Styles.rst
+
+.. contents::
+  :local:
+
+Overview
+========
+
+This document contains the status of the C++17 mathematical special functions implementation in libc++.
+It is used to track both the status of the sub-projects of the effort and who is assigned to these sub-projects.
+This avoids duplicating effort.
+
+If you are interested in contributing to this effort, please send a message
+to the #libcxx channel in the LLVM discord. Please *do not* start working
+on any items below that have already been assigned to someone else.
+
+Sub-projects in the Implementation Effort
+=========================================
+
+.. csv-table::
+  :file: SpecialMathProjects.csv
+  :header-rows: 1
+  :widths: auto
+
+Paper and Issue Status
+======================
+
+The underlying paper is `Mathematical Special Functions for C++17 (P0226) <https://wg21.link/P0226>`_ and is included in C++17.
+Implementation is *In Progress*.
diff --git a/libcxx/docs/Status/SpecialMathProjects.csv b/libcxx/docs/Status/SpecialMathProjects.csv
new file mode 100644
index 0000000000000..f964e79de91d3
--- /dev/null
+++ b/libcxx/docs/Status/SpecialMathProjects.csv
@@ -0,0 +1,22 @@
+Section,Description,Assignee,Complete
+| `[sf.cmath.assoc.laguerre] <https://wg21.link/sf.cmath.assoc.laguerre>`_, std::assoc_laguerre, None, |Not Started|
+| `[sf.cmath.assoc.legendre] <https://wg21.link/sf.cmath.assoc.legendre>`_, std::assoc_legendre, None, |Not Started|
+| `[sf.cmath.beta] <https://wg21.link/sf.cmath.beta>`_, std::beta, None, |Not Started|
+| `[sf.cmath.comp.ellint.1] <https://wg21.link/sf.cmath.comp.ellint.1>`_, std::comp_ellint_1, None, |Not Started|
+| `[sf.cmath.comp.ellint.2] <https://wg21.link/sf.cmath.comp.ellint.2>`_, std::comp_ellint_2, None, |Not Started|
+| `[sf.cmath.comp.ellint.3] <https://wg21.link/sf.cmath.comp.ellint.3>`_, std::comp_ellint_3, None, |Not Started|
+| `[sf.cmath.cyl.bessel.i] <https://wg21.link/sf.cmath.cyl.bessel.i>`_, std::cyl_bessel_i, None, |Not Started|
+| `[sf.cmath.cyl.bessel.j] <https://wg21.link/sf.cmath.cyl.bessel.j>`_, std::cyl_bessel_j, None, |Not Started|
+| `[sf.cmath.cyl.bessel.k] <https://wg21.link/sf.cmath.cyl.bessel.k>`_, std::cyl_bessel_k, None, |Not Started|
+| `[sf.cmath.cyl.neumann] <https://wg21.link/sf.cmath.cyl.neumann>`_, std::cyl_neumann, None, |Not Started|
+| `[sf.cmath.ellint.1] <https://wg21.link/sf.cmath.ellint.1>`_, std::ellint_1, None, |Not Started|
+| `[sf.cmath.ellint.2] <https://wg21.link/sf.cmath.ellint.2>`_, std::ellint_2, None, |Not Started|
+| `[sf.cmath.ellint.3] <https://wg21.link/sf.cmath.ellint.3>`_, std::ellint_3, None, |Not Started|
+| `[sf.cmath.expint] <https://wg21.link/sf.cmath.expint>`_, std::expint, None, |Not Started|
+| `[sf.cmath.hermite] <https://wg21.link/sf.cmath.hermite>`_, std::hermite, Paul Xi Cao, |Complete|
+| `[sf.cmath.laguerre] <https://wg21.link/sf.cmath.laguerre>`_, std::laguerre, None, |Not Started|
+| `[sf.cmath.legendre] <https://wg21.link/sf.cmath.legendre>`_, std::legendre, None, |Not Started|
+| `[sf.cmath.riemann.zeta] <https://wg21.link/sf.cmath.riemann.zeta>`_, std::riemann_zeta, None, |Not Started|
+| `[sf.cmath.sph.bessel] <https://wg21.link/sf.cmath.sph.bessel>`_, std::sph_bessel, None, |Not Started|
+| `[sf.cmath.sph.legendre] <https://wg21.link/sf.cmath.sph.legendre>`_, std::sph_legendre, None, |Not Started|
+| `[sf.cmath.sph.neumann] <https://wg21.link/sf.cmath.sph.neumann>`_, std::sph_neumann, None, |Not Started|
diff --git a/libcxx/docs/index.rst b/libcxx/docs/index.rst
index 69a9e575cfe7c..4bca3ccc8fa06 100644
--- a/libcxx/docs/index.rst
+++ b/libcxx/docs/index.rst
@@ -53,6 +53,7 @@ Getting Started with libc++
    Status/PSTL
    Status/Ranges
    Status/Spaceship
+   Status/SpecialMath
    Status/Zip
 
 
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 1a4d9c7070f14..32579272858a8 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -509,6 +509,7 @@ set(files
   __math/remainder.h
   __math/roots.h
   __math/rounding_functions.h
+  __math/special_functions.h
   __math/traits.h
   __math/trigonometric_functions.h
   __mbstate_t.h
diff --git a/libcxx/include/__chrono/convert_to_tm.h b/libcxx/include/__chrono/convert_to_tm.h
index 881a4970822d8..3a51019b80784 100644
--- a/libcxx/include/__chrono/convert_to_tm.h
+++ b/libcxx/include/__chrono/convert_to_tm.h
@@ -29,11 +29,13 @@
 #include <__chrono/year_month.h>
 #include <__chrono/year_month_day.h>
 #include <__chrono/year_month_weekday.h>
+#include <__chrono/zoned_time.h>
 #include <__concepts/same_as.h>
 #include <__config>
 #include <__format/format_error.h>
 #include <__memory/addressof.h>
 #include <__type_traits/is_convertible.h>
+#include <__type_traits/is_specialization.h>
 #include <cstdint>
 #include <ctime>
 #include <limits>
@@ -178,7 +180,13 @@ _LIBCPP_HIDE_FROM_ABI _Tm __convert_to_tm(const _ChronoT& __value) {
     // Has no time information.
   } else if constexpr (same_as<_ChronoT, chrono::local_info>) {
     // Has no time information.
-#  endif
+#    if !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) &&                          \
+        !defined(_LIBCPP_HAS_NO_LOCALIZATION)
+  } else if constexpr (__is_specialization_v<_ChronoT, chrono::zoned_time>) {
+    return std::__convert_to_tm<_Tm>(
+        chrono::sys_time<typename _ChronoT::duration>{__value.get_local_time().time_since_epoch()});
+#    endif
+#  endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB)
   } else
     static_assert(sizeof(_ChronoT) == 0, "Add the missing type specialization");
 
diff --git a/libcxx/include/__chrono/formatter.h b/libcxx/include/__chrono/formatter.h
index 9a77316385abd..449c415e95760 100644
--- a/libcxx/include/__chrono/formatter.h
+++ b/libcxx/include/__chrono/formatter.h
@@ -33,6 +33,7 @@
 #include <__chrono/year_month.h>
 #include <__chrono/year_month_day.h>
 #include <__chrono/year_month_weekday.h>
+#include <__chrono/zoned_time.h>
 #include <__concepts/arithmetic.h>
 #include <__concepts/same_as.h>
 #include <__config>
@@ -44,6 +45,7 @@
 #include <__format/parser_std_format_spec.h>
 #include <__format/write_escaped.h>
 #include <__memory/addressof.h>
+#include <__type_traits/is_specialization.h>
 #include <cmath>
 #include <ctime>
 #include <limits>
@@ -137,10 +139,24 @@ __format_sub_seconds(basic_stringstream<_CharT>& __sstr, const chrono::hh_mm_ss<
                    __value.fractional_width);
 }
 
+#  if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) && !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) &&                     \
+      !defined(_LIBCPP_HAS_NO_FILESYSTEM) && !defined(_LIBCPP_HAS_NO_LOCALIZATION)
+template <class _CharT, class _Duration, class _TimeZonePtr>
+_LIBCPP_HIDE_FROM_ABI void
+__format_sub_seconds(basic_stringstream<_CharT>& __sstr, const chrono::zoned_time<_Duration, _TimeZonePtr>& __value) {
+  __formatter::__format_sub_seconds(__sstr, __value.get_local_time().time_since_epoch());
+}
+#  endif
+
 template <class _Tp>
 consteval bool __use_fraction() {
   if constexpr (__is_time_point<_Tp>)
     return chrono::hh_mm_ss<typename _Tp::duration>::fractional_width;
+#  if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) && !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) &&                     \
+      !defined(_LIBCPP_HAS_NO_FILESYSTEM) && !defined(_LIBCPP_HAS_NO_LOCALIZATION)
+  else if constexpr (__is_specialization_v<_Tp, chrono::zoned_time>)
+    return chrono::hh_mm_ss<typename _Tp::duration>::fractional_width;
+#  endif
   else if constexpr (chrono::__is_duration<_Tp>::value)
     return chrono::hh_mm_ss<_Tp>::fractional_width;
   else if constexpr (__is_hh_mm_ss<_Tp>)
@@ -212,8 +228,13 @@ _LIBCPP_HIDE_FROM_ABI __time_zone __convert_to_time_zone([[maybe_unused]] const
 #  if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB)
   if constexpr (same_as<_Tp, chrono::sys_info>)
     return {__value.abbrev, __value.offset};
+#    if !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) &&                          \
+        !defined(_LIBCPP_HAS_NO_LOCALIZATION)
+  else if constexpr (__is_specialization_v<_Tp, chrono::zoned_time>)
+    return __formatter::__convert_to_time_zone(__value.get_info());
+#    endif
   else
-#  endif
+#  endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB)
     return {"UTC", chrono::seconds{0}};
 }
 
@@ -426,7 +447,12 @@ _LIBCPP_HIDE_FROM_ABI constexpr bool __weekday_ok(const _Tp& __value) {
     return true;
   else if constexpr (same_as<_Tp, chrono::local_info>)
     return true;
-#  endif
+#    if !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) &&                          \
+        !defined(_LIBCPP_HAS_NO_LOCALIZATION)
+  else if constexpr (__is_specialization_v<_Tp, chrono::zoned_time>)
+    return true;
+#    endif
+#  endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB)
   else
     static_assert(sizeof(_Tp) == 0, "Add the missing type specialization");
 }
@@ -472,7 +498,12 @@ _LIBCPP_HIDE_FROM_ABI constexpr bool __weekday_name_ok(const _Tp& __value) {
     return true;
   else if constexpr (same_as<_Tp, chrono::local_info>)
     return true;
-#  endif
+#    if !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) &&                          \
+        !defined(_LIBCPP_HAS_NO_LOCALIZATION)
+  else if constexpr (__is_specialization_v<_Tp, chrono::zoned_time>)
+    return true;
+#    endif
+#  endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB)
   else
     static_assert(sizeof(_Tp) == 0, "Add the missing type specialization");
 }
@@ -518,7 +549,12 @@ _LIBCPP_HIDE_FROM_ABI constexpr bool __date_ok(const _Tp& __value) {
     return true;
   else if constexpr (same_as<_Tp, chrono::local_info>)
     return true;
-#  endif
+#    if !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) &&                          \
+        !defined(_LIBCPP_HAS_NO_LOCALIZATION)
+  else if constexpr (__is_specialization_v<_Tp, chrono::zoned_time>)
+    return true;
+#    endif
+#  endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB)
   else
     static_assert(sizeof(_Tp) == 0, "Add the missing type specialization");
 }
@@ -564,7 +600,12 @@ _LIBCPP_HIDE_FROM_ABI constexpr bool __month_name_ok(const _Tp& __value) {
     return true;
   else if constexpr (same_as<_Tp, chrono::local_info>)
     return true;
-#  endif
+#    if !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) &&                          \
+        !defined(_LIBCPP_HAS_NO_LOCALIZATION)
+  else if constexpr (__is_specialization_v<_Tp, chrono::zoned_time>)
+    return true;
+#    endif
+#  endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB)
   else
     static_assert(sizeof(_Tp) == 0, "Add the missing type specialization");
 }
@@ -924,7 +965,23 @@ struct formatter<chrono::local_info, _CharT> : public __formatter_chrono<_CharT>
     return _Base::__parse(__ctx, __format_spec::__fields_chrono, __format_spec::__flags{});
   }
 };
-#  endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB)
+#    if !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) &&                          \
+        !defined(_LIBCPP_HAS_NO_LOCALIZATION)
+// Note due to how libc++'s formatters are implemented there is no need to add
+// the exposition only local-time-format-t abstraction.
+template <class _Duration, class _TimeZonePtr, __fmt_char_type _CharT>
+struct formatter<chrono::zoned_time<_Duration, _TimeZonePtr>, _CharT> : public __formatter_chrono<_CharT> {
+public:
+  using _Base = __formatter_chrono<_CharT>;
+
+  template <class _ParseContext>
+  _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) {
+    return _Base::__parse(__ctx, __format_spec::__fields_chrono, __format_spec::__flags::__clock);
+  }
+};
+#    endif // !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) &&
+           // !defined(_LIBCPP_HAS_NO_LOCALIZATION)
+#  endif   // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB)
 
 #endif // if _LIBCPP_STD_VER >= 20
 
diff --git a/libcxx/include/__chrono/ostream.h b/libcxx/include/__chrono/ostream.h
index bb0341bc3ec63..e6c43254eea15 100644
--- a/libcxx/include/__chrono/ostream.h
+++ b/libcxx/include/__chrono/ostream.h
@@ -27,6 +27,7 @@
 #include <__chrono/year_month.h>
 #include <__chrono/year_month_day.h>
 #include <__chrono/year_month_weekday.h>
+#include <__chrono/zoned_time.h>
 #include <__concepts/same_as.h>
 #include <__config>
 #include <__format/format_functions.h>
@@ -302,6 +303,14 @@ operator<<(basic_ostream<_CharT, _Traits>& __os, const local_info& __info) {
              _LIBCPP_STATICALLY_WIDEN(_CharT, "{}: {{{}, {}}}"), __result(), __info.first, __info.second);
 }
 
+#    if !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) &&                          \
+        !defined(_LIBCPP_HAS_NO_LOCALIZATION)
+template <class _CharT, class _Traits, class _Duration, class _TimeZonePtr>
+_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>&
+operator<<(basic_ostream<_CharT, _Traits>& __os, const zoned_time<_Duration, _TimeZonePtr>& __tp) {
+  return __os << std::format(__os.getloc(), _LIBCPP_STATICALLY_WIDEN(_CharT, "{:L%F %T %Z}"), __tp);
+}
+#    endif
 #  endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB)
 
 } // namespace chrono
diff --git a/libcxx/include/__configuration/abi.h b/libcxx/include/__configuration/abi.h
index cbde7887becf1..710548d90a649 100644
--- a/libcxx/include/__configuration/abi.h
+++ b/libcxx/include/__configuration/abi.h
@@ -98,6 +98,10 @@
 // and WCHAR_MAX. This ABI setting determines whether we should instead track whether the fill
 // value has been initialized using a separate boolean, which changes the ABI.
 #  define _LIBCPP_ABI_IOS_ALLOW_ARBITRARY_FILL_VALUE
+// Make a std::pair of trivially copyable types trivially copyable.
+// While this technically doesn't change the layout of pair itself, other types may decide to programmatically change
+// their representation based on whether something is trivially copyable.
+#  define _LIBCPP_ABI_TRIVIALLY_COPYABLE_PAIR
 #elif _LIBCPP_ABI_VERSION == 1
 #  if !(defined(_LIBCPP_OBJECT_FORMAT_COFF) || defined(_LIBCPP_OBJECT_FORMAT_XCOFF))
 // Enable compiling copies of now inline methods into the dylib to support
diff --git a/libcxx/include/__math/special_functions.h b/libcxx/include/__math/special_functions.h
new file mode 100644
index 0000000000000..0b1c753a659ad
--- /dev/null
+++ b/libcxx/include/__math/special_functions.h
@@ -0,0 +1,84 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___MATH_SPECIAL_FUNCTIONS_H
+#define _LIBCPP___MATH_SPECIAL_FUNCTIONS_H
+
+#include <__config>
+#include <__math/copysign.h>
+#include <__math/traits.h>
+#include <__type_traits/enable_if.h>
+#include <__type_traits/is_integral.h>
+#include <limits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER >= 17
+
+template <class _Real>
+_LIBCPP_HIDE_FROM_ABI _Real __hermite(unsigned __n, _Real __x) {
+  // The Hermite polynomial H_n(x).
+  // The implementation is based on the recurrence formula: H_{n+1}(x) = 2x H_n(x) - 2n H_{n-1}(x).
+  // Press, William H., et al. Numerical recipes 3rd edition: The art of scientific computing.
+  // Cambridge university press, 2007, p. 183.
+
+  // NOLINTBEGIN(readability-identifier-naming)
+  if (__math::isnan(__x))
+    return __x;
+
+  _Real __H_0{1};
+  if (__n == 0)
+    return __H_0;
+
+  _Real __H_n_prev = __H_0;
+  _Real __H_n      = 2 * __x;
+  for (unsigned __i = 1; __i < __n; ++__i) {
+    _Real __H_n_next = 2 * (__x * __H_n - __i * __H_n_prev);
+    __H_n_prev       = __H_n;
+    __H_n            = __H_n_next;
+  }
+
+  if (!__math::isfinite(__H_n)) {
+    // Overflow occurred. Two possible cases:
+    //    n is odd:  return infinity of the same sign as x.
+    //    n is even: return +Inf
+    _Real __inf = std::numeric_limits<_Real>::infinity();
+    return (__n & 1) ? __math::copysign(__inf, __x) : __inf;
+  }
+  return __H_n;
+  // NOLINTEND(readability-identifier-naming)
+}
+
+inline _LIBCPP_HIDE_FROM_ABI double hermite(unsigned __n, double __x) { return std::__hermite(__n, __x); }
+
+inline _LIBCPP_HIDE_FROM_ABI float hermite(unsigned __n, float __x) {
+  // use double internally -- float is too prone to overflow!
+  return static_cast<float>(std::hermite(__n, static_cast<double>(__x)));
+}
+
+inline _LIBCPP_HIDE_FROM_ABI long double hermite(unsigned __n, long double __x) { return std::__hermite(__n, __x); }
+
+inline _LIBCPP_HIDE_FROM_ABI float hermitef(unsigned __n, float __x) { return std::hermite(__n, __x); }
+
+inline _LIBCPP_HIDE_FROM_ABI long double hermitel(unsigned __n, long double __x) { return std::hermite(__n, __x); }
+
+template <class _Integer, std::enable_if_t<std::is_integral_v<_Integer>, int> = 0>
+_LIBCPP_HIDE_FROM_ABI double hermite(unsigned __n, _Integer __x) {
+  return std::hermite(__n, static_cast<double>(__x));
+}
+
+#endif // _LIBCPP_STD_VER >= 17
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___MATH_SPECIAL_FUNCTIONS_H
diff --git a/libcxx/include/__type_traits/datasizeof.h b/libcxx/include/__type_traits/datasizeof.h
index 35c12921e8ffa..a27baf67cc2d8 100644
--- a/libcxx/include/__type_traits/datasizeof.h
+++ b/libcxx/include/__type_traits/datasizeof.h
@@ -54,6 +54,7 @@ struct _FirstPaddingByte<_Tp, true> {
 // the use as an extension.
 _LIBCPP_DIAGNOSTIC_PUSH
 _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Winvalid-offsetof")
+_LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Winvalid-offsetof")
 template <class _Tp>
 inline const size_t __datasizeof_v = offsetof(_FirstPaddingByte<_Tp>, __first_padding_byte_);
 _LIBCPP_DIAGNOSTIC_POP
diff --git a/libcxx/include/__utility/pair.h b/libcxx/include/__utility/pair.h
index 0afbebcdc9f2a..c0002b7abb3ca 100644
--- a/libcxx/include/__utility/pair.h
+++ b/libcxx/include/__utility/pair.h
@@ -32,6 +32,7 @@
 #include <__type_traits/is_implicitly_default_constructible.h>
 #include <__type_traits/is_nothrow_assignable.h>
 #include <__type_traits/is_nothrow_constructible.h>
+#include <__type_traits/is_reference.h>
 #include <__type_traits/is_same.h>
 #include <__type_traits/is_swappable.h>
 #include <__type_traits/is_trivially_relocatable.h>
@@ -80,6 +81,38 @@ struct _LIBCPP_TEMPLATE_VIS pair
   _LIBCPP_HIDE_FROM_ABI pair(pair const&) = default;
   _LIBCPP_HIDE_FROM_ABI pair(pair&&)      = default;
 
+  // When we are requested for pair to be trivially copyable by the ABI macro, we use defaulted members
+  // if it is both legal to do it (i.e. no references) and we have a way to actually implement it, which requires
+  // the __enable_if__ attribute before C++20.
+#ifdef _LIBCPP_ABI_TRIVIALLY_COPYABLE_PAIR
+  // FIXME: This should really just be a static constexpr variable. It's in a struct to avoid gdb printing the value
+  // when printing a pair
+  struct __has_defaulted_members {
+    static const bool value = !is_reference<first_type>::value && !is_reference<second_type>::value;
+  };
+#  if _LIBCPP_STD_VER >= 20
+  _LIBCPP_HIDE_FROM_ABI constexpr pair& operator=(const pair&)
+    requires __has_defaulted_members::value
+  = default;
+
+  _LIBCPP_HIDE_FROM_ABI constexpr pair& operator=(pair&&)
+    requires __has_defaulted_members::value
+  = default;
+#  elif __has_attribute(__enable_if__)
+  _LIBCPP_HIDE_FROM_ABI pair& operator=(const pair&)
+      __attribute__((__enable_if__(__has_defaulted_members::value, ""))) = default;
+
+  _LIBCPP_HIDE_FROM_ABI pair& operator=(pair&&)
+      __attribute__((__enable_if__(__has_defaulted_members::value, ""))) = default;
+#  else
+#    error "_LIBCPP_ABI_TRIVIALLY_COPYABLE_PAIR isn't supported with this compiler"
+#  endif
+#else
+  struct __has_defaulted_members {
+    static const bool value = false;
+  };
+#endif // _LIBCPP_ABI_TRIVIALLY_COPYABLE_PAIR
+
 #ifdef _LIBCPP_CXX03_LANG
   _LIBCPP_HIDE_FROM_ABI pair() : first(), second() {}
 
@@ -225,7 +258,8 @@ struct _LIBCPP_TEMPLATE_VIS pair
              typename __make_tuple_indices<sizeof...(_Args2) >::type()) {}
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair&
-  operator=(__conditional_t<is_copy_assignable<first_type>::value && is_copy_assignable<second_type>::value,
+  operator=(__conditional_t<!__has_defaulted_members::value && is_copy_assignable<first_type>::value &&
+                                is_copy_assignable<second_type>::value,
                             pair,
                             __nat> const& __p) noexcept(is_nothrow_copy_assignable<first_type>::value &&
                                                         is_nothrow_copy_assignable<second_type>::value) {
@@ -234,10 +268,12 @@ struct _LIBCPP_TEMPLATE_VIS pair
     return *this;
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair& operator=(
-      __conditional_t<is_move_assignable<first_type>::value && is_move_assignable<second_type>::value, pair, __nat>&&
-          __p) noexcept(is_nothrow_move_assignable<first_type>::value &&
-                        is_nothrow_move_assignable<second_type>::value) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair&
+  operator=(__conditional_t<!__has_defaulted_members::value && is_move_assignable<first_type>::value &&
+                                is_move_assignable<second_type>::value,
+                            pair,
+                            __nat>&& __p) noexcept(is_nothrow_move_assignable<first_type>::value &&
+                                                   is_nothrow_move_assignable<second_type>::value) {
     first  = std::forward<first_type>(__p.first);
     second = std::forward<second_type>(__p.second);
     return *this;
diff --git a/libcxx/include/chrono b/libcxx/include/chrono
index 7f25c76fda542..990c415ec2e97 100644
--- a/libcxx/include/chrono
+++ b/libcxx/include/chrono
@@ -799,6 +799,11 @@ template<class Duration1, class Duration2, class TimeZonePtr>
   bool operator==(const zoned_time<Duration1, TimeZonePtr>& x,
                   const zoned_time<Duration2, TimeZonePtr>& y);
 
+template<class charT, class traits, class Duration, class TimeZonePtr>           // C++20
+  basic_ostream<charT, traits>&
+    operator<<(basic_ostream<charT, traits>& os,
+               const zoned_time<Duration, TimeZonePtr>& t);
+
 // [time.zone.leap], leap second support
 class leap_second {                                                              // C++20
 public:
@@ -881,6 +886,8 @@ namespace std {
     struct formatter<chrono::hh_mm_ss<duration<Rep, Period>>, charT>;             // C++20
   template<class charT> struct formatter<chrono::sys_info, charT>;                // C++20
   template<class charT> struct formatter<chrono::local_info, charT>;              // C++20
+  template<class Duration, class TimeZonePtr, class charT>                        // C++20
+    struct formatter<chrono::zoned_time<Duration, TimeZonePtr>, charT>;
 } // namespace std
 
 namespace chrono {
diff --git a/libcxx/include/cmath b/libcxx/include/cmath
index 7a87e35c84603..3c22604a683c3 100644
--- a/libcxx/include/cmath
+++ b/libcxx/include/cmath
@@ -204,6 +204,14 @@ floating_point fmin (arithmetic x, arithmetic y);
 float          fminf(float x, float y);
 long double    fminl(long double x, long double y);
 
+double         hermite(unsigned n, double x);                    // C++17
+float          hermite(unsigned n, float x);                     // C++17
+long double    hermite(unsigned n, long double x);               // C++17
+float          hermitef(unsigned n, float x);                    // C++17
+long double    hermitel(unsigned n, long double x);              // C++17
+template <class Integer>
+double         hermite(unsigned n, Integer x);                   // C++17
+
 floating_point hypot (arithmetic x, arithmetic y);
 float          hypotf(float x, float y);
 long double    hypotl(long double x, long double y);
@@ -315,6 +323,7 @@ constexpr long double lerp(long double a, long double b, long double t) noexcept
 #include <limits>
 #include <version>
 
+#include <__math/special_functions.h>
 #include <math.h>
 
 #ifndef _LIBCPP_MATH_H
diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap
index 3443fbc3347a3..13d0dce34d97e 100644
--- a/libcxx/include/module.modulemap
+++ b/libcxx/include/module.modulemap
@@ -1485,6 +1485,7 @@ module std_private_math_modulo                          [system] { header "__mat
 module std_private_math_remainder                       [system] { header "__math/remainder.h" }
 module std_private_math_roots                           [system] { header "__math/roots.h" }
 module std_private_math_rounding_functions              [system] { header "__math/rounding_functions.h" }
+module std_private_math_special_functions               [system] { header "__math/special_functions.h" }
 module std_private_math_traits                          [system] { header "__math/traits.h" }
 module std_private_math_trigonometric_functions         [system] { header "__math/trigonometric_functions.h" }
 
diff --git a/libcxx/include/vector b/libcxx/include/vector
index aaf51d18fe30f..45980043a3c15 100644
--- a/libcxx/include/vector
+++ b/libcxx/include/vector
@@ -1443,7 +1443,11 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void vector<_Tp, _Allocator>::shrink_to_fit() _NOE
 #endif // _LIBCPP_HAS_NO_EXCEPTIONS
       allocator_type& __a = this->__alloc();
       __split_buffer<value_type, allocator_type&> __v(size(), size(), __a);
-      __swap_out_circular_buffer(__v);
+      // The Standard mandates shrink_to_fit() does not increase the capacity.
+      // With equal capacity keep the existing buffer. This avoids extra work
+      // due to swapping the elements.
+      if (__v.capacity() < capacity())
+        __swap_out_circular_buffer(__v);
 #ifndef _LIBCPP_HAS_NO_EXCEPTIONS
     } catch (...) {
     }
diff --git a/libcxx/modules/std/cmath.inc b/libcxx/modules/std/cmath.inc
index a463c1e3ccf86..fe8ac773c9d1c 100644
--- a/libcxx/modules/std/cmath.inc
+++ b/libcxx/modules/std/cmath.inc
@@ -334,12 +334,14 @@ export namespace std {
   using std::expint;
   using std::expintf;
   using std::expintl;
+#endif
 
   // [sf.cmath.hermite], Hermite polynomials
   using std::hermite;
   using std::hermitef;
   using std::hermitel;
 
+#if 0
   // [sf.cmath.laguerre], Laguerre polynomials
   using std::laguerre;
   using std::laguerref;
diff --git a/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/abi.trivial_copy_move.pass.cpp b/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/abi.trivial_copy_move.pass.cpp
index 3ec60c08b8eab..5481ba443046d 100644
--- a/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/abi.trivial_copy_move.pass.cpp
+++ b/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/abi.trivial_copy_move.pass.cpp
@@ -162,8 +162,13 @@ void test_trivial()
         static_assert(!std::is_trivially_copy_constructible<P>::value, "");
         static_assert(!std::is_trivially_move_constructible<P>::value, "");
 #endif // TEST_STD_VER >= 11
+#ifndef _LIBCPP_ABI_TRIVIALLY_COPYABLE_PAIR
         static_assert(!std::is_trivially_copy_assignable<P>::value, "");
         static_assert(!std::is_trivially_move_assignable<P>::value, "");
+#else
+        static_assert(std::is_trivially_copy_assignable<P>::value, "");
+        static_assert(std::is_trivially_move_assignable<P>::value, "");
+#endif
         static_assert(std::is_trivially_destructible<P>::value, "");
     }
 }
diff --git a/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/abi.trivially_copyable.compile.pass.cpp b/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/abi.trivially_copyable.compile.pass.cpp
index 1132b3e5def18..c5f9c8d0f2559 100644
--- a/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/abi.trivially_copyable.compile.pass.cpp
+++ b/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/abi.trivially_copyable.compile.pass.cpp
@@ -47,11 +47,20 @@ static_assert(!std::is_trivially_copyable<std::pair<int&, int> >::value, "");
 static_assert(!std::is_trivially_copyable<std::pair<int, int&> >::value, "");
 static_assert(!std::is_trivially_copyable<std::pair<int&, int&> >::value, "");
 
+#ifdef _LIBCPP_ABI_TRIVIALLY_COPYABLE_PAIR
+static_assert(std::is_trivially_copyable<std::pair<int, int> >::value, "");
+static_assert(std::is_trivially_copyable<std::pair<int, char> >::value, "");
+static_assert(std::is_trivially_copyable<std::pair<char, int> >::value, "");
+static_assert(std::is_trivially_copyable<std::pair<std::pair<char, char>, int> >::value, "");
+static_assert(std::is_trivially_copyable<std::pair<trivially_copyable, int> >::value, "");
+#else
 static_assert(!std::is_trivially_copyable<std::pair<int, int> >::value, "");
 static_assert(!std::is_trivially_copyable<std::pair<int, char> >::value, "");
 static_assert(!std::is_trivially_copyable<std::pair<char, int> >::value, "");
 static_assert(!std::is_trivially_copyable<std::pair<std::pair<char, char>, int> >::value, "");
 static_assert(!std::is_trivially_copyable<std::pair<trivially_copyable, int> >::value, "");
+#endif // _LIBCPP_ABI_TRIVIALLY_COPYABLE_PAIR
+
 #if TEST_STD_VER == 03 // Known ABI difference
 static_assert(!std::is_trivially_copyable<std::pair<trivially_copyable_no_copy_assignment, int> >::value, "");
 static_assert(!std::is_trivially_copyable<std::pair<trivially_copyable_no_move_assignment, int> >::value, "");
@@ -59,10 +68,21 @@ static_assert(!std::is_trivially_copyable<std::pair<trivially_copyable_no_move_a
 static_assert(std::is_trivially_copyable<std::pair<trivially_copyable_no_copy_assignment, int> >::value, "");
 static_assert(std::is_trivially_copyable<std::pair<trivially_copyable_no_move_assignment, int> >::value, "");
 #endif
+
+#ifdef _LIBCPP_ABI_TRIVIALLY_COPYABLE_PAIR
+static_assert(std::is_trivially_copyable<std::pair<trivially_copyable_no_construction, int> >::value, "");
+#else
 static_assert(!std::is_trivially_copyable<std::pair<trivially_copyable_no_construction, int> >::value, "");
+#endif
 
 static_assert(std::is_trivially_copy_constructible<std::pair<int, int> >::value, "");
 static_assert(std::is_trivially_move_constructible<std::pair<int, int> >::value, "");
+static_assert(std::is_trivially_destructible<std::pair<int, int> >::value, "");
+
+#ifdef _LIBCPP_ABI_TRIVIALLY_COPYABLE_PAIR
+static_assert(std::is_trivially_copy_assignable<std::pair<int, int> >::value, "");
+static_assert(std::is_trivially_move_assignable<std::pair<int, int> >::value, "");
+#else
 static_assert(!std::is_trivially_copy_assignable<std::pair<int, int> >::value, "");
 static_assert(!std::is_trivially_move_assignable<std::pair<int, int> >::value, "");
-static_assert(std::is_trivially_destructible<std::pair<int, int> >::value, "");
+#endif // _LIBCPP_ABI_TRIVIALLY_COPYABLE_PAIR
diff --git a/libcxx/test/std/containers/sequences/vector/vector.capacity/shrink_to_fit.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.capacity/shrink_to_fit.pass.cpp
index 8851e2a9ed0c7..e39afb2d48f0a 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.capacity/shrink_to_fit.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.capacity/shrink_to_fit.pass.cpp
@@ -71,11 +71,56 @@ TEST_CONSTEXPR_CXX20 bool tests() {
     return true;
 }
 
+#if TEST_STD_VER >= 23
+template <typename T>
+struct increasing_allocator {
+  using value_type         = T;
+  std::size_t min_elements = 1000;
+  increasing_allocator()   = default;
+
+  template <typename U>
+  constexpr increasing_allocator(const increasing_allocator<U>& other) noexcept : min_elements(other.min_elements) {}
+
+  constexpr std::allocation_result<T*> allocate_at_least(std::size_t n) {
+    if (n < min_elements)
+      n = min_elements;
+    min_elements += 1000;
+    return std::allocator<T>{}.allocate_at_least(n);
+  }
+  constexpr T* allocate(std::size_t n) { return allocate_at_least(n).ptr; }
+  constexpr void deallocate(T* p, std::size_t n) noexcept { std::allocator<T>{}.deallocate(p, n); }
+};
+
+template <typename T, typename U>
+bool operator==(increasing_allocator<T>, increasing_allocator<U>) {
+  return true;
+}
+
+// https://github.com/llvm/llvm-project/issues/95161
+constexpr bool test_increasing_allocator() {
+  std::vector<int, increasing_allocator<int>> v;
+  v.push_back(1);
+  assert(is_contiguous_container_asan_correct(v));
+  std::size_t capacity = v.capacity();
+  v.shrink_to_fit();
+  assert(v.capacity() <= capacity);
+  assert(v.size() == 1);
+  assert(is_contiguous_container_asan_correct(v));
+
+  return true;
+}
+#endif // TEST_STD_VER >= 23
+
 int main(int, char**)
 {
-    tests();
+  tests();
 #if TEST_STD_VER > 17
     static_assert(tests());
 #endif
+#if TEST_STD_VER >= 23
+    test_increasing_allocator();
+    static_assert(test_increasing_allocator());
+#endif
+
     return 0;
 }
diff --git a/libcxx/test/std/numerics/c.math/hermite.pass.cpp b/libcxx/test/std/numerics/c.math/hermite.pass.cpp
new file mode 100644
index 0000000000000..08fbd5c3283c1
--- /dev/null
+++ b/libcxx/test/std/numerics/c.math/hermite.pass.cpp
@@ -0,0 +1,341 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+// <cmath>
+
+// double         hermite(unsigned n, double x);
+// float          hermite(unsigned n, float x);
+// long double    hermite(unsigned n, long double x);
+// float          hermitef(unsigned n, float x);
+// long double    hermitel(unsigned n, long double x);
+// template <class Integer>
+// double         hermite(unsigned n, Integer x);
+
+#include <array>
+#include <cassert>
+#include <cmath>
+#include <limits>
+#include <vector>
+
+#include "type_algorithms.h"
+
+inline constexpr unsigned g_max_n = 128;
+
+template <class T>
+std::array<T, 11> sample_points() {
+  return {-12.34, -7.42, -1.0, -0.5, -0.1, 0.0, 0.1, 0.5, 1.0, 5.67, 15.67};
+}
+
+template <class Real>
+class CompareFloatingValues {
+private:
+  Real abs_tol;
+  Real rel_tol;
+
+public:
+  CompareFloatingValues() {
+    abs_tol = []() -> Real {
+      if (std::is_same_v<Real, float>)
+        return 1e-5f;
+      else if (std::is_same_v<Real, double>)
+        return 1e-11;
+      else
+        return 1e-12l;
+    }();
+
+    rel_tol = abs_tol;
+  }
+
+  bool operator()(Real result, Real expected) const {
+    if (std::isinf(expected) && std::isinf(result))
+      return result == expected;
+
+    if (std::isnan(expected) || std::isnan(result))
+      return false;
+
+    Real tol = abs_tol + std::abs(expected) * rel_tol;
+    return std::abs(result - expected) < tol;
+  }
+};
+
+// Roots are taken from
+// Salzer, Herbert E., Ruth Zucker, and Ruth Capuano.
+// Table of the zeros and weight factors of the first twenty Hermite
+// polynomials. US Government Printing Office, 1952.
+template <class T>
+std::vector<T> get_roots(unsigned n) {
+  switch (n) {
+  case 0:
+    return {};
+  case 1:
+    return {T(0)};
+  case 2:
+    return {T(0.707106781186548)};
+  case 3:
+    return {T(0), T(1.224744871391589)};
+  case 4:
+    return {T(0.524647623275290), T(1.650680123885785)};
+  case 5:
+    return {T(0), T(0.958572464613819), T(2.020182870456086)};
+  case 6:
+    return {T(0.436077411927617), T(1.335849074013697), T(2.350604973674492)};
+  case 7:
+    return {T(0), T(0.816287882858965), T(1.673551628767471), T(2.651961356835233)};
+  case 8:
+    return {T(0.381186990207322), T(1.157193712446780), T(1.981656756695843), T(2.930637420257244)};
+  case 9:
+    return {T(0), T(0.723551018752838), T(1.468553289216668), T(2.266580584531843), T(3.190993201781528)};
+  case 10:
+    return {
+        T(0.342901327223705), T(1.036610829789514), T(1.756683649299882), T(2.532731674232790), T(3.436159118837738)};
+  case 11:
+    return {T(0),
+            T(0.65680956682100),
+            T(1.326557084494933),
+            T(2.025948015825755),
+            T(2.783290099781652),
+            T(3.668470846559583)};
+
+  case 12:
+    return {T(0.314240376254359),
+            T(0.947788391240164),
+            T(1.597682635152605),
+            T(2.279507080501060),
+            T(3.020637025120890),
+            T(3.889724897869782)};
+
+  case 13:
+    return {T(0),
+            T(0.605763879171060),
+            T(1.220055036590748),
+            T(1.853107651601512),
+            T(2.519735685678238),
+            T(3.246608978372410),
+            T(4.101337596178640)};
+
+  case 14:
+    return {T(0.29174551067256),
+            T(0.87871378732940),
+            T(1.47668273114114),
+            T(2.09518325850772),
+            T(2.74847072498540),
+            T(3.46265693360227),
+            T(4.30444857047363)};
+
+  case 15:
+    return {T(0.00000000000000),
+            T(0.56506958325558),
+            T(1.13611558521092),
+            T(1.71999257518649),
+            T(2.32573248617386),
+            T(2.96716692790560),
+            T(3.66995037340445),
+            T(4.49999070730939)};
+
+  case 16:
+    return {T(0.27348104613815),
+            T(0.82295144914466),
+            T(1.38025853919888),
+            T(1.95178799091625),
+            T(2.54620215784748),
+            T(3.17699916197996),
+            T(3.86944790486012),
+            T(4.68873893930582)};
+
+  case 17:
+    return {T(0),
+            T(0.5316330013427),
+            T(1.0676487257435),
+            T(1.6129243142212),
+            T(2.1735028266666),
+            T(2.7577629157039),
+            T(3.3789320911415),
+            T(4.0619466758755),
+            T(4.8713451936744)};
+
+  case 18:
+    return {T(0.2582677505191),
+            T(0.7766829192674),
+            T(1.3009208583896),
+            T(1.8355316042616),
+            T(2.3862990891667),
+            T(2.9613775055316),
+            T(3.5737690684863),
+            T(4.2481178735681),
+            T(5.0483640088745)};
+
+  case 19:
+    return {T(0),
+            T(0.5035201634239),
+            T(1.0103683871343),
+            T(1.5241706193935),
+            T(2.0492317098506),
+            T(2.5911337897945),
+            T(3.1578488183476),
+            T(3.7621873519640),
+            T(4.4285328066038),
+            T(5.2202716905375)};
+
+  case 20:
+    return {T(0.2453407083009),
+            T(0.7374737285454),
+            T(1.2340762153953),
+            T(1.7385377121166),
+            T(2.2549740020893),
+            T(2.7888060584281),
+            T(3.347854567332),
+            T(3.9447640401156),
+            T(4.6036824495507),
+            T(5.3874808900112)};
+
+  default: // polynomial degree n > 20 is unsupported
+    assert(false);
+    return {T(-42)};
+  }
+}
+
+template <class Real>
+void test() {
+  { // checks if NaNs are reported correctly (i.e. output == input for input == NaN)
+    using nl = std::numeric_limits<Real>;
+    for (Real NaN : {nl::quiet_NaN(), nl::signaling_NaN()})
+      for (unsigned n = 0; n < g_max_n; ++n)
+        assert(std::isnan(std::hermite(n, NaN)));
+  }
+
+  { // simple sample points for n=0..127 should not produce NaNs.
+    for (Real x : sample_points<Real>())
+      for (unsigned n = 0; n < g_max_n; ++n)
+        assert(!std::isnan(std::hermite(n, x)));
+  }
+
+  { // checks std::hermite(n, x) for n=0..5 against the analytic polynomials
+    const auto h0 = [](Real) -> Real { return 1; };
+    const auto h1 = [](Real y) -> Real { return 2 * y; };
+    const auto h2 = [](Real y) -> Real { return 4 * y * y - 2; };
+    const auto h3 = [](Real y) -> Real { return y * (8 * y * y - 12); };
+    const auto h4 = [](Real y) -> Real { return (16 * std::pow(y, 4) - 48 * y * y + 12); };
+    const auto h5 = [](Real y) -> Real { return y * (32 * std::pow(y, 4) - 160 * y * y + 120); };
+
+    for (Real x : sample_points<Real>()) {
+      const CompareFloatingValues<Real> compare;
+      assert(compare(std::hermite(0, x), h0(x)));
+      assert(compare(std::hermite(1, x), h1(x)));
+      assert(compare(std::hermite(2, x), h2(x)));
+      assert(compare(std::hermite(3, x), h3(x)));
+      assert(compare(std::hermite(4, x), h4(x)));
+      assert(compare(std::hermite(5, x), h5(x)));
+    }
+  }
+
+  { // checks std::hermitef for bitwise equality with std::hermite(unsigned, float)
+    if constexpr (std::is_same_v<Real, float>)
+      for (unsigned n = 0; n < g_max_n; ++n)
+        for (float x : sample_points<float>())
+          assert(std::hermite(n, x) == std::hermitef(n, x));
+  }
+
+  { // checks std::hermitel for bitwise equality with std::hermite(unsigned, long double)
+    if constexpr (std::is_same_v<Real, long double>)
+      for (unsigned n = 0; n < g_max_n; ++n)
+        for (long double x : sample_points<long double>())
+          assert(std::hermite(n, x) == std::hermitel(n, x));
+  }
+
+  { // Checks if the characteristic recurrence relation holds:    H_{n+1}(x) = 2x H_n(x) - 2n H_{n-1}(x)
+    for (Real x : sample_points<Real>()) {
+      for (unsigned n = 1; n < g_max_n - 1; ++n) {
+        Real H_next            = std::hermite(n + 1, x);
+        Real H_next_recurrence = 2 * (x * std::hermite(n, x) - n * std::hermite(n - 1, x));
+
+        if (std::isinf(H_next))
+          break;
+        const CompareFloatingValues<Real> compare;
+        assert(compare(H_next, H_next_recurrence));
+      }
+    }
+  }
+
+  { // sanity checks: hermite polynoms need to change signs at (simple) roots. checked upto order n<=20.
+
+    // root tolerance: must be smaller than the smallest difference between adjacent roots
+    Real tol = []() -> Real {
+      if (std::is_same_v<Real, float>)
+        return 1e-5f;
+      else if (std::is_same_v<Real, double>)
+        return 1e-9;
+      else
+        return 1e-10l;
+    }();
+
+    const auto is_sign_change = [tol](unsigned n, Real x) -> bool {
+      return std::hermite(n, x - tol) * std::hermite(n, x + tol) < 0;
+    };
+
+    for (unsigned n = 0; n <= 20u; ++n) {
+      for (Real x : get_roots<Real>(n)) {
+        // the roots are symmetric: if x is a root, so is -x
+        if (x > 0)
+          assert(is_sign_change(n, -x));
+        assert(is_sign_change(n, x));
+      }
+    }
+  }
+
+  { // check input infinity is handled correctly
+    Real inf = std::numeric_limits<Real>::infinity();
+    for (unsigned n = 1; n < g_max_n; ++n) {
+      assert(std::hermite(n, +inf) == inf);
+      assert(std::hermite(n, -inf) == ((n & 1) ? -inf : inf));
+    }
+  }
+
+  { // check: if overflow occurs that it is mapped to the correct infinity
+    if constexpr (std::is_same_v<Real, double>) {
+      // Q: Why only double?
+      // A: The numeric values (e.g. overflow threshold `n`) below are different for other types.
+      static_assert(sizeof(double) == 8);
+      for (unsigned n = 0; n < g_max_n; ++n) {
+        // Q: Why n=111 and x=300?
+        // A: Both are chosen s.t. the first overlow occurs for some `n<g_max_n`.
+        if (n < 111) {
+          assert(std::isfinite(std::hermite(n, +300.0)));
+          assert(std::isfinite(std::hermite(n, -300.0)));
+        } else {
+          double inf = std::numeric_limits<double>::infinity();
+          assert(std::hermite(n, +300.0) == inf);
+          assert(std::hermite(n, -300.0) == ((n & 1) ? -inf : inf));
+        }
+      }
+    }
+  }
+}
+
+struct TestFloat { // functor passed to types::for_each in main()
+  template <class Real>
+  void operator()() {
+    test<Real>(); // run the full std::hermite test suite for Real
+  }
+};
+
+struct TestInt { // functor passed to types::for_each in main()
+  template <class Integer>
+  void operator()() {
+    // checks that std::hermite(unsigned, Integer) returns the same value as std::hermite(unsigned, double)
+    for (unsigned n = 0; n < g_max_n; ++n)
+      for (Integer x : {-42, -7, -5, -1, 0, 1, 5, 7, 42}) // the int literals are converted to Integer
+        assert(std::hermite(n, x) == std::hermite(n, static_cast<double>(x)));
+  }
+};
+
+int main() {
+  types::for_each(types::floating_point_types(), TestFloat()); // test<Real>() per listed type
+  types::for_each(types::type_list<short, int, long, long long>(), TestInt()); // integer-argument overloads
+}
diff --git a/libcxx/test/std/time/time.syn/formatter.zoned_time.pass.cpp b/libcxx/test/std/time/time.syn/formatter.zoned_time.pass.cpp
new file mode 100644
index 0000000000000..1e366ac72fa9f
--- /dev/null
+++ b/libcxx/test/std/time/time.syn/formatter.zoned_time.pass.cpp
@@ -0,0 +1,974 @@
+//===----------------------------------------------------------------------===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: no-filesystem, no-localization, no-tzdb
+
+// TODO FMT This test should not require std::to_chars(floating-point)
+// XFAIL: availability-fp_to_chars-missing
+
+// XFAIL: libcpp-has-no-experimental-tzdb
+
+// REQUIRES: locale.fr_FR.UTF-8
+// REQUIRES: locale.ja_JP.UTF-8
+
+// <chrono>
+//
+// template<class Duration, class TimeZonePtr, class charT>
+// struct formatter<chrono::zoned_time<Duration, TimeZonePtr>, charT>
+
+#include <chrono>
+#include <format>
+
+#include <cassert>
+#include <concepts>
+#include <locale>
+#include <iostream>
+#include <type_traits>
+
+#include "formatter_tests.h"
+#include "make_string.h"
+#include "platform_support.h" // locale name macros
+#include "test_macros.h"
+
+template <class CharT>
+static void test_no_chrono_specs() { // default "{}" formatting of zoned_time for sys_time of several durations
+  using namespace std::literals::chrono_literals;
+
+  // Every zoned_time below uses "Etc/GMT-1": the expected text is one hour past the sys_time and ends in "+01".
+  check(SV("1970-01-01 01:00:00.000000042 +01"),
+        SV("{}"),
+        std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::nanoseconds>{42ns}));
+  check(SV("1970-01-01 01:00:00.000042 +01"),
+        SV("{}"),
+        std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::microseconds>{42us}));
+  check(SV("1970-01-01 01:00:00.042 +01"),
+        SV("{}"),
+        std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::milliseconds>{42ms}));
+  check(SV("1970-01-01 01:00:42 +01"),
+        SV("{}"),
+        std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::seconds>{42s}));
+  check(SV("1970-02-12 01:00:00 +01"),
+        SV("{}"),
+        std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::days>{std::chrono::days{42}}));
+  check(SV("1970-10-22 01:00:00 +01"),
+        SV("{}"),
+        std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::weeks>{std::chrono::weeks{42}}));
+}
+
+template <class CharT>
+static void test_valid_values_year() { // year specifiers: %C %EC %y %Oy %Ey %Y %EY
+  using namespace std::literals::chrono_literals;
+
+  constexpr std::basic_string_view<CharT> fmt =
+      SV("{:%%C='%C'%t%%EC='%EC'%t%%y='%y'%t%%Oy='%Oy'%t%%Ey='%Ey'%t%%Y='%Y'%t%%EY='%EY'%n}");
+  constexpr std::basic_string_view<CharT> lfmt =
+      SV("{:L%%C='%C'%t%%EC='%EC'%t%%y='%y'%t%%Oy='%Oy'%t%%Ey='%Ey'%t%%Y='%Y'%t%%EY='%EY'%n}");
+
+  const std::locale loc(LOCALE_ja_JP_UTF_8); // passed explicitly to check() in the last section
+  std::locale::global(std::locale(LOCALE_fr_FR_UTF_8));
+
+  // Non-localized output (format without L) uses the C locale, not the global fr_FR locale
+  check(SV("%C='19'\t%EC='19'\t%y='70'\t%Oy='70'\t%Ey='70'\t%Y='1970'\t%EY='1970'\n"),
+        fmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(SV("%C='20'\t%EC='20'\t%y='09'\t%Oy='09'\t%Ey='09'\t%Y='2009'\t%EY='2009'\n"),
+        fmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009
+
+  // The L option without an explicit locale uses the global locale (fr_FR)
+  check(SV("%C='19'\t%EC='19'\t%y='70'\t%Oy='70'\t%Ey='70'\t%Y='1970'\t%EY='1970'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(SV("%C='20'\t%EC='20'\t%y='09'\t%Oy='09'\t%Ey='09'\t%Y='2009'\t%EY='2009'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009
+
+  // Use the supplied locale (ja_JP). Its alternative (E/O) representations differ per platform.
+#if defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__)
+
+  check(loc,
+        SV("%C='19'\t%EC='19'\t%y='70'\t%Oy='70'\t%Ey='70'\t%Y='1970'\t%EY='1970'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(loc,
+        SV("%C='20'\t%EC='20'\t%y='09'\t%Oy='09'\t%Ey='09'\t%Y='2009'\t%EY='2009'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009
+#else  // defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__)
+
+  check(loc,
+        SV("%C='19'\t%EC='昭和'\t%y='70'\t%Oy='七十'\t%Ey='45'\t%Y='1970'\t%EY='昭和45年'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(loc,
+        SV("%C='20'\t%EC='平成'\t%y='09'\t%Oy='九'\t%Ey='21'\t%Y='2009'\t%EY='平成21年'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009
+#endif // defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__)
+
+  std::locale::global(std::locale::classic()); // reset the global locale to the classic locale
+}
+
+template <class CharT>
+static void test_valid_values_month() { // month specifiers: %b %h %B %m %Om
+  using namespace std::literals::chrono_literals;
+
+  constexpr std::basic_string_view<CharT> fmt  = SV("{:%%b='%b'%t%%h='%h'%t%%B='%B'%t%%m='%m'%t%%Om='%Om'%n}");
+  constexpr std::basic_string_view<CharT> lfmt = SV("{:L%%b='%b'%t%%h='%h'%t%%B='%B'%t%%m='%m'%t%%Om='%Om'%n}");
+
+  const std::locale loc(LOCALE_ja_JP_UTF_8); // passed explicitly to check() in the last section
+  std::locale::global(std::locale(LOCALE_fr_FR_UTF_8));
+
+  // Non-localized output (format without L) uses the C locale, not the global fr_FR locale
+  check(SV("%b='Jan'\t%h='Jan'\t%B='January'\t%m='01'\t%Om='01'\n"),
+        fmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(SV("%b='May'\t%h='May'\t%B='May'\t%m='05'\t%Om='05'\n"),
+        fmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{2'000'000'000s})); // 03:33:20 UTC on Wednesday, 18 May 2033
+
+  // The L option without an explicit locale uses the global locale (fr_FR)
+#if defined(__APPLE__)
+  check(SV("%b='jan'\t%h='jan'\t%B='janvier'\t%m='01'\t%Om='01'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+#else
+  check(SV("%b='janv.'\t%h='janv.'\t%B='janvier'\t%m='01'\t%Om='01'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+#endif
+
+  check(SV("%b='mai'\t%h='mai'\t%B='mai'\t%m='05'\t%Om='05'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{2'000'000'000s})); // 03:33:20 UTC on Wednesday, 18 May 2033
+
+  // Use the supplied locale (ja_JP). The expected output differs per platform.
+#ifdef _WIN32
+  check(loc,
+        SV("%b='1'\t%h='1'\t%B='1月'\t%m='01'\t%Om='01'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(loc,
+        SV("%b='5'\t%h='5'\t%B='5月'\t%m='05'\t%Om='05'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{2'000'000'000s})); // 03:33:20 UTC on Wednesday, 18 May 2033
+#elif defined(_AIX)                                                         // _WIN32
+  check(loc,
+        SV("%b='1月'\t%h='1月'\t%B='1月'\t%m='01'\t%Om='01'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(loc,
+        SV("%b='5月'\t%h='5月'\t%B='5月'\t%m='05'\t%Om='05'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{2'000'000'000s})); // 03:33:20 UTC on Wednesday, 18 May 2033
+#elif defined(__APPLE__)                                                    // _WIN32
+  check(loc,
+        SV("%b=' 1'\t%h=' 1'\t%B='1月'\t%m='01'\t%Om='01'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(loc,
+        SV("%b=' 5'\t%h=' 5'\t%B='5月'\t%m='05'\t%Om='05'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{2'000'000'000s})); // 03:33:20 UTC on Wednesday, 18 May 2033
+#elif defined(__FreeBSD__)                                                  // _WIN32
+  check(loc,
+        SV("%b=' 1月'\t%h=' 1月'\t%B='1月'\t%m='01'\t%Om='01'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(loc,
+        SV("%b=' 5月'\t%h=' 5月'\t%B='5月'\t%m='05'\t%Om='05'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{2'000'000'000s})); // 03:33:20 UTC on Wednesday, 18 May 2033
+#else                                                                       // _WIN32
+  check(loc,
+        SV("%b=' 1月'\t%h=' 1月'\t%B='1月'\t%m='01'\t%Om='一'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(loc,
+        SV("%b=' 5月'\t%h=' 5月'\t%B='5月'\t%m='05'\t%Om='五'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{2'000'000'000s})); // 03:33:20 UTC on Wednesday, 18 May 2033
+#endif                                                                      // _WIN32
+
+  std::locale::global(std::locale::classic()); // reset the global locale to the classic locale
+}
+
+template <class CharT>
+static void test_valid_values_day() { // day-of-month specifiers: %d %Od %e %Oe
+  using namespace std::literals::chrono_literals;
+
+  constexpr std::basic_string_view<CharT> fmt  = SV("{:%%d='%d'%t%%Od='%Od'%t%%e='%e'%t%%Oe='%Oe'%n}");
+  constexpr std::basic_string_view<CharT> lfmt = SV("{:L%%d='%d'%t%%Od='%Od'%t%%e='%e'%t%%Oe='%Oe'%n}");
+
+  const std::locale loc(LOCALE_ja_JP_UTF_8); // passed explicitly to check() in the last section
+  std::locale::global(std::locale(LOCALE_fr_FR_UTF_8));
+
+  // Non-localized output (format without L) uses the C locale, not the global fr_FR locale
+  check(SV("%d='01'\t%Od='01'\t%e=' 1'\t%Oe=' 1'\n"),
+        fmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(SV("%d='13'\t%Od='13'\t%e='13'\t%Oe='13'\n"),
+        fmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009
+
+  // The L option without an explicit locale uses the global locale (fr_FR)
+  check(SV("%d='01'\t%Od='01'\t%e=' 1'\t%Oe=' 1'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(SV("%d='13'\t%Od='13'\t%e='13'\t%Oe='13'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009
+
+  // Use the supplied locale (ja_JP). Its alternative (O) representation differs per platform.
+#if defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__)
+  check(loc,
+        SV("%d='01'\t%Od='01'\t%e=' 1'\t%Oe=' 1'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(loc,
+        SV("%d='13'\t%Od='13'\t%e='13'\t%Oe='13'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009
+#else // defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__)
+  check(loc,
+        SV("%d='01'\t%Od='一'\t%e=' 1'\t%Oe='一'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(loc,
+        SV("%d='13'\t%Od='十三'\t%e='13'\t%Oe='十三'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009
+
+#endif // defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__)
+
+  std::locale::global(std::locale::classic()); // reset the global locale to the classic locale
+}
+
+template <class CharT>
+static void test_valid_values_weekday() { // weekday specifiers: %a %A %u %Ou %w %Ow
+  using namespace std::literals::chrono_literals;
+
+  constexpr std::basic_string_view<CharT> fmt =
+      SV("{:%%a='%a'%t%%A='%A'%t%%u='%u'%t%%Ou='%Ou'%t%%w='%w'%t%%Ow='%Ow'%n}");
+  constexpr std::basic_string_view<CharT> lfmt =
+      SV("{:L%%a='%a'%t%%A='%A'%t%%u='%u'%t%%Ou='%Ou'%t%%w='%w'%t%%Ow='%Ow'%n}");
+
+  const std::locale loc(LOCALE_ja_JP_UTF_8); // passed explicitly to check() in the last section
+  std::locale::global(std::locale(LOCALE_fr_FR_UTF_8));
+
+  // Non-localized output (format without L) uses the C locale, not the global fr_FR locale
+  check(SV("%a='Thu'\t%A='Thursday'\t%u='4'\t%Ou='4'\t%w='4'\t%Ow='4'\n"),
+        fmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(SV("%a='Sun'\t%A='Sunday'\t%u='7'\t%Ou='7'\t%w='0'\t%Ow='0'\n"),
+        fmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{4'294'967'295s})); // 06:28:15 UTC on Sunday, 7 February 2106
+
+  // The L option without an explicit locale uses the global locale (fr_FR)
+#if defined(__APPLE__)
+  check(SV("%a='Jeu'\t%A='Jeudi'\t%u='4'\t%Ou='4'\t%w='4'\t%Ow='4'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(SV("%a='Dim'\t%A='Dimanche'\t%u='7'\t%Ou='7'\t%w='0'\t%Ow='0'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{4'294'967'295s})); // 06:28:15 UTC on Sunday, 7 February 2106
+#else
+  check(SV("%a='jeu.'\t%A='jeudi'\t%u='4'\t%Ou='4'\t%w='4'\t%Ow='4'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(SV("%a='dim.'\t%A='dimanche'\t%u='7'\t%Ou='7'\t%w='0'\t%Ow='0'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{4'294'967'295s})); // 06:28:15 UTC on Sunday, 7 February 2106
+#endif
+
+  // Use the supplied locale (ja_JP).
+  // Its alternative (O) representation differs from the plain digits, but not on all platforms
+#if defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__)
+  check(loc,
+        SV("%a='木'\t%A='木曜日'\t%u='4'\t%Ou='4'\t%w='4'\t%Ow='4'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(loc,
+        SV("%a='日'\t%A='日曜日'\t%u='7'\t%Ou='7'\t%w='0'\t%Ow='0'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{4'294'967'295s})); // 06:28:15 UTC on Sunday, 7 February 2106
+#else  // defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__)
+  check(loc,
+        SV("%a='木'\t%A='木曜日'\t%u='4'\t%Ou='四'\t%w='4'\t%Ow='四'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(loc,
+        SV("%a='日'\t%A='日曜日'\t%u='7'\t%Ou='七'\t%w='0'\t%Ow='〇'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{4'294'967'295s})); // 06:28:15 UTC on Sunday, 7 February 2106
+#endif // defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__)
+
+  std::locale::global(std::locale::classic()); // reset the global locale to the classic locale
+}
+
+template <class CharT>
+static void test_valid_values_day_of_year() { // day-of-year specifier: %j
+  using namespace std::literals::chrono_literals;
+
+  constexpr std::basic_string_view<CharT> fmt  = SV("{:%%j='%j'%n}");
+  constexpr std::basic_string_view<CharT> lfmt = SV("{:L%%j='%j'%n}");
+
+  const std::locale loc(LOCALE_ja_JP_UTF_8); // passed explicitly to check() in the last section
+  std::locale::global(std::locale(LOCALE_fr_FR_UTF_8));
+
+  // Non-localized output (format without L) uses the C locale, not the global fr_FR locale
+  check(SV("%j='001'\n"),
+        fmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+  check(SV("%j='138'\n"),
+        fmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{2'000'000'000s})); // 03:33:20 UTC on Wednesday, 18 May 2033
+
+  // The L option without an explicit locale uses the global locale (fr_FR)
+  check(SV("%j='001'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+  check(SV("%j='138'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{2'000'000'000s})); // 03:33:20 UTC on Wednesday, 18 May 2033
+
+  // Use the supplied locale (ja_JP). The expected output is the same as for the C locale.
+  check(loc,
+        SV("%j='001'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(loc,
+        SV("%j='138'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{2'000'000'000s})); // 03:33:20 UTC on Wednesday, 18 May 2033
+
+  std::locale::global(std::locale::classic()); // reset the global locale to the classic locale
+}
+
+template <class CharT>
+static void test_valid_values_week() { // week-of-year specifiers: %U %OU %W %OW
+  using namespace std::literals::chrono_literals;
+
+  constexpr std::basic_string_view<CharT> fmt  = SV("{:%%U='%U'%t%%OU='%OU'%t%%W='%W'%t%%OW='%OW'%n}");
+  constexpr std::basic_string_view<CharT> lfmt = SV("{:L%%U='%U'%t%%OU='%OU'%t%%W='%W'%t%%OW='%OW'%n}");
+
+  const std::locale loc(LOCALE_ja_JP_UTF_8); // passed explicitly to check() in the last section
+  std::locale::global(std::locale(LOCALE_fr_FR_UTF_8));
+
+  // Non-localized output (format without L) uses the C locale, not the global fr_FR locale
+  check(SV("%U='00'\t%OU='00'\t%W='00'\t%OW='00'\n"),
+        fmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(SV("%U='20'\t%OU='20'\t%W='20'\t%OW='20'\n"),
+        fmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{2'000'000'000s})); // 03:33:20 UTC on Wednesday, 18 May 2033
+
+  // The L option without an explicit locale uses the global locale (fr_FR)
+  check(SV("%U='00'\t%OU='00'\t%W='00'\t%OW='00'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(SV("%U='20'\t%OU='20'\t%W='20'\t%OW='20'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{2'000'000'000s})); // 03:33:20 UTC on Wednesday, 18 May 2033
+
+  // Use the supplied locale (ja_JP). Its alternative (O) representation differs per platform.
+#if defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__)
+  check(loc,
+        SV("%U='00'\t%OU='00'\t%W='00'\t%OW='00'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(loc,
+        SV("%U='20'\t%OU='20'\t%W='20'\t%OW='20'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{2'000'000'000s})); // 03:33:20 UTC on Wednesday, 18 May 2033
+#else  // defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__)
+  check(loc,
+        SV("%U='00'\t%OU='〇'\t%W='00'\t%OW='〇'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(loc,
+        SV("%U='20'\t%OU='二十'\t%W='20'\t%OW='二十'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{2'000'000'000s})); // 03:33:20 UTC on Wednesday, 18 May 2033
+#endif // defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__)
+  std::locale::global(std::locale::classic()); // reset the global locale to the classic locale
+}
+
+template <class CharT>
+static void test_valid_values_iso_8601_week() { // ISO 8601 week-based specifiers: %g %G %V %OV
+  using namespace std::literals::chrono_literals;
+
+  constexpr std::basic_string_view<CharT> fmt  = SV("{:%%g='%g'%t%%G='%G'%t%%V='%V'%t%%OV='%OV'%n}");
+  constexpr std::basic_string_view<CharT> lfmt = SV("{:L%%g='%g'%t%%G='%G'%t%%V='%V'%t%%OV='%OV'%n}");
+
+  const std::locale loc(LOCALE_ja_JP_UTF_8); // passed explicitly to check() in the last section
+  std::locale::global(std::locale(LOCALE_fr_FR_UTF_8));
+
+  // Non-localized output (format without L) uses the C locale, not the global fr_FR locale
+  check(SV("%g='70'\t%G='1970'\t%V='01'\t%OV='01'\n"),
+        fmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(SV("%g='09'\t%G='2009'\t%V='07'\t%OV='07'\n"),
+        fmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009
+
+  // The L option without an explicit locale uses the global locale (fr_FR)
+  check(SV("%g='70'\t%G='1970'\t%V='01'\t%OV='01'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(SV("%g='09'\t%G='2009'\t%V='07'\t%OV='07'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009
+
+  // Use the supplied locale (ja_JP). Its alternative (O) representation differs per platform.
+#if defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__)
+  check(loc,
+        SV("%g='70'\t%G='1970'\t%V='01'\t%OV='01'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(loc,
+        SV("%g='09'\t%G='2009'\t%V='07'\t%OV='07'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009
+#else  // defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__)
+  check(loc,
+        SV("%g='70'\t%G='1970'\t%V='01'\t%OV='一'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(loc,
+        SV("%g='09'\t%G='2009'\t%V='07'\t%OV='七'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009
+#endif // defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__)
+
+  std::locale::global(std::locale::classic()); // reset the global locale to the classic locale
+}
+
+template <class CharT>
+static void test_valid_values_date() { // date specifiers: %D %F %x %Ex
+  using namespace std::literals::chrono_literals;
+
+  constexpr std::basic_string_view<CharT> fmt  = SV("{:%%D='%D'%t%%F='%F'%t%%x='%x'%t%%Ex='%Ex'%n}");
+  constexpr std::basic_string_view<CharT> lfmt = SV("{:L%%D='%D'%t%%F='%F'%t%%x='%x'%t%%Ex='%Ex'%n}");
+
+  const std::locale loc(LOCALE_ja_JP_UTF_8); // passed explicitly to check() in the last section
+  std::locale::global(std::locale(LOCALE_fr_FR_UTF_8));
+
+  // Non-localized output (format without L) uses the C locale, not the global fr_FR locale
+  check(SV("%D='01/01/70'\t%F='1970-01-01'\t%x='01/01/70'\t%Ex='01/01/70'\n"),
+        fmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(SV("%D='02/13/09'\t%F='2009-02-13'\t%x='02/13/09'\t%Ex='02/13/09'\n"),
+        fmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009
+
+  // The L option without an explicit locale uses the global locale (fr_FR)
+#if defined(__APPLE__) || defined(__FreeBSD__)
+  check(SV("%D='01/01/70'\t%F='1970-01-01'\t%x='01.01.1970'\t%Ex='01.01.1970'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(SV("%D='02/13/09'\t%F='2009-02-13'\t%x='13.02.2009'\t%Ex='13.02.2009'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009
+#else
+  check(SV("%D='01/01/70'\t%F='1970-01-01'\t%x='01/01/1970'\t%Ex='01/01/1970'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(SV("%D='02/13/09'\t%F='2009-02-13'\t%x='13/02/2009'\t%Ex='13/02/2009'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009
+#endif
+
+  // Use the supplied locale (ja_JP). Its %x and alternative (%Ex) representations differ per platform.
+#if defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__)
+  check(loc,
+        SV("%D='01/01/70'\t%F='1970-01-01'\t%x='1970/01/01'\t%Ex='1970/01/01'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(loc,
+        SV("%D='02/13/09'\t%F='2009-02-13'\t%x='2009/02/13'\t%Ex='2009/02/13'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009
+#else  // defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__)
+  check(loc,
+        SV("%D='01/01/70'\t%F='1970-01-01'\t%x='1970年01月01日'\t%Ex='昭和45年01月01日'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(loc,
+        SV("%D='02/13/09'\t%F='2009-02-13'\t%x='2009年02月13日'\t%Ex='平成21年02月13日'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009
+#endif // defined(_WIN32) || defined(__APPLE__) || defined(_AIX) || defined(__FreeBSD__)
+
+  std::locale::global(std::locale::classic()); // reset the global locale to the classic locale
+}
+
+template <class CharT>
+static void test_valid_values_time() {
+  using namespace std::literals::chrono_literals;
+
+  constexpr std::basic_string_view<CharT> fmt = SV(
+      "{:"
+      "%%H='%H'%t"
+      "%%OH='%OH'%t"
+      "%%I='%I'%t"
+      "%%OI='%OI'%t"
+      "%%M='%M'%t"
+      "%%OM='%OM'%t"
+      "%%S='%S'%t"
+      "%%OS='%OS'%t"
+      "%%p='%p'%t"
+      "%%R='%R'%t"
+      "%%T='%T'%t"
+      "%%r='%r'%t"
+      "%%X='%X'%t"
+      "%%EX='%EX'%t"
+      "%n}");
+  constexpr std::basic_string_view<CharT> lfmt = SV(
+      "{:L"
+      "%%H='%H'%t"
+      "%%OH='%OH'%t"
+      "%%I='%I'%t"
+      "%%OI='%OI'%t"
+      "%%M='%M'%t"
+      "%%OM='%OM'%t"
+      "%%S='%S'%t"
+      "%%OS='%OS'%t"
+      "%%p='%p'%t"
+      "%%R='%R'%t"
+      "%%T='%T'%t"
+      "%%r='%r'%t"
+      "%%X='%X'%t"
+      "%%EX='%EX'%t"
+      "%n}");
+
+  const std::locale loc(LOCALE_ja_JP_UTF_8);
+  std::locale::global(std::locale(LOCALE_fr_FR_UTF_8));
+
+  // Non localized output using C-locale
+  check(SV("%H='00'\t"
+           "%OH='00'\t"
+           "%I='12'\t"
+           "%OI='12'\t"
+           "%M='00'\t"
+           "%OM='00'\t"
+           "%S='00'\t"
+           "%OS='00'\t"
+           "%p='AM'\t"
+           "%R='00:00'\t"
+           "%T='00:00:00'\t"
+           "%r='12:00:00 AM'\t"
+           "%X='00:00:00'\t"
+           "%EX='00:00:00'\t"
+           "\n"),
+        fmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(SV("%H='23'\t"
+           "%OH='23'\t"
+           "%I='11'\t"
+           "%OI='11'\t"
+           "%M='31'\t"
+           "%OM='31'\t"
+           "%S='30.123'\t"
+           "%OS='30.123'\t"
+           "%p='PM'\t"
+           "%R='23:31'\t"
+           "%T='23:31:30.123'\t"
+           "%r='11:31:30 PM'\t"
+           "%X='23:31:30'\t"
+           "%EX='23:31:30'\t"
+           "\n"),
+        fmt,
+        std::chrono::sys_time<std::chrono::milliseconds>(
+            1'234'567'890'123ms)); // 23:31:30 UTC on Friday, 13 February 2009
+  // Use the global locale (fr_FR)
+  check(SV("%H='00'\t"
+           "%OH='00'\t"
+           "%I='12'\t"
+           "%OI='12'\t"
+           "%M='00'\t"
+           "%OM='00'\t"
+           "%S='00'\t"
+           "%OS='00'\t"
+#if defined(_AIX)
+           "%p='AM'\t"
+#else
+           "%p=''\t"
+#endif
+           "%R='00:00'\t"
+           "%T='00:00:00'\t"
+#ifdef _WIN32
+           "%r='00:00:00'\t"
+#elif defined(_AIX)
+           "%r='12:00:00 AM'\t"
+#elif defined(__APPLE__) || defined(__FreeBSD__)
+           "%r=''\t"
+#else
+           "%r='12:00:00 '\t"
+#endif
+           "%X='00:00:00'\t"
+           "%EX='00:00:00'\t"
+           "\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(SV("%H='23'\t"
+           "%OH='23'\t"
+           "%I='11'\t"
+           "%OI='11'\t"
+           "%M='31'\t"
+           "%OM='31'\t"
+           "%S='30,123'\t"
+           "%OS='30,123'\t"
+#if defined(_AIX)
+           "%p='PM'\t"
+#else
+           "%p=''\t"
+#endif
+           "%R='23:31'\t"
+           "%T='23:31:30,123'\t"
+#ifdef _WIN32
+           "%r='23:31:30'\t"
+#elif defined(_AIX)
+           "%r='11:31:30 PM'\t"
+#elif defined(__APPLE__) || defined(__FreeBSD__)
+           "%r=''\t"
+#elif defined(_WIN32)
+           "%r='23:31:30 '\t"
+#else
+           "%r='11:31:30 '\t"
+#endif
+           "%X='23:31:30'\t"
+           "%EX='23:31:30'\t"
+           "\n"),
+        lfmt,
+        std::chrono::sys_time<std::chrono::milliseconds>(
+            1'234'567'890'123ms)); // 23:31:30 UTC on Friday, 13 February 2009
+
+  // Use supplied locale (ja_JP). This locale has a different alternate representation.
+#if defined(__APPLE__) || defined(_AIX) || defined(_WIN32) || defined(__FreeBSD__)
+  check(loc,
+        SV("%H='00'\t"
+           "%OH='00'\t"
+           "%I='12'\t"
+           "%OI='12'\t"
+           "%M='00'\t"
+           "%OM='00'\t"
+           "%S='00'\t"
+           "%OS='00'\t"
+#  if defined(__APPLE__)
+           "%p='AM'\t"
+#  else
+           "%p='午前'\t"
+#  endif
+           "%R='00:00'\t"
+           "%T='00:00:00'\t"
+#  if defined(__APPLE__) || defined(__FreeBSD__)
+#    if defined(__APPLE__)
+           "%r='12:00:00 AM'\t"
+#    else
+           "%r='12:00:00 午前'\t"
+#    endif
+           "%X='00時00分00秒'\t"
+           "%EX='00時00分00秒'\t"
+#  elif defined(_WIN32)
+           "%r='0:00:00'\t"
+           "%X='0:00:00'\t"
+           "%EX='0:00:00'\t"
+#  else
+           "%r='午前12:00:00'\t"
+           "%X='00:00:00'\t"
+           "%EX='00:00:00'\t"
+#  endif
+           "\n"),
+        lfmt,
+        std::chrono::hh_mm_ss(0s));
+
+  check(loc,
+        SV("%H='23'\t"
+           "%OH='23'\t"
+           "%I='11'\t"
+           "%OI='11'\t"
+           "%M='31'\t"
+           "%OM='31'\t"
+           "%S='30.123'\t"
+           "%OS='30.123'\t"
+#  if defined(__APPLE__)
+           "%p='PM'\t"
+#  else
+           "%p='午後'\t"
+#  endif
+           "%R='23:31'\t"
+           "%T='23:31:30.123'\t"
+#  if defined(__APPLE__) || defined(__FreeBSD__)
+#    if defined(__APPLE__)
+           "%r='11:31:30 PM'\t"
+#    else
+           "%r='11:31:30 午後'\t"
+#    endif
+           "%X='23時31分30秒'\t"
+           "%EX='23時31分30秒'\t"
+#  elif defined(_WIN32)
+           "%r='23:31:30'\t"
+           "%X='23:31:30'\t"
+           "%EX='23:31:30'\t"
+#  else
+           "%r='午後11:31:30'\t"
+           "%X='23:31:30'\t"
+           "%EX='23:31:30'\t"
+#  endif
+           "\n"),
+        lfmt,
+        std::chrono::hh_mm_ss(23h + 31min + 30s + 123ms));
+#else  // defined(__APPLE__) || defined(_AIX) || defined(_WIN32) || defined(__FreeBSD__)
+  check(loc,
+        SV("%H='00'\t"
+           "%OH='〇'\t"
+           "%I='12'\t"
+           "%OI='十二'\t"
+           "%M='00'\t"
+           "%OM='〇'\t"
+           "%S='00'\t"
+           "%OS='〇'\t"
+           "%p='午前'\t"
+           "%R='00:00'\t"
+           "%T='00:00:00'\t"
+           "%r='午前12時00分00秒'\t"
+           "%X='00時00分00秒'\t"
+           "%EX='00時00分00秒'\t"
+           "\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(loc,
+        SV("%H='23'\t"
+           "%OH='二十三'\t"
+           "%I='11'\t"
+           "%OI='十一'\t"
+           "%M='31'\t"
+           "%OM='三十一'\t"
+           "%S='30.123'\t"
+           "%OS='三十.123'\t"
+           "%p='午後'\t"
+           "%R='23:31'\t"
+           "%T='23:31:30.123'\t"
+           "%r='午後11時31分30秒'\t"
+           "%X='23時31分30秒'\t"
+           "%EX='23時31分30秒'\t"
+           "\n"),
+        lfmt,
+        std::chrono::sys_time<std::chrono::milliseconds>(
+            1'234'567'890'123ms)); // 23:31:30 UTC on Friday, 13 February 2009
+#endif // defined(__APPLE__) || defined(_AIX) || defined(_WIN32) || defined(__FreeBSD__)
+
+  std::locale::global(std::locale::classic());
+}
+
+template <class CharT>
+static void test_valid_values_date_time() {
+  using namespace std::literals::chrono_literals;
+
+  constexpr std::basic_string_view<CharT> fmt  = SV("{:%%c='%c'%t%%Ec='%Ec'%n}");
+  constexpr std::basic_string_view<CharT> lfmt = SV("{:L%%c='%c'%t%%Ec='%Ec'%n}");
+
+  const std::locale loc(LOCALE_ja_JP_UTF_8);
+  std::locale::global(std::locale(LOCALE_fr_FR_UTF_8));
+
+  // Non localized output using C-locale
+  check(SV("%c='Thu Jan  1 00:00:00 1970'\t%Ec='Thu Jan  1 00:00:00 1970'\n"),
+        fmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(SV("%c='Fri Feb 13 23:31:30 2009'\t%Ec='Fri Feb 13 23:31:30 2009'\n"),
+        fmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009
+
+  // Use the global locale (fr_FR)
+  check(
+// https://sourceware.org/bugzilla/show_bug.cgi?id=24054
+#if defined(__powerpc__) && defined(__linux__)
+      SV("%c='jeu. 01 janv. 1970 00:00:00 UTC'\t%Ec='jeu. 01 janv. 1970 00:00:00 UTC'\n"),
+#elif defined(__GLIBC__) && __GLIBC__ <= 2 && __GLIBC_MINOR__ < 29
+      SV("%c='jeu. 01 janv. 1970 00:00:00 GMT'\t%Ec='jeu. 01 janv. 1970 00:00:00 GMT'\n"),
+#elif defined(_AIX)
+      SV("%c=' 1 janvier 1970 à 00:00:00 UTC'\t%Ec=' 1 janvier 1970 à 00:00:00 UTC'\n"),
+#elif defined(__APPLE__)
+      SV("%c='Jeu  1 jan 00:00:00 1970'\t%Ec='Jeu  1 jan 00:00:00 1970'\n"),
+#elif defined(_WIN32)
+      SV("%c='01/01/1970 00:00:00'\t%Ec='01/01/1970 00:00:00'\n"),
+#elif defined(__FreeBSD__)
+      SV("%c='jeu.  1 janv. 00:00:00 1970'\t%Ec='jeu.  1 janv. 00:00:00 1970'\n"),
+#else
+      SV("%c='jeu. 01 janv. 1970 00:00:00'\t%Ec='jeu. 01 janv. 1970 00:00:00'\n"),
+#endif
+      lfmt,
+      std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(
+// https://sourceware.org/bugzilla/show_bug.cgi?id=24054
+#if defined(__powerpc__) && defined(__linux__)
+      SV("%c='ven. 13 févr. 2009 23:31:30 UTC'\t%Ec='ven. 13 févr. 2009 23:31:30 UTC'\n"),
+#elif defined(__GLIBC__) && __GLIBC__ <= 2 && __GLIBC_MINOR__ < 29
+      SV("%c='ven. 13 févr. 2009 23:31:30 GMT'\t%Ec='ven. 13 févr. 2009 23:31:30 GMT'\n"),
+#elif defined(_AIX)
+      SV("%c='13 février 2009 à 23:31:30 UTC'\t%Ec='13 février 2009 à 23:31:30 UTC'\n"),
+#elif defined(__APPLE__)
+      SV("%c='Ven 13 fév 23:31:30 2009'\t%Ec='Ven 13 fév 23:31:30 2009'\n"),
+#elif defined(_WIN32)
+      SV("%c='13/02/2009 23:31:30'\t%Ec='13/02/2009 23:31:30'\n"),
+#elif defined(__FreeBSD__)
+      SV("%c='ven. 13 févr. 23:31:30 2009'\t%Ec='ven. 13 févr. 23:31:30 2009'\n"),
+#else
+      SV("%c='ven. 13 févr. 2009 23:31:30'\t%Ec='ven. 13 févr. 2009 23:31:30'\n"),
+#endif
+      lfmt,
+      std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009
+
+  // Use supplied locale (ja_JP). This locale has a different alternate representation.
+#if defined(__APPLE__) || defined(__FreeBSD__)
+  check(loc,
+        SV("%c='木  1/ 1 00:00:00 1970'\t%Ec='木  1/ 1 00:00:00 1970'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+  check(loc,
+        SV("%c='金  2/13 23:31:30 2009'\t%Ec='金  2/13 23:31:30 2009'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009
+#elif defined(_AIX)                                                         // defined(__APPLE__)|| defined(__FreeBSD__)
+  check(loc,
+        SV("%c='1970年01月 1日 00:00:00 UTC'\t%Ec='1970年01月 1日 00:00:00 UTC'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+  check(loc,
+        SV("%c='2009年02月13日 23:31:30 UTC'\t%Ec='2009年02月13日 23:31:30 UTC'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009
+#elif defined(_WIN32)                                                       // defined(__APPLE__)|| defined(__FreeBSD__)
+  check(loc,
+        SV("%c='1970/01/01 0:00:00'\t%Ec='1970/01/01 0:00:00'\n"),
+        lfmt,
+        std::chrono::sys_seconds(0s)); // 00:00:00 UTC Thursday, 1 January 1970
+  check(loc,
+        SV("%c='2009/02/13 23:31:30'\t%Ec='2009/02/13 23:31:30'\n"),
+        lfmt,
+        std::chrono::sys_seconds(1'234'567'890s)); // 23:31:30 UTC on Friday, 13 February 2009
+#else                                                                       // defined(__APPLE__)|| defined(__FreeBSD__)
+  check(loc,
+        SV("%c='1970年01月01日 00時00分00秒'\t%Ec='昭和45年01月01日 00時00分00秒'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  check(loc,
+        SV("%c='2009年02月13日 23時31分30秒'\t%Ec='平成21年02月13日 23時31分30秒'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{1'234'567'890s})); // 23:31:30 UTC on Friday, 13 February 2009
+#endif                                                                      // defined(__APPLE__)|| defined(__FreeBSD__)
+
+  std::locale::global(std::locale::classic());
+}
+
+template <class CharT>
+static void test_valid_values_time_zone() {
+  using namespace std::literals::chrono_literals;
+
+  constexpr std::basic_string_view<CharT> fmt  = SV("{:%%z='%z'%t%%Ez='%Ez'%t%%Oz='%Oz'%t%%Z='%Z'%n}");
+  constexpr std::basic_string_view<CharT> lfmt = SV("{:L%%z='%z'%t%%Ez='%Ez'%t%%Oz='%Oz'%t%%Z='%Z'%n}");
+
+  const std::locale loc(LOCALE_ja_JP_UTF_8);
+  std::locale::global(std::locale(LOCALE_fr_FR_UTF_8));
+
+  // Non localized output using C-locale
+  check(SV("%z='+0000'\t%Ez='+00:00'\t%Oz='+00:00'\t%Z='UTC'\n"),
+        fmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  // Use the global locale (fr_FR)
+  check(SV("%z='+0000'\t%Ez='+00:00'\t%Oz='+00:00'\t%Z='UTC'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  // Use supplied locale (ja_JP).
+  check(loc,
+        SV("%z='+0000'\t%Ez='+00:00'\t%Oz='+00:00'\t%Z='UTC'\n"),
+        lfmt,
+        std::chrono::zoned_time(std::chrono::sys_seconds{0s})); // 00:00:00 UTC Thursday, 1 January 1970
+
+  std::locale::global(std::locale::classic());
+}
+
+template <class CharT>
+static void test_valid_values() {
+  test_valid_values_year<CharT>();
+  test_valid_values_month<CharT>();
+  test_valid_values_day<CharT>();
+  test_valid_values_weekday<CharT>();
+  test_valid_values_day_of_year<CharT>();
+  test_valid_values_week<CharT>();
+  test_valid_values_iso_8601_week<CharT>();
+  test_valid_values_date<CharT>();
+  test_valid_values_time<CharT>();
+  test_valid_values_date_time<CharT>();
+  test_valid_values_time_zone<CharT>();
+}
+
+template <class CharT>
+static void test() {
+  test_no_chrono_specs<CharT>();
+  test_valid_values<CharT>();
+
+  check_invalid_types<CharT>(
+      {SV("a"),  SV("A"),  SV("b"),  SV("B"),  SV("c"),  SV("C"),  SV("d"),  SV("D"),  SV("e"),  SV("F"),  SV("g"),
+       SV("G"),  SV("h"),  SV("H"),  SV("I"),  SV("j"),  SV("m"),  SV("M"),  SV("p"),  SV("r"),  SV("R"),  SV("S"),
+       SV("T"),  SV("u"),  SV("U"),  SV("V"),  SV("w"),  SV("W"),  SV("x"),  SV("X"),  SV("y"),  SV("Y"),  SV("z"),
+       SV("Z"),  SV("Ec"), SV("EC"), SV("Ex"), SV("EX"), SV("Ey"), SV("EY"), SV("Ez"), SV("Od"), SV("Oe"), SV("OH"),
+       SV("OI"), SV("Om"), SV("OM"), SV("OS"), SV("Ou"), SV("OU"), SV("OV"), SV("Ow"), SV("OW"), SV("Oy"), SV("Oz")},
+      std::chrono::zoned_time{});
+}
+
+int main(int, char**) {
+  test<char>();
+
+#ifndef TEST_HAS_NO_WIDE_CHARACTERS
+  test<wchar_t>();
+#endif
+
+  return 0;
+}
diff --git a/libcxx/test/std/time/time.zone/time.zone.zonedtime/test_offset_time_zone.h b/libcxx/test/std/time/time.zone/time.zone.zonedtime/test_offset_time_zone.h
index c137049bde8aa..e9262c5d95db1 100644
--- a/libcxx/test/std/time/time.zone/time.zone.zonedtime/test_offset_time_zone.h
+++ b/libcxx/test/std/time/time.zone/time.zone.zonedtime/test_offset_time_zone.h
@@ -13,6 +13,7 @@
 #include <cassert>
 #include <charconv>
 #include <chrono>
+#include <format>
 #include <string_view>
 #include <type_traits>
 
@@ -42,6 +43,8 @@ class offset_time_zone {
 
   offset_time_zone* operator->() { return this; }
 
+  const offset_time_zone* operator->() const { return this; }
+
   template <class Duration>
   std::chrono::sys_time<std::common_type_t<Duration, std::chrono::seconds>>
   to_sys(const std::chrono::local_time<Duration>& local) const {
@@ -49,6 +52,22 @@ class offset_time_zone {
         local.time_since_epoch() + offset_};
   }
 
+  template <class Duration>
+  std::chrono::local_time<std::common_type_t<Duration, std::chrono::seconds>>
+  to_local(const std::chrono::sys_time<Duration>& sys) const {
+    return std::chrono::local_time<std::common_type_t<Duration, std::chrono::seconds>>{
+        sys.time_since_epoch() - offset_};
+  }
+
+  template <class Duration>
+  std::chrono::sys_info get_info(const std::chrono::sys_time<Duration>&) const {
+    return {std::chrono::sys_seconds::min(),
+            std::chrono::sys_seconds::max(),
+            offset_,
+            std::chrono::minutes{0},
+            std::format("{:+03d}s", offset_.count())};
+  }
+
 private:
   std::chrono::seconds offset_;
 };
diff --git a/libcxx/test/std/time/time.zone/time.zone.zonedtime/time.zone.zonedtime.nonmembers/ostream.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.zonedtime/time.zone.zonedtime.nonmembers/ostream.pass.cpp
new file mode 100644
index 0000000000000..06131d66c0f5c
--- /dev/null
+++ b/libcxx/test/std/time/time.zone/time.zone.zonedtime/time.zone.zonedtime.nonmembers/ostream.pass.cpp
@@ -0,0 +1,351 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: no-filesystem, no-localization, no-tzdb
+
+// TODO FMT This test should not require std::to_chars(floating-point)
+// XFAIL: availability-fp_to_chars-missing
+
+// XFAIL: libcpp-has-no-experimental-tzdb
+
+// REQUIRES: locale.fr_FR.UTF-8
+// REQUIRES: locale.ja_JP.UTF-8
+
+// <chrono>
+
+// template<class charT, class traits, class Duration, class TimeZonePtr>
+// basic_ostream<charT, traits>&
+// operator<<(basic_ostream<charT, traits>& os,
+//            const zoned_time<Duration, TimeZonePtr>& t);
+
+#include <chrono>
+#include <cassert>
+#include <sstream>
+
+#include "assert_macros.h"
+#include "concat_macros.h"
+#include "make_string.h"
+#include "platform_support.h" // locale name macros
+#include "test_macros.h"
+#include "../test_offset_time_zone.h"
+
+#define SV(S) MAKE_STRING_VIEW(CharT, S)
+
+#define TEST_EQUAL(OUT, EXPECTED)                                                                                      \
+  TEST_REQUIRE(OUT == EXPECTED,                                                                                        \
+               TEST_WRITE_CONCATENATED(                                                                                \
+                   "\nExpression      ", #OUT, "\nExpected output ", EXPECTED, "\nActual output   ", OUT, '\n'));
+
+template <class CharT, class Duration, class TimeZonePtr>
+static std::basic_string<CharT> stream_c_locale(std::chrono::zoned_time<Duration, TimeZonePtr> time_point) {
+  std::basic_stringstream<CharT> sstr;
+  sstr << time_point;
+  return sstr.str();
+}
+
+template <class CharT, class Duration, class TimeZonePtr>
+static std::basic_string<CharT> stream_fr_FR_locale(std::chrono::zoned_time<Duration, TimeZonePtr> time_point) {
+  std::basic_stringstream<CharT> sstr;
+  const std::locale locale(LOCALE_fr_FR_UTF_8);
+  sstr.imbue(locale);
+  sstr << time_point;
+  return sstr.str();
+}
+
+template <class CharT, class Duration, class TimeZonePtr>
+static std::basic_string<CharT> stream_ja_JP_locale(std::chrono::zoned_time<Duration, TimeZonePtr> time_point) {
+  std::basic_stringstream<CharT> sstr;
+  const std::locale locale(LOCALE_ja_JP_UTF_8);
+  sstr.imbue(locale);
+  sstr << time_point;
+  return sstr.str();
+}
+
+template <class CharT>
+static void test_c() {
+  using namespace std::literals::chrono_literals;
+
+  { //  Different durations
+    TEST_EQUAL(stream_c_locale<CharT>(
+                   std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::nanoseconds>{42ns})),
+               SV("1970-01-01 01:00:00.000000042 +01"));
+
+    TEST_EQUAL(stream_c_locale<CharT>(
+                   std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::microseconds>{42us})),
+               SV("1970-01-01 01:00:00.000042 +01"));
+
+    TEST_EQUAL(stream_c_locale<CharT>(
+                   std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::milliseconds>{42ms})),
+               SV("1970-01-01 01:00:00.042 +01"));
+
+    TEST_EQUAL(
+        stream_c_locale<CharT>(std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::seconds>{42s})),
+        SV("1970-01-01 01:00:42 +01"));
+
+    TEST_EQUAL(stream_c_locale<CharT>(std::chrono::zoned_time(
+                   "Etc/GMT-1", std::chrono::sys_time<std::chrono::days>{std::chrono::days{42}})),
+               SV("1970-02-12 01:00:00 +01"));
+
+    TEST_EQUAL(stream_c_locale<CharT>(std::chrono::zoned_time(
+                   "Etc/GMT-1", std::chrono::sys_time<std::chrono::weeks>{std::chrono::weeks{42}})),
+               SV("1970-10-22 01:00:00 +01"));
+  }
+
+  { // Daylight saving time switches
+    // Pick an historic date where it's well known what the time zone rules were.
+    // This makes it unlikely updates to the database change these rules.
+
+    // Z Europe/Berlin 0:53:28 - LMT 1893 Ap
+    // ...
+    // 1 DE CE%sT 1980
+    // 1 E CE%sT
+    //
+    // ...
+    // R E 1979 1995 - S lastSu 1u 0 -
+    // R E 1981 ma - Mar lastSu 1u 1 S
+
+    // Per the rules above, in 1986 CEST starts on the last Sunday of March
+    // (30th) and ends on the last Sunday of September (28th), both at 01:00 UTC.
+
+    // Start of daylight saving time
+    TEST_EQUAL(stream_c_locale<CharT>(std::chrono::zoned_time(
+                   "Europe/Berlin", std::chrono::sys_days{std::chrono::March / 30 / 1986} + 0h + 59min + 59s)),
+               SV("1986-03-30 01:59:59 CET"));
+
+    TEST_EQUAL(stream_c_locale<CharT>(std::chrono::zoned_time(
+                   "Europe/Berlin", std::chrono::sys_days{std::chrono::March / 30 / 1986} + 1h)),
+               SV("1986-03-30 03:00:00 CEST"));
+
+    // End of daylight saving time
+    TEST_EQUAL(stream_c_locale<CharT>(std::chrono::zoned_time(
+                   "Europe/Berlin", std::chrono::sys_days{std::chrono::September / 28 / 1986} + 0h + 59min + 59s)),
+               SV("1986-09-28 02:59:59 CEST"));
+
+    TEST_EQUAL(stream_c_locale<CharT>(std::chrono::zoned_time(
+                   "Europe/Berlin", std::chrono::sys_days{std::chrono::September / 28 / 1986} + 1h)),
+               SV("1986-09-28 02:00:00 CET"));
+
+    TEST_EQUAL(stream_c_locale<CharT>(std::chrono::zoned_time(
+                   "Europe/Berlin", std::chrono::sys_days{std::chrono::September / 28 / 1986} + 1h + 59min + 59s)),
+               SV("1986-09-28 02:59:59 CET"));
+
+    TEST_EQUAL(stream_c_locale<CharT>(std::chrono::zoned_time(
+                   "Europe/Berlin", std::chrono::sys_days{std::chrono::September / 28 / 1986} + 2h)),
+               SV("1986-09-28 03:00:00 CET"));
+  }
+
+  { // offset pointer
+    TEST_EQUAL(stream_c_locale<CharT>(std::chrono::zoned_time(
+                   offset_time_zone<offset_time_zone_flags::none>{}, std::chrono::sys_seconds{})),
+               SV("1970-01-01 00:00:00 +00s"));
+
+    TEST_EQUAL(stream_c_locale<CharT>(std::chrono::zoned_time(
+                   offset_time_zone<offset_time_zone_flags::none>{"42"}, std::chrono::sys_seconds{})),
+               SV("1969-12-31 23:59:18 +42s"));
+
+    TEST_EQUAL(stream_c_locale<CharT>(std::chrono::zoned_time(
+                   offset_time_zone<offset_time_zone_flags::none>{"-42"}, std::chrono::sys_seconds{})),
+               SV("1970-01-01 00:00:42 -42s"));
+  }
+}
+
+template <class CharT>
+static void test_fr_FR() {
+  using namespace std::literals::chrono_literals;
+
+  { //  Different durations
+
+    TEST_EQUAL(stream_fr_FR_locale<CharT>(
+                   std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::nanoseconds>{42ns})),
+               SV("1970-01-01 01:00:00,000000042 +01"));
+
+    TEST_EQUAL(stream_fr_FR_locale<CharT>(
+                   std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::microseconds>{42us})),
+               SV("1970-01-01 01:00:00,000042 +01"));
+
+    TEST_EQUAL(stream_fr_FR_locale<CharT>(
+                   std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::milliseconds>{42ms})),
+               SV("1970-01-01 01:00:00,042 +01"));
+
+    TEST_EQUAL(stream_fr_FR_locale<CharT>(
+                   std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::seconds>{42s})),
+               SV("1970-01-01 01:00:42 +01"));
+
+    TEST_EQUAL(stream_fr_FR_locale<CharT>(std::chrono::zoned_time(
+                   "Etc/GMT-1", std::chrono::sys_time<std::chrono::days>{std::chrono::days{42}})),
+               SV("1970-02-12 01:00:00 +01"));
+
+    TEST_EQUAL(stream_fr_FR_locale<CharT>(std::chrono::zoned_time(
+                   "Etc/GMT-1", std::chrono::sys_time<std::chrono::weeks>{std::chrono::weeks{42}})),
+               SV("1970-10-22 01:00:00 +01"));
+  }
+
+  { // Daylight saving time switches
+    // Pick an historic date where it's well known what the time zone rules were.
+    // This makes it unlikely updates to the database change these rules.
+
+    // Z Europe/Berlin 0:53:28 - LMT 1893 Ap
+    // ...
+    // 1 DE CE%sT 1980
+    // 1 E CE%sT
+    //
+    // ...
+    // R E 1979 1995 - S lastSu 1u 0 -
+    // R E 1981 ma - Mar lastSu 1u 1 S
+
+    // Per the rules above, in 1986 CEST starts on the last Sunday of March
+    // (30th) and ends on the last Sunday of September (28th), both at 01:00 UTC.
+
+    // Start of daylight saving time
+    TEST_EQUAL(stream_fr_FR_locale<CharT>(std::chrono::zoned_time(
+                   "Europe/Berlin", std::chrono::sys_days{std::chrono::March / 30 / 1986} + 0h + 59min + 59s)),
+               SV("1986-03-30 01:59:59 CET"));
+
+    TEST_EQUAL(stream_fr_FR_locale<CharT>(std::chrono::zoned_time(
+                   "Europe/Berlin", std::chrono::sys_days{std::chrono::March / 30 / 1986} + 1h)),
+               SV("1986-03-30 03:00:00 CEST"));
+
+    // End of daylight saving time
+    TEST_EQUAL(stream_fr_FR_locale<CharT>(std::chrono::zoned_time(
+                   "Europe/Berlin", std::chrono::sys_days{std::chrono::September / 28 / 1986} + 0h + 59min + 59s)),
+               SV("1986-09-28 02:59:59 CEST"));
+
+    TEST_EQUAL(stream_fr_FR_locale<CharT>(std::chrono::zoned_time(
+                   "Europe/Berlin", std::chrono::sys_days{std::chrono::September / 28 / 1986} + 1h)),
+               SV("1986-09-28 02:00:00 CET"));
+
+    TEST_EQUAL(stream_fr_FR_locale<CharT>(std::chrono::zoned_time(
+                   "Europe/Berlin", std::chrono::sys_days{std::chrono::September / 28 / 1986} + 1h + 59min + 59s)),
+               SV("1986-09-28 02:59:59 CET"));
+
+    TEST_EQUAL(stream_fr_FR_locale<CharT>(std::chrono::zoned_time(
+                   "Europe/Berlin", std::chrono::sys_days{std::chrono::September / 28 / 1986} + 2h)),
+               SV("1986-09-28 03:00:00 CET"));
+  }
+
+  { // offset pointer
+    TEST_EQUAL(stream_fr_FR_locale<CharT>(std::chrono::zoned_time(
+                   offset_time_zone<offset_time_zone_flags::none>{}, std::chrono::sys_seconds{})),
+               SV("1970-01-01 00:00:00 +00s"));
+
+    TEST_EQUAL(stream_fr_FR_locale<CharT>(std::chrono::zoned_time(
+                   offset_time_zone<offset_time_zone_flags::none>{"42"}, std::chrono::sys_seconds{})),
+               SV("1969-12-31 23:59:18 +42s"));
+
+    TEST_EQUAL(stream_fr_FR_locale<CharT>(std::chrono::zoned_time(
+                   offset_time_zone<offset_time_zone_flags::none>{"-42"}, std::chrono::sys_seconds{})),
+               SV("1970-01-01 00:00:42 -42s"));
+  }
+}
+
+template <class CharT>
+static void test_ja_JP() {
+  using namespace std::literals::chrono_literals;
+
+  { //  Different durations
+
+    TEST_EQUAL(stream_ja_JP_locale<CharT>(
+                   std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::nanoseconds>{42ns})),
+               SV("1970-01-01 01:00:00.000000042 +01"));
+
+    TEST_EQUAL(stream_ja_JP_locale<CharT>(
+                   std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::microseconds>{42us})),
+               SV("1970-01-01 01:00:00.000042 +01"));
+
+    TEST_EQUAL(stream_ja_JP_locale<CharT>(
+                   std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::milliseconds>{42ms})),
+               SV("1970-01-01 01:00:00.042 +01"));
+
+    TEST_EQUAL(stream_ja_JP_locale<CharT>(
+                   std::chrono::zoned_time("Etc/GMT-1", std::chrono::sys_time<std::chrono::seconds>{42s})),
+               SV("1970-01-01 01:00:42 +01"));
+
+    TEST_EQUAL(stream_ja_JP_locale<CharT>(std::chrono::zoned_time(
+                   "Etc/GMT-1", std::chrono::sys_time<std::chrono::days>{std::chrono::days{42}})),
+               SV("1970-02-12 01:00:00 +01"));
+
+    TEST_EQUAL(stream_ja_JP_locale<CharT>(std::chrono::zoned_time(
+                   "Etc/GMT-1", std::chrono::sys_time<std::chrono::weeks>{std::chrono::weeks{42}})),
+               SV("1970-10-22 01:00:00 +01"));
+  }
+
+  { // Daylight saving time switches
+    // Pick an historic date where it's well known what the time zone rules were.
+    // This makes it unlikely updates to the database change these rules.
+
+    // Z Europe/Berlin 0:53:28 - LMT 1893 Ap
+    // ...
+    // 1 DE CE%sT 1980
+    // 1 E CE%sT
+    //
+    // ...
+    // R E 1979 1995 - S lastSu 1u 0 -
+    // R E 1981 ma - Mar lastSu 1u 1 S
+
+    // Per the rules above, in 1986 CEST starts on the last Sunday of March
+    // (30th) and ends on the last Sunday of September (28th), both at 01:00 UTC.
+
+    // Start of daylight saving time
+    TEST_EQUAL(stream_ja_JP_locale<CharT>(std::chrono::zoned_time(
+                   "Europe/Berlin", std::chrono::sys_days{std::chrono::March / 30 / 1986} + 0h + 59min + 59s)),
+               SV("1986-03-30 01:59:59 CET"));
+
+    TEST_EQUAL(stream_ja_JP_locale<CharT>(std::chrono::zoned_time(
+                   "Europe/Berlin", std::chrono::sys_days{std::chrono::March / 30 / 1986} + 1h)),
+               SV("1986-03-30 03:00:00 CEST"));
+
+    // End of daylight saving time
+    TEST_EQUAL(stream_ja_JP_locale<CharT>(std::chrono::zoned_time(
+                   "Europe/Berlin", std::chrono::sys_days{std::chrono::September / 28 / 1986} + 0h + 59min + 59s)),
+               SV("1986-09-28 02:59:59 CEST"));
+
+    TEST_EQUAL(stream_ja_JP_locale<CharT>(std::chrono::zoned_time(
+                   "Europe/Berlin", std::chrono::sys_days{std::chrono::September / 28 / 1986} + 1h)),
+               SV("1986-09-28 02:00:00 CET"));
+
+    TEST_EQUAL(stream_ja_JP_locale<CharT>(std::chrono::zoned_time(
+                   "Europe/Berlin", std::chrono::sys_days{std::chrono::September / 28 / 1986} + 1h + 59min + 59s)),
+               SV("1986-09-28 02:59:59 CET"));
+
+    TEST_EQUAL(stream_ja_JP_locale<CharT>(std::chrono::zoned_time(
+                   "Europe/Berlin", std::chrono::sys_days{std::chrono::September / 28 / 1986} + 2h)),
+               SV("1986-09-28 03:00:00 CET"));
+  }
+
+  { // offset pointer
+    TEST_EQUAL(stream_ja_JP_locale<CharT>(std::chrono::zoned_time(
+                   offset_time_zone<offset_time_zone_flags::none>{}, std::chrono::sys_seconds{})),
+               SV("1970-01-01 00:00:00 +00s"));
+
+    TEST_EQUAL(stream_ja_JP_locale<CharT>(std::chrono::zoned_time(
+                   offset_time_zone<offset_time_zone_flags::none>{"42"}, std::chrono::sys_seconds{})),
+               SV("1969-12-31 23:59:18 +42s"));
+
+    TEST_EQUAL(stream_ja_JP_locale<CharT>(std::chrono::zoned_time(
+                   offset_time_zone<offset_time_zone_flags::none>{"-42"}, std::chrono::sys_seconds{})),
+               SV("1970-01-01 00:00:42 -42s"));
+  }
+}
+
+template <class CharT>
+static void test() {
+  test_c<CharT>();
+  test_fr_FR<CharT>();
+  test_ja_JP<CharT>();
+}
+
+int main(int, char**) {
+  test<char>();
+
+#ifndef TEST_HAS_NO_WIDE_CHARACTERS
+  test<wchar_t>();
+#endif
+
+  return 0;
+}
diff --git a/libcxx/utils/libcxx/test/modules.py b/libcxx/utils/libcxx/test/modules.py
index aab7651c7bb03..b7758dc9a41ee 100644
--- a/libcxx/utils/libcxx/test/modules.py
+++ b/libcxx/utils/libcxx/test/modules.py
@@ -76,6 +76,13 @@
 # This declaration is in the ostream header.
 ExtraDeclarations["system_error"] = ["std::operator<<"]
 
+# TODO MODULES avoid this work-around
+# This is a work-around for the special math functions. They are declared in
+# __math/special_functions.h. Adding this as an ExtraHeader works for the std
+# module. However these functions are special; they are not available in the
+# global namespace.
+ExtraDeclarations["cmath"] = ["std::hermite", "std::hermitef", "std::hermitel"]
+
 ### ExtraHeader
 
 # Adds extra headers file to scan
diff --git a/lld/ELF/ScriptLexer.cpp b/lld/ELF/ScriptLexer.cpp
index 14f39ed10e17c..c8c02ab0f3e09 100644
--- a/lld/ELF/ScriptLexer.cpp
+++ b/lld/ELF/ScriptLexer.cpp
@@ -272,20 +272,10 @@ StringRef ScriptLexer::peek() {
   return tok;
 }
 
-StringRef ScriptLexer::peek2() {
-  skip();
-  StringRef tok = next();
-  if (errorCount())
-    return "";
-  pos = pos - 2;
-  return tok;
-}
-
 bool ScriptLexer::consume(StringRef tok) {
-  if (peek() == tok) {
-    skip();
+  if (next() == tok)
     return true;
-  }
+  --pos;
   return false;
 }
 
diff --git a/lld/ELF/ScriptLexer.h b/lld/ELF/ScriptLexer.h
index 7919e493fa28b..d5393818ed553 100644
--- a/lld/ELF/ScriptLexer.h
+++ b/lld/ELF/ScriptLexer.h
@@ -26,7 +26,6 @@ class ScriptLexer {
   bool atEOF();
   StringRef next();
   StringRef peek();
-  StringRef peek2();
   void skip();
   bool consume(StringRef tok);
   void expect(StringRef expect);
diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp
index 47a94c29ea496..49aa7e6374905 100644
--- a/lld/ELF/ScriptParser.cpp
+++ b/lld/ELF/ScriptParser.cpp
@@ -92,7 +92,7 @@ class ScriptParser final : ScriptLexer {
   SymbolAssignment *readSymbolAssignment(StringRef name);
   ByteCommand *readByteCommand(StringRef tok);
   std::array<uint8_t, 4> readFill();
-  bool readSectionDirective(OutputSection *cmd, StringRef tok1, StringRef tok2);
+  bool readSectionDirective(OutputSection *cmd, StringRef tok);
   void readSectionAddressType(OutputSection *cmd);
   OutputDesc *readOverlaySectionDescription();
   OutputDesc *readOutputSectionDescription(StringRef outSec);
@@ -873,16 +873,11 @@ constexpr std::pair<const char *, unsigned> typeMap[] = {
 // Tries to read the special directive for an output section definition which
 // can be one of following: "(NOLOAD)", "(COPY)", "(INFO)", "(OVERLAY)", and
 // "(TYPE=<value>)".
-// Tok1 and Tok2 are next 2 tokens peeked. See comment for
-// readSectionAddressType below.
-bool ScriptParser::readSectionDirective(OutputSection *cmd, StringRef tok1, StringRef tok2) {
-  if (tok1 != "(")
-    return false;
-  if (tok2 != "NOLOAD" && tok2 != "COPY" && tok2 != "INFO" &&
-      tok2 != "OVERLAY" && tok2 != "TYPE")
+bool ScriptParser::readSectionDirective(OutputSection *cmd, StringRef tok) {
+  if (tok != "NOLOAD" && tok != "COPY" && tok != "INFO" && tok != "OVERLAY" &&
+      tok != "TYPE")
     return false;
 
-  expect("(");
   if (consume("NOLOAD")) {
     cmd->type = SHT_NOBITS;
     cmd->typeIsSet = true;
@@ -921,16 +916,23 @@ bool ScriptParser::readSectionDirective(OutputSection *cmd, StringRef tok1, Stri
 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html
 // https://sourceware.org/binutils/docs/ld/Output-Section-Type.html
 void ScriptParser::readSectionAddressType(OutputSection *cmd) {
-  // Temporarily set inExpr to support TYPE=<value> without spaces.
-  bool saved = std::exchange(inExpr, true);
-  bool isDirective = readSectionDirective(cmd, peek(), peek2());
-  inExpr = saved;
-  if (isDirective)
-    return;
+  if (consume("(")) {
+    // Temporarily set inExpr to support TYPE=<value> without spaces.
+    SaveAndRestore saved(inExpr, true);
+    if (readSectionDirective(cmd, peek()))
+      return;
+    cmd->addrExpr = readExpr();
+    expect(")");
+  } else {
+    cmd->addrExpr = readExpr();
+  }
 
-  cmd->addrExpr = readExpr();
-  if (peek() == "(" && !readSectionDirective(cmd, "(", peek2()))
-    setError("unknown section directive: " + peek2());
+  if (consume("(")) {
+    SaveAndRestore saved(inExpr, true);
+    StringRef tok = peek();
+    if (!readSectionDirective(cmd, tok))
+      setError("unknown section directive: " + tok);
+  }
 }
 
 static Expr checkAlignment(Expr e, std::string &loc) {
@@ -1180,10 +1182,8 @@ SymbolAssignment *ScriptParser::readSymbolAssignment(StringRef name) {
 Expr ScriptParser::readExpr() {
   // Our lexer is context-aware. Set the in-expression bit so that
   // they apply different tokenization rules.
-  bool orig = inExpr;
-  inExpr = true;
+  SaveAndRestore saved(inExpr, true);
   Expr e = readExpr1(readPrimary(), 0);
-  inExpr = orig;
   return e;
 }
 
@@ -1249,9 +1249,9 @@ Expr ScriptParser::readExpr1(Expr lhs, int minPrec) {
     StringRef op1 = peek();
     if (precedence(op1) < minPrec)
       break;
-    if (consume("?"))
-      return readTernary(lhs);
     skip();
+    if (op1 == "?")
+      return readTernary(lhs);
     Expr rhs = readPrimary();
 
     // Evaluate the remaining part of the expression first if the
diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 4a6f99654ba13..9c056f40aa943 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -1303,12 +1303,16 @@ void ObjcCategoryMerger::eraseMergedCategories() {
         continue;
 
       eraseISec(catInfo.catBodyIsec);
-      // We can't erase 'catLayout.nameOffset' for Swift categories because the
-      // name will be referenced for generating relative offsets
-      // See usages of 'l_.str.11.SimpleClass' in objc-category-merging-swift.s
+
+      // We can't erase 'catLayout.nameOffset' for either Swift or ObjC
+      //   categories because the name will sometimes also be used for other
+      //   purposes.
+      // For Swift, see usages of 'l_.str.11.SimpleClass' in
+      //   objc-category-merging-swift.s
+      // For ObjC, see usages of 'l_OBJC_CLASS_NAME_.1' in
+      //   objc-category-merging-erase-objc-name-test.s
       // TODO: handle the above in a smarter way
-      if (catInfo.sourceLanguage != SourceLanguage::Swift)
-        tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, catLayout.nameOffset);
+
       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
                                   catLayout.instanceMethodsOffset);
       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
diff --git a/lld/test/ELF/defsym.s b/lld/test/ELF/defsym.s
index 0168ce854cc15..fed937ffc1c9c 100644
--- a/lld/test/ELF/defsym.s
+++ b/lld/test/ELF/defsym.s
@@ -11,8 +11,7 @@
 
 ## Check we are reporting the error correctly and don't crash
 ## when handling the second --defsym.
-# RUN: not ld.lld -o /dev/null %t.o --defsym ERR+ \
-#        --defsym foo2=foo1 2>&1 | FileCheck %s --check-prefix=ERR
+# RUN: not ld.lld -o /dev/null %t.o --defsym ERR+ --defsym foo2=foo1 2>&1 | FileCheck %s --check-prefix=ERR
 # ERR: error: --defsym: syntax error: ERR+
 
 # CHECK-DAG: 0000000000000123     0 NOTYPE  GLOBAL DEFAULT   ABS foo1
@@ -27,7 +26,7 @@
 # RUN: ld.lld -o %t %t.o --defsym=foo2=1
 # RUN: llvm-readelf -s %t | FileCheck %s --check-prefix=ABS
 
-# ABS: 0000000000000123     0 NOTYPE  GLOBAL DEFAULT   ABS foo2
+# ABS: 0000000000000001     0 NOTYPE  GLOBAL DEFAULT   ABS foo2
 
 # RUN: ld.lld -o %t %t.o --defsym=foo2=foo1+5
 # RUN: llvm-readelf -s %t | FileCheck %s --check-prefix=EXPR
diff --git a/lld/test/ELF/linkerscript/custom-section-type.s b/lld/test/ELF/linkerscript/custom-section-type.s
index 8ca0a4db325bd..2add3a52f8117 100644
--- a/lld/test/ELF/linkerscript/custom-section-type.s
+++ b/lld/test/ELF/linkerscript/custom-section-type.s
@@ -67,7 +67,7 @@ SECTIONS {
   nobits ( TYPE=SHT_NOBITS) : { BYTE(8) }
   init_array (TYPE=SHT_INIT_ARRAY ) : { QUAD(myinit) }
   fini_array (TYPE=SHT_FINI_ARRAY) : { QUAD(15) }
-  preinit_array (TYPE=SHT_PREINIT_ARRAY) : { QUAD(16) }
+  preinit_array . (TYPE=SHT_PREINIT_ARRAY) : { QUAD(16) }
   group (TYPE=17) : { LONG(17) }
   expr (TYPE=0x41+1) : { BYTE(0x42) *(expr) }
 }
diff --git a/lld/test/MachO/objc-category-merging-erase-objc-name-test.s b/lld/test/MachO/objc-category-merging-erase-objc-name-test.s
new file mode 100644
index 0000000000000..01c5c4fd9e0c3
--- /dev/null
+++ b/lld/test/MachO/objc-category-merging-erase-objc-name-test.s
@@ -0,0 +1,306 @@
+; REQUIRES: aarch64
+
+; Here we test that if we define a protocol MyTestProtocol and also a category named MyTestProtocol,
+; then when merging the category into the base class (and deleting the category), we don't
+; delete the shared 'MyTestProtocol' name string, which the protocol still references.
+
+; RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o erase-objc-name.o %s
+; RUN: %lld -arch arm64 -dylib -o erase-objc-name.dylib erase-objc-name.o -objc_category_merging
+; RUN: llvm-objdump --objc-meta-data --macho erase-objc-name.dylib | FileCheck %s --check-prefixes=MERGE_CATS
+
+; === Check merge categories enabled ===
+; Check that the original categories are not there
+; MERGE_CATS-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category01
+; MERGE_CATS-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category02
+
+; Check that we get the expected output - most importantly that the protocol is named `MyTestProtocol`
+; MERGE_CATS:        Contents of (__DATA_CONST,__objc_classlist) section
+; MERGE_CATS-NEXT:   _OBJC_CLASS_$_MyBaseClass
+; MERGE_CATS-NEXT:            isa {{.*}} _OBJC_METACLASS_$_MyBaseClass
+; MERGE_CATS-NEXT:     superclass {{.*}}
+; MERGE_CATS-NEXT:          cache {{.*}}
+; MERGE_CATS-NEXT:         vtable {{.*}}
+; MERGE_CATS-NEXT:           data {{.*}} (struct class_ro_t *)
+; MERGE_CATS-NEXT:                     flags {{.*}} RO_ROOT
+; MERGE_CATS-NEXT:             instanceStart 0
+; MERGE_CATS-NEXT:              instanceSize 0
+; MERGE_CATS-NEXT:                  reserved {{.*}}
+; MERGE_CATS-NEXT:                ivarLayout {{.*}}
+; MERGE_CATS-NEXT:                      name {{.*}} MyBaseClass
+; MERGE_CATS-NEXT:               baseMethods {{.*}} (struct method_list_t *)
+; MERGE_CATS-NEXT:             entsize 24
+; MERGE_CATS-NEXT:               count 2
+; MERGE_CATS-NEXT:                name {{.*}} getValue
+; MERGE_CATS-NEXT:               types {{.*}} i16@0:8
+; MERGE_CATS-NEXT:                 imp -[MyBaseClass(MyTestProtocol) getValue]
+; MERGE_CATS-NEXT:                name {{.*}} baseInstanceMethod
+; MERGE_CATS-NEXT:               types {{.*}} v16@0:8
+; MERGE_CATS-NEXT:                 imp -[MyBaseClass baseInstanceMethod]
+; MERGE_CATS-NEXT:             baseProtocols {{.*}}
+; MERGE_CATS-NEXT:                       count 1
+; MERGE_CATS-NEXT:                list[0] {{.*}} (struct protocol_t *)
+; MERGE_CATS-NEXT:                       isa {{.*}}
+; MERGE_CATS-NEXT:                      name {{.*}} MyTestProtocol
+; MERGE_CATS-NEXT:                 protocols {{.*}}
+; MERGE_CATS-NEXT:            instanceMethods {{.*}} (struct method_list_t *)
+; MERGE_CATS-NEXT:                    entsize 24
+; MERGE_CATS-NEXT:                      count 1
+; MERGE_CATS-NEXT:                       name {{.*}} getValue
+; MERGE_CATS-NEXT:                      types {{.*}} i16@0:8
+; MERGE_CATS-NEXT:                        imp {{.*}}
+; MERGE_CATS-NEXT:               classMethods {{.*}} (struct method_list_t *)
+; MERGE_CATS-NEXT:     optionalInstanceMethods {{.*}}
+; MERGE_CATS-NEXT:        optionalClassMethods {{.*}}
+; MERGE_CATS-NEXT:          instanceProperties {{.*}}
+; MERGE_CATS-NEXT:                     ivars {{.*}}
+; MERGE_CATS-NEXT:            weakIvarLayout {{.*}}
+; MERGE_CATS-NEXT:            baseProperties {{.*}}
+; MERGE_CATS-NEXT: Meta Class
+; MERGE_CATS-NEXT:            isa {{.*}} _OBJC_METACLASS_$_MyBaseClass
+; MERGE_CATS-NEXT:     superclass {{.*}} _OBJC_CLASS_$_MyBaseClass
+; MERGE_CATS-NEXT:          cache {{.*}}
+; MERGE_CATS-NEXT:         vtable {{.*}}
+; MERGE_CATS-NEXT:           data {{.*}} (struct class_ro_t *)
+; MERGE_CATS-NEXT:                     flags {{.*}} RO_META RO_ROOT
+; MERGE_CATS-NEXT:             instanceStart 40
+; MERGE_CATS-NEXT:              instanceSize 40
+; MERGE_CATS-NEXT:                  reserved {{.*}}
+; MERGE_CATS-NEXT:                ivarLayout {{.*}}
+; MERGE_CATS-NEXT:                      name {{.*}} MyBaseClass
+; MERGE_CATS-NEXT:               baseMethods {{.*}} (struct method_list_t *)
+; MERGE_CATS-NEXT:             baseProtocols {{.*}}
+; MERGE_CATS-NEXT:                       count 1
+; MERGE_CATS-NEXT:                list[0] {{.*}} (struct protocol_t *)
+; MERGE_CATS-NEXT:                       isa {{.*}}
+; MERGE_CATS-NEXT:                      name {{.*}} MyTestProtocol
+; MERGE_CATS-NEXT:                 protocols {{.*}}
+; MERGE_CATS-NEXT:            instanceMethods {{.*}} (struct method_list_t *)
+; MERGE_CATS-NEXT:                    entsize 24
+; MERGE_CATS-NEXT:                      count 1
+; MERGE_CATS-NEXT:                       name {{.*}} getValue
+; MERGE_CATS-NEXT:                      types {{.*}} i16@0:8
+; MERGE_CATS-NEXT:                        imp {{.*}}
+; MERGE_CATS-NEXT:               classMethods {{.*}} (struct method_list_t *)
+; MERGE_CATS-NEXT:     optionalInstanceMethods {{.*}}
+; MERGE_CATS-NEXT:        optionalClassMethods {{.*}}
+; MERGE_CATS-NEXT:          instanceProperties {{.*}}
+; MERGE_CATS-NEXT:                     ivars {{.*}}
+; MERGE_CATS-NEXT:            weakIvarLayout {{.*}}
+; MERGE_CATS-NEXT:            baseProperties {{.*}}
+; MERGE_CATS-NEXT: Contents of (__DATA_CONST,__objc_protolist) section
+; MERGE_CATS-NEXT: {{.*}} {{.*}} __OBJC_PROTOCOL_$_MyTestProtocol
+; MERGE_CATS-NEXT: Contents of (__DATA_CONST,__objc_imageinfo) section
+; MERGE_CATS-NEXT:   version 0
+; MERGE_CATS-NEXT:     flags {{.*}} OBJC_IMAGE_HAS_CATEGORY_CLASS_PROPERTIES
+
+
+; ================== repro.sh ====================
+; # Write the Objective-C code to a file
+; cat << EOF > MyClass.m
+; @protocol MyTestProtocol
+; - (int)getValue;
+; @end
+;
+; __attribute__((objc_root_class))
+; @interface MyBaseClass
+; - (void)baseInstanceMethod;
+; @end
+;
+; @implementation MyBaseClass
+; - (void)baseInstanceMethod {}
+; @end
+;
+; @interface MyBaseClass (MyTestProtocol) <MyTestProtocol>
+; @end
+;
+; @implementation MyBaseClass (MyTestProtocol)
+;
+; - (int)getValue {
+;     return 0x30;
+; }
+;
+; @end
+; EOF
+;
+; # Compile the Objective-C file to assembly
+; xcrun clang -S -arch arm64 MyClass.m -o MyClass.s
+; ==============================================
+
+
+       .section      __TEXT,__text,regular,pure_instructions
+       .p2align      2                               ; -- Begin function -[MyBaseClass baseInstanceMethod]
+"-[MyBaseClass baseInstanceMethod]":    ; @"\01-[MyBaseClass baseInstanceMethod]"
+       .cfi_startproc
+; %bb.0:
+       sub    sp, sp, #16
+       .cfi_def_cfa_offset 16
+       str    x0, [sp, #8]
+       str    x1, [sp]
+       add    sp, sp, #16
+       ret
+       .cfi_endproc
+                                        ; -- End function
+       .p2align      2                               ; -- Begin function -[MyBaseClass(MyTestProtocol) getValue]
+"-[MyBaseClass(MyTestProtocol) getValue]": ; @"\01-[MyBaseClass(MyTestProtocol) getValue]"
+       .cfi_startproc
+; %bb.0:
+       sub    sp, sp, #16
+       .cfi_def_cfa_offset 16
+       str    x0, [sp, #8]
+       str    x1, [sp]
+       mov    w0, #48                         ; =0x30
+       add    sp, sp, #16
+       ret
+       .cfi_endproc
+                                        ; -- End function
+       .section      __DATA,__objc_data
+       .globl _OBJC_CLASS_$_MyBaseClass       ; @"OBJC_CLASS_$_MyBaseClass"
+       .p2align      3, 0x0
+_OBJC_CLASS_$_MyBaseClass:
+       .quad  _OBJC_METACLASS_$_MyBaseClass
+       .quad  0
+       .quad  __objc_empty_cache
+       .quad  0
+       .quad  __OBJC_CLASS_RO_$_MyBaseClass
+       .globl _OBJC_METACLASS_$_MyBaseClass   ; @"OBJC_METACLASS_$_MyBaseClass"
+       .p2align      3, 0x0
+_OBJC_METACLASS_$_MyBaseClass:
+       .quad  _OBJC_METACLASS_$_MyBaseClass
+       .quad  _OBJC_CLASS_$_MyBaseClass
+       .quad  __objc_empty_cache
+       .quad  0
+       .quad  __OBJC_METACLASS_RO_$_MyBaseClass
+       .section      __TEXT,__objc_classname,cstring_literals
+l_OBJC_CLASS_NAME_:                     ; @OBJC_CLASS_NAME_
+       .asciz "MyBaseClass"
+       .section      __DATA,__objc_const
+       .p2align      3, 0x0                          ; @"_OBJC_METACLASS_RO_$_MyBaseClass"
+__OBJC_METACLASS_RO_$_MyBaseClass:
+       .long  131                             ; 0x83
+       .long  40                              ; 0x28
+       .long  40                              ; 0x28
+       .space 4
+       .quad  0
+       .quad  l_OBJC_CLASS_NAME_
+       .quad  0
+       .quad  0
+       .quad  0
+       .quad  0
+       .quad  0
+       .section      __TEXT,__objc_methname,cstring_literals
+l_OBJC_METH_VAR_NAME_:                  ; @OBJC_METH_VAR_NAME_
+       .asciz "baseInstanceMethod"
+       .section      __TEXT,__objc_methtype,cstring_literals
+l_OBJC_METH_VAR_TYPE_:                  ; @OBJC_METH_VAR_TYPE_
+       .asciz "v16@0:8"
+       .section      __DATA,__objc_const
+       .p2align      3, 0x0                          ; @"_OBJC_$_INSTANCE_METHODS_MyBaseClass"
+__OBJC_$_INSTANCE_METHODS_MyBaseClass:
+       .long  24                              ; 0x18
+       .long  1                               ; 0x1
+       .quad  l_OBJC_METH_VAR_NAME_
+       .quad  l_OBJC_METH_VAR_TYPE_
+       .quad  "-[MyBaseClass baseInstanceMethod]"
+       .p2align      3, 0x0                          ; @"_OBJC_CLASS_RO_$_MyBaseClass"
+__OBJC_CLASS_RO_$_MyBaseClass:
+       .long  130                             ; 0x82
+       .long  0                               ; 0x0
+       .long  0                               ; 0x0
+       .space 4
+       .quad  0
+       .quad  l_OBJC_CLASS_NAME_
+       .quad  __OBJC_$_INSTANCE_METHODS_MyBaseClass
+       .quad  0
+       .quad  0
+       .quad  0
+       .quad  0
+       .section      __TEXT,__objc_classname,cstring_literals
+l_OBJC_CLASS_NAME_.1:                   ; @OBJC_CLASS_NAME_.1
+       .asciz "MyTestProtocol"
+       .section      __TEXT,__objc_methname,cstring_literals
+l_OBJC_METH_VAR_NAME_.2:                ; @OBJC_METH_VAR_NAME_.2
+       .asciz "getValue"
+       .section      __TEXT,__objc_methtype,cstring_literals
+l_OBJC_METH_VAR_TYPE_.3:                ; @OBJC_METH_VAR_TYPE_.3
+       .asciz "i16@0:8"
+       .section      __DATA,__objc_const
+       .p2align      3, 0x0                          ; @"_OBJC_$_CATEGORY_INSTANCE_METHODS_MyBaseClass_$_MyTestProtocol"
+__OBJC_$_CATEGORY_INSTANCE_METHODS_MyBaseClass_$_MyTestProtocol:
+       .long  24                              ; 0x18
+       .long  1                               ; 0x1
+       .quad  l_OBJC_METH_VAR_NAME_.2
+       .quad  l_OBJC_METH_VAR_TYPE_.3
+       .quad  "-[MyBaseClass(MyTestProtocol) getValue]"
+       .p2align      3, 0x0                          ; @"_OBJC_$_PROTOCOL_INSTANCE_METHODS_MyTestProtocol"
+__OBJC_$_PROTOCOL_INSTANCE_METHODS_MyTestProtocol:
+       .long  24                              ; 0x18
+       .long  1                               ; 0x1
+       .quad  l_OBJC_METH_VAR_NAME_.2
+       .quad  l_OBJC_METH_VAR_TYPE_.3
+       .quad  0
+       .p2align      3, 0x0                          ; @"_OBJC_$_PROTOCOL_METHOD_TYPES_MyTestProtocol"
+__OBJC_$_PROTOCOL_METHOD_TYPES_MyTestProtocol:
+       .quad  l_OBJC_METH_VAR_TYPE_.3
+       .private_extern      __OBJC_PROTOCOL_$_MyTestProtocol ; @"_OBJC_PROTOCOL_$_MyTestProtocol"
+       .section      __DATA,__data
+       .globl __OBJC_PROTOCOL_$_MyTestProtocol
+       .weak_definition     __OBJC_PROTOCOL_$_MyTestProtocol
+       .p2align      3, 0x0
+__OBJC_PROTOCOL_$_MyTestProtocol:
+       .quad  0
+       .quad  l_OBJC_CLASS_NAME_.1
+       .quad  0
+       .quad  __OBJC_$_PROTOCOL_INSTANCE_METHODS_MyTestProtocol
+       .quad  0
+       .quad  0
+       .quad  0
+       .quad  0
+       .long  96                              ; 0x60
+       .long  0                               ; 0x0
+       .quad  __OBJC_$_PROTOCOL_METHOD_TYPES_MyTestProtocol
+       .quad  0
+       .quad  0
+       .private_extern      __OBJC_LABEL_PROTOCOL_$_MyTestProtocol ; @"_OBJC_LABEL_PROTOCOL_$_MyTestProtocol"
+       .section      __DATA,__objc_protolist,coalesced,no_dead_strip
+       .globl __OBJC_LABEL_PROTOCOL_$_MyTestProtocol
+       .weak_definition     __OBJC_LABEL_PROTOCOL_$_MyTestProtocol
+       .p2align      3, 0x0
+__OBJC_LABEL_PROTOCOL_$_MyTestProtocol:
+       .quad  __OBJC_PROTOCOL_$_MyTestProtocol
+       .section      __DATA,__objc_const
+       .p2align      3, 0x0                          ; @"_OBJC_CATEGORY_PROTOCOLS_$_MyBaseClass_$_MyTestProtocol"
+__OBJC_CATEGORY_PROTOCOLS_$_MyBaseClass_$_MyTestProtocol:
+       .quad  1                               ; 0x1
+       .quad  __OBJC_PROTOCOL_$_MyTestProtocol
+       .quad  0
+       .p2align      3, 0x0                          ; @"_OBJC_$_CATEGORY_MyBaseClass_$_MyTestProtocol"
+__OBJC_$_CATEGORY_MyBaseClass_$_MyTestProtocol:
+       .quad  l_OBJC_CLASS_NAME_.1
+       .quad  _OBJC_CLASS_$_MyBaseClass
+       .quad  __OBJC_$_CATEGORY_INSTANCE_METHODS_MyBaseClass_$_MyTestProtocol
+       .quad  0
+       .quad  __OBJC_CATEGORY_PROTOCOLS_$_MyBaseClass_$_MyTestProtocol
+       .quad  0
+       .quad  0
+       .long  64                              ; 0x40
+       .space 4
+       .section      __DATA,__objc_classlist,regular,no_dead_strip
+       .p2align      3, 0x0                          ; @"OBJC_LABEL_CLASS_$"
+l_OBJC_LABEL_CLASS_$:
+       .quad  _OBJC_CLASS_$_MyBaseClass
+       .section      __DATA,__objc_catlist,regular,no_dead_strip
+       .p2align      3, 0x0                          ; @"OBJC_LABEL_CATEGORY_$"
+l_OBJC_LABEL_CATEGORY_$:
+       .quad  __OBJC_$_CATEGORY_MyBaseClass_$_MyTestProtocol
+       .no_dead_strip       __OBJC_PROTOCOL_$_MyTestProtocol
+       .no_dead_strip       __OBJC_LABEL_PROTOCOL_$_MyTestProtocol
+       .section      __DATA,__objc_imageinfo,regular,no_dead_strip
+L_OBJC_IMAGE_INFO:
+       .long  0
+       .long  64
+
+__objc_empty_cache:
+_$sBOWV:
+  .quad 0
+
+.subsections_via_symbols
diff --git a/lld/test/MachO/reproduce-thin-archive-objc.s b/lld/test/MachO/reproduce-thin-archive-objc.s
index c5fe42f130526..8159f03f0f740 100644
--- a/lld/test/MachO/reproduce-thin-archive-objc.s
+++ b/lld/test/MachO/reproduce-thin-archive-objc.s
@@ -4,20 +4,19 @@
 ## during linking. However, we need to iterate over all members for -ObjC, check that we don't
 ## crash when we encounter a missing member.
 
-# RUN: rm -rf %t; mkdir %t
-# RUN: sed s/SYM/_main/   %s | llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/main.o
-# RUN: sed s/SYM/_unused/ %s | llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/unused.o
+# RUN: rm -rf %t && mkdir %t && cd %t
+# RUN: sed s/SYM/_main/   %s | llvm-mc -filetype=obj -triple=x86_64-apple-macos -o main.o
+# RUN: sed s/SYM/_unused/ %s | llvm-mc -filetype=obj -triple=x86_64-apple-macos -o unused.o
 
-# RUN: cd %t; llvm-ar rcsT unused.a unused.o; rm unused.o
+# RUN: llvm-ar rcsT unused.a unused.o; rm unused.o
 ## FIXME: Absolute paths don't end up relativized in the repro file.
 
 # RUN: %no-fatal-warnings-lld %t/main.o %t/unused.a -ObjC -o /dev/null 2>&1 \
 # RUN:                      | FileCheck %s --check-prefix=WARN
 
-# RUN: %lld %t/main.o %t/unused.a -ObjC --no-warn-thin-archive-missing-members -o /dev/null \
-# RUN:    | FileCheck %s --implicit-check-not 'warning' --allow-empty
+# RUN: %lld main.o unused.a -ObjC --no-warn-thin-archive-missing-members 2>&1 | count 0
 
-# WARN: ld64.lld: warning: {{.*}}unused.a: -ObjC failed to open archive member: 'unused.o'
+# WARN: warning: {{.*}}unused.a: -ObjC failed to open archive member: 'unused.o'
 
 .text
 .globl SYM
diff --git a/lldb/test/API/functionalities/process_save_core/TestProcessSaveCore.py b/lldb/test/API/functionalities/process_save_core/TestProcessSaveCore.py
index 07d06bdc116ec..8573d15733927 100644
--- a/lldb/test/API/functionalities/process_save_core/TestProcessSaveCore.py
+++ b/lldb/test/API/functionalities/process_save_core/TestProcessSaveCore.py
@@ -20,8 +20,8 @@ def test_cannot_save_core_unless_process_stopped(self):
         target = self.dbg.CreateTarget(exe)
         process = target.LaunchSimple(None, None, self.get_process_working_directory())
         self.assertNotEqual(process.GetState(), lldb.eStateStopped)
-        options = SBSaveCoreOptions()
-        options.SetOutputFile(SBFileSpec(core))
+        options = lldb.SBSaveCoreOptions()
+        options.SetOutputFile(lldb.SBFileSpec(core))
         error = process.SaveCore(core)
         self.assertTrue(error.Fail())
 
diff --git a/lldb/test/API/lit.cfg.py b/lldb/test/API/lit.cfg.py
index 96520c7c82624..dfeb76544e57d 100644
--- a/lldb/test/API/lit.cfg.py
+++ b/lldb/test/API/lit.cfg.py
@@ -265,6 +265,11 @@ def delete_module_cache(path):
 if is_configured("lldb_framework_dir"):
     dotest_cmd += ["--framework", config.lldb_framework_dir]
 
+# Facebook T92898286
+if is_configured("llvm_test_bolt"):
+    dotest_cmd += ["-E", '"--post-link-optimize"']
+# End Facebook T92898286
+
 if (
     "lldb-repro-capture" in config.available_features
     or "lldb-repro-replay" in config.available_features
diff --git a/lldb/test/API/lit.site.cfg.py.in b/lldb/test/API/lit.site.cfg.py.in
index 8b2d09ae41cd2..602f45759e48f 100644
--- a/lldb/test/API/lit.site.cfg.py.in
+++ b/lldb/test/API/lit.site.cfg.py.in
@@ -1,5 +1,9 @@
 @LIT_SITE_CFG_IN_HEADER@
 
+#Facebook T92898286
+import lit.util
+#End Facebook T92898286
+
 config.llvm_src_root = "@LLVM_SOURCE_DIR@"
 config.llvm_obj_root = "@LLVM_BINARY_DIR@"
 config.llvm_tools_dir = lit_config.substitute("@LLVM_TOOLS_DIR@")
@@ -39,6 +43,10 @@ config.libcxx_include_target_dir = "@LIBCXX_GENERATED_INCLUDE_TARGET_DIR@"
 config.lldb_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_LLDB@", "lldb-api")
 config.clang_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_CLANG@", "lldb-api")
 
+# Facebook T92898286
+config.llvm_test_bolt = lit.util.pythonize_bool("@LLVM_TEST_BOLT@")
+# End Facebook T92898286
+
 # Plugins
 lldb_build_intel_pt = '@LLDB_BUILD_INTEL_PT@'
 if lldb_build_intel_pt == '1':
diff --git a/lldb/test/Shell/helper/toolchain.py b/lldb/test/Shell/helper/toolchain.py
index 255955fc70d8c..7b7be06643166 100644
--- a/lldb/test/Shell/helper/toolchain.py
+++ b/lldb/test/Shell/helper/toolchain.py
@@ -165,6 +165,11 @@ def use_support_substitutions(config):
     if config.cmake_sysroot:
         host_flags += ["--sysroot={}".format(config.cmake_sysroot)]
 
+    # Facebook T92898286
+    if config.llvm_test_bolt:
+        host_flags += ["--post-link-optimize"]
+    # End Facebook T92898286
+
     host_flags = " ".join(host_flags)
     config.substitutions.append(("%clang_host", "%clang " + host_flags))
     config.substitutions.append(("%clangxx_host", "%clangxx " + host_flags))
diff --git a/lldb/test/Shell/lit.site.cfg.py.in b/lldb/test/Shell/lit.site.cfg.py.in
index b69e7bce1bc0b..fe8323734b7db 100644
--- a/lldb/test/Shell/lit.site.cfg.py.in
+++ b/lldb/test/Shell/lit.site.cfg.py.in
@@ -1,5 +1,10 @@
 @LIT_SITE_CFG_IN_HEADER@
 
+#Facebook T92898286
+import lit.util
+#End Facebook T92898286
+
+
 config.llvm_src_root = "@LLVM_SOURCE_DIR@"
 config.llvm_obj_root = "@LLVM_BINARY_DIR@"
 config.llvm_tools_dir = lit_config.substitute("@LLVM_TOOLS_DIR@")
@@ -31,6 +36,10 @@ config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
 config.lldb_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_LLDB@", "lldb-shell")
 config.clang_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_CLANG@", "lldb-shell")
 
+# Facebook T92898286
+config.llvm_test_bolt = lit.util.pythonize_bool("@LLVM_TEST_BOLT@")
+# End Facebook T92898286
+
 import lit.llvm
 lit.llvm.initialize(lit_config, config)
 
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 12618966c4adf..a08b477060f48 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -709,6 +709,10 @@ set(LLVM_LIB_FUZZING_ENGINE "" CACHE PATH
 option(LLVM_USE_SPLIT_DWARF
   "Use -gsplit-dwarf when compiling llvm and --gdb-index when linking." OFF)
 
+# Facebook T92898286
+option(LLVM_TEST_BOLT "Enable BOLT testing in non-BOLT tests that use clang" OFF)
+# End Facebook T92898286
+
 # Define an option controlling whether we should build for 32-bit on 64-bit
 # platforms, where supported.
 if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT (WIN32 OR ${CMAKE_SYSTEM_NAME} MATCHES "AIX"))
diff --git a/llvm/docs/CommandGuide/lit.rst b/llvm/docs/CommandGuide/lit.rst
index 799ee34e9f9ff..c9d5baba3e2f4 100644
--- a/llvm/docs/CommandGuide/lit.rst
+++ b/llvm/docs/CommandGuide/lit.rst
@@ -151,6 +151,10 @@ EXECUTION OPTIONS
  feature that can be used to conditionally disable (or expect failure in)
  certain tests.
 
+.. option:: --skip-test-time-recording
+
+ Disable tracking the wall time individual tests take to execute.
+
 .. option:: --time-tests
 
  Track the wall time individual tests take to execute and includes the results
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelect.h b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelect.h
index cada7f30072e2..8017f09aa3c8b 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelect.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelect.h
@@ -49,8 +49,8 @@ class InstructionSelect : public MachineFunctionPass {
         MachineFunctionProperties::Property::Selected);
   }
 
-  InstructionSelect(CodeGenOptLevel OL);
-  InstructionSelect();
+  InstructionSelect(CodeGenOptLevel OL = CodeGenOptLevel::Default,
+                    char &PassID = ID);
 
   bool runOnMachineFunction(MachineFunction &MF) override;
 
diff --git a/llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h b/llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h
index ce63dcc405fd5..7a13164589392 100644
--- a/llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h
+++ b/llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h
@@ -13,6 +13,7 @@
 #ifndef LLVM_CODEGEN_RUNTIMELIBCALLS_H
 #define LLVM_CODEGEN_RUNTIMELIBCALLS_H
 
+#include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/IR/RuntimeLibcalls.h"
 #include "llvm/Support/AtomicOrdering.h"
@@ -90,6 +91,9 @@ Libcall getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize);
 /// UNKNOW_LIBCALL if there is none.
 Libcall getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize);
 
+/// Initialize the default condition code on the libcalls.
+void initCmpLibcallCCs(ISD::CondCode *CmpLibcallCCs);
+
 } // namespace RTLIB
 } // namespace llvm
 
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index d4a2166bf768e..9d9886f4920a2 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3431,16 +3431,20 @@ class TargetLoweringBase {
 
   /// Override the default CondCode to be used to test the result of the
   /// comparison libcall against zero.
+  /// FIXME: This can't be merged with 'RuntimeLibcallsInfo' because of the ISD.
   void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC) {
-    Libcalls.setCmpLibcallCC(Call, CC);
+    CmpLibcallCCs[Call] = CC;
   }
 
+
   /// Get the CondCode that's to be used to test the result of the comparison
   /// libcall against zero.
+  /// FIXME: This can't be merged with 'RuntimeLibcallsInfo' because of the ISD.
   ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const {
-    return Libcalls.getCmpLibcallCC(Call);
+    return CmpLibcallCCs[Call];
   }
 
+
   /// Set the CallingConv that should be used for the specified libcall.
   void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC) {
     Libcalls.setLibcallCallingConv(Call, CC);
@@ -3630,6 +3634,10 @@ class TargetLoweringBase {
   /// The list of libcalls that the target will use.
   RTLIB::RuntimeLibcallsInfo Libcalls;
 
+  /// The ISD::CondCode that should be used to test the result of each
+  /// comparison libcall against zero.
+  ISD::CondCode CmpLibcallCCs[RTLIB::UNKNOWN_LIBCALL];
+
   /// The bits of IndexedModeActions used to store the legalisation actions
   /// We store the data as   | ML | MS |  L |  S | each taking 4 bits.
   enum IndexedModeActionsBits {
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 71b1e832bde3c..ca85ff30f683f 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -769,9 +769,10 @@ class AMDGPUDimProfileCopy<AMDGPUDimProfile base> : AMDGPUDimProfile<base.OpMod,
 
 class AMDGPUDimSampleProfile<string opmod,
                              AMDGPUDimProps dim,
-                             AMDGPUSampleVariant sample> : AMDGPUDimProfile<opmod, dim> {
+                             AMDGPUSampleVariant sample,
+                             bit has_return = true> : AMDGPUDimProfile<opmod, dim> {
   let IsSample = true;
-  let RetTypes = [llvm_any_ty];
+  let RetTypes = !if(has_return, [llvm_any_ty], []);
   let ExtraAddrArgs = sample.ExtraAddrArgs;
   let Offset = sample.Offset;
   let Bias = sample.Bias;
@@ -780,6 +781,12 @@ class AMDGPUDimSampleProfile<string opmod,
   let LodClampMip = sample.LodOrClamp;
 }
 
+class AMDGPUDimSampleNoReturnProfile<string opmod,
+                             AMDGPUDimProps dim,
+                             AMDGPUSampleVariant sample>
+    : AMDGPUDimSampleProfile<opmod, dim, sample, false> {
+}
+
 class AMDGPUDimNoSampleProfile<string opmod,
                                AMDGPUDimProps dim,
                                list<LLVMType> retty,
@@ -970,6 +977,21 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
         AMDGPUImageDMaskIntrinsic;
   }
 
+  multiclass AMDGPUImageDimSampleNoReturnDims<string opmod,
+                                      AMDGPUSampleVariant sample> {
+    foreach dim = AMDGPUDims.NoMsaa in {
+      def !strconcat(NAME, "_", dim.Name, "_nortn") : AMDGPUImageDimIntrinsic<
+          AMDGPUDimSampleNoReturnProfile<opmod, dim, sample>,
+          [IntrWillReturn], [SDNPMemOperand]>;
+    }
+  }
+  foreach sample = AMDGPUSampleVariants in {
+    defm int_amdgcn_image_sample # sample.LowerCaseMod
+      : AMDGPUImageDimSampleNoReturnDims<
+        "SAMPLE" # sample.UpperCaseMod # "_nortn", sample>,
+        AMDGPUImageDMaskIntrinsic;
+  }
+
   defm int_amdgcn_image_getlod
     : AMDGPUImageDimSampleDims<"GET_LOD", AMDGPUSample, 1>,
       AMDGPUImageDMaskIntrinsic;
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.h b/llvm/include/llvm/IR/RuntimeLibcalls.h
index 3057bff397b2f..b3648f5a31e2a 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.h
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.h
@@ -15,7 +15,6 @@
 #define LLVM_IR_RUNTIME_LIBCALLS_H
 
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/Support/AtomicOrdering.h"
 #include "llvm/TargetParser/Triple.h"
@@ -41,7 +40,6 @@ enum Libcall {
 struct RuntimeLibcallsInfo {
   explicit RuntimeLibcallsInfo(const Triple &TT) {
     initLibcalls(TT);
-    initCmpLibcallCCs();
   }
 
   /// Rename the default libcall routine name for the specified libcall.
@@ -59,18 +57,6 @@ struct RuntimeLibcallsInfo {
     return LibcallRoutineNames[Call];
   }
 
-  /// Override the default CondCode to be used to test the result of the
-  /// comparison libcall against zero.
-  void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC) {
-    CmpLibcallCCs[Call] = CC;
-  }
-
-  /// Get the CondCode that's to be used to test the result of the comparison
-  /// libcall against zero.
-  ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const {
-    return CmpLibcallCCs[Call];
-  }
-
   /// Set the CallingConv that should be used for the specified libcall.
   void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC) {
     LibcallCallingConvs[Call] = CC;
@@ -90,10 +76,6 @@ struct RuntimeLibcallsInfo {
   /// Stores the name each libcall.
   const char *LibcallRoutineNames[RTLIB::UNKNOWN_LIBCALL + 1];
 
-  /// The ISD::CondCode that should be used to test the result of each of the
-  /// comparison libcall against zero.
-  ISD::CondCode CmpLibcallCCs[RTLIB::UNKNOWN_LIBCALL];
-
   /// Stores the CallingConv that should be used for each libcall.
   CallingConv::ID LibcallCallingConvs[RTLIB::UNKNOWN_LIBCALL];
 
@@ -112,9 +94,6 @@ struct RuntimeLibcallsInfo {
     return true;
   }
 
-  /// Sets default libcall calling conventions.
-  void initCmpLibcallCCs();
-
   /// Set default libcall names. If a target wants to opt-out of a libcall it
   /// should be placed here.
   void initLibcalls(const Triple &TT);
diff --git a/llvm/include/llvm/MC/TargetRegistry.h b/llvm/include/llvm/MC/TargetRegistry.h
index 5038b87cd1dc9..14ceb76d312c5 100644
--- a/llvm/include/llvm/MC/TargetRegistry.h
+++ b/llvm/include/llvm/MC/TargetRegistry.h
@@ -23,6 +23,7 @@
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/Support/CodeGen.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/TargetParser/Triple.h"
@@ -107,10 +108,6 @@ MCStreamer *createWasmStreamer(MCContext &Ctx,
                                std::unique_ptr<MCAsmBackend> &&TAB,
                                std::unique_ptr<MCObjectWriter> &&OW,
                                std::unique_ptr<MCCodeEmitter> &&CE);
-MCStreamer *createXCOFFStreamer(MCContext &Ctx,
-                                std::unique_ptr<MCAsmBackend> &&TAB,
-                                std::unique_ptr<MCObjectWriter> &&OW,
-                                std::unique_ptr<MCCodeEmitter> &&CE);
 MCStreamer *createSPIRVStreamer(MCContext &Ctx,
                                 std::unique_ptr<MCAsmBackend> &&TAB,
                                 std::unique_ptr<MCObjectWriter> &&OW,
@@ -194,23 +191,12 @@ class Target {
                       std::unique_ptr<MCAsmBackend> &&TAB,
                       std::unique_ptr<MCObjectWriter> &&OW,
                       std::unique_ptr<MCCodeEmitter> &&Emitter);
-  using GOFFStreamerCtorTy =
-      MCStreamer *(*)(MCContext &Ctx, std::unique_ptr<MCAsmBackend> &&TAB,
-                      std::unique_ptr<MCObjectWriter> &&OW,
-                      std::unique_ptr<MCCodeEmitter> &&Emitter);
   using MachOStreamerCtorTy =
       MCStreamer *(*)(MCContext &Ctx, std::unique_ptr<MCAsmBackend> &&TAB,
                       std::unique_ptr<MCObjectWriter> &&OW,
-                      std::unique_ptr<MCCodeEmitter> &&Emitter,
-                      bool DWARFMustBeAtTheEnd);
+                      std::unique_ptr<MCCodeEmitter> &&Emitter);
   using COFFStreamerCtorTy =
       MCStreamer *(*)(MCContext &Ctx, std::unique_ptr<MCAsmBackend> &&TAB,
-                      std::unique_ptr<MCObjectWriter> &&OW,
-                      std::unique_ptr<MCCodeEmitter> &&Emitter,
-                      bool IncrementalLinkerCompatible);
-  using WasmStreamerCtorTy =
-      MCStreamer *(*)(const Triple &T, MCContext &Ctx,
-                      std::unique_ptr<MCAsmBackend> &&TAB,
                       std::unique_ptr<MCObjectWriter> &&OW,
                       std::unique_ptr<MCCodeEmitter> &&Emitter);
   using XCOFFStreamerCtorTy =
@@ -218,17 +204,6 @@ class Target {
                       std::unique_ptr<MCAsmBackend> &&TAB,
                       std::unique_ptr<MCObjectWriter> &&OW,
                       std::unique_ptr<MCCodeEmitter> &&Emitter);
-  using SPIRVStreamerCtorTy =
-      MCStreamer *(*)(const Triple &T, MCContext &Ctx,
-                      std::unique_ptr<MCAsmBackend> &&TAB,
-                      std::unique_ptr<MCObjectWriter> &&OW,
-                      std::unique_ptr<MCCodeEmitter> &&Emitter);
-
-  using DXContainerStreamerCtorTy =
-      MCStreamer *(*)(const Triple &T, MCContext &Ctx,
-                      std::unique_ptr<MCAsmBackend> &&TAB,
-                      std::unique_ptr<MCObjectWriter> &&OW,
-                      std::unique_ptr<MCCodeEmitter> &&Emitter);
 
   using NullTargetStreamerCtorTy = MCTargetStreamer *(*)(MCStreamer &S);
   using AsmTargetStreamerCtorTy = MCTargetStreamer *(*)(
@@ -330,13 +305,9 @@ class Target {
 
   // Construction functions for the various object formats, if registered.
   COFFStreamerCtorTy COFFStreamerCtorFn = nullptr;
-  GOFFStreamerCtorTy GOFFStreamerCtorFn = nullptr;
   MachOStreamerCtorTy MachOStreamerCtorFn = nullptr;
   ELFStreamerCtorTy ELFStreamerCtorFn = nullptr;
-  WasmStreamerCtorTy WasmStreamerCtorFn = nullptr;
   XCOFFStreamerCtorTy XCOFFStreamerCtorFn = nullptr;
-  SPIRVStreamerCtorTy SPIRVStreamerCtorFn = nullptr;
-  DXContainerStreamerCtorTy DXContainerStreamerCtorFn = nullptr;
 
   /// Construction function for this target's null TargetStreamer, if
   /// registered (default = nullptr).
@@ -555,100 +526,29 @@ class Target {
   /// \param TAB The target assembler backend object. Takes ownership.
   /// \param OW The stream object.
   /// \param Emitter The target independent assembler object.Takes ownership.
-  /// \param RelaxAll Relax all fixups?
+  MCStreamer *createMCObjectStreamer(const Triple &T, MCContext &Ctx,
+                                     std::unique_ptr<MCAsmBackend> TAB,
+                                     std::unique_ptr<MCObjectWriter> OW,
+                                     std::unique_ptr<MCCodeEmitter> Emitter,
+                                     const MCSubtargetInfo &STI) const;
+  LLVM_DEPRECATED("Use the overload without the 3 trailing bool", "")
   MCStreamer *createMCObjectStreamer(const Triple &T, MCContext &Ctx,
                                      std::unique_ptr<MCAsmBackend> &&TAB,
                                      std::unique_ptr<MCObjectWriter> &&OW,
                                      std::unique_ptr<MCCodeEmitter> &&Emitter,
-                                     const MCSubtargetInfo &STI, bool,
-                                     bool IncrementalLinkerCompatible,
-                                     bool DWARFMustBeAtTheEnd) const {
-    MCStreamer *S = nullptr;
-    switch (T.getObjectFormat()) {
-    case Triple::UnknownObjectFormat:
-      llvm_unreachable("Unknown object format");
-    case Triple::COFF:
-      assert((T.isOSWindows() || T.isUEFI()) &&
-             "only Windows and UEFI COFF are supported");
-      S = COFFStreamerCtorFn(Ctx, std::move(TAB), std::move(OW),
-                             std::move(Emitter), IncrementalLinkerCompatible);
-      break;
-    case Triple::MachO:
-      if (MachOStreamerCtorFn)
-        S = MachOStreamerCtorFn(Ctx, std::move(TAB), std::move(OW),
-                                std::move(Emitter), DWARFMustBeAtTheEnd);
-      else
-        S = createMachOStreamer(Ctx, std::move(TAB), std::move(OW),
-                                std::move(Emitter), DWARFMustBeAtTheEnd);
-      break;
-    case Triple::ELF:
-      if (ELFStreamerCtorFn)
-        S = ELFStreamerCtorFn(T, Ctx, std::move(TAB), std::move(OW),
-                              std::move(Emitter));
-      else
-        S = createELFStreamer(Ctx, std::move(TAB), std::move(OW),
-                              std::move(Emitter));
-      break;
-    case Triple::Wasm:
-      if (WasmStreamerCtorFn)
-        S = WasmStreamerCtorFn(T, Ctx, std::move(TAB), std::move(OW),
-                               std::move(Emitter));
-      else
-        S = createWasmStreamer(Ctx, std::move(TAB), std::move(OW),
-                               std::move(Emitter));
-      break;
-    case Triple::GOFF:
-      if (GOFFStreamerCtorFn)
-        S = GOFFStreamerCtorFn(Ctx, std::move(TAB), std::move(OW),
-                               std::move(Emitter));
-      else
-        S = createGOFFStreamer(Ctx, std::move(TAB), std::move(OW),
-                               std::move(Emitter));
-      break;
-    case Triple::XCOFF:
-      if (XCOFFStreamerCtorFn)
-        S = XCOFFStreamerCtorFn(T, Ctx, std::move(TAB), std::move(OW),
-                                std::move(Emitter));
-      else
-        S = createXCOFFStreamer(Ctx, std::move(TAB), std::move(OW),
-                                std::move(Emitter));
-      break;
-    case Triple::SPIRV:
-      if (SPIRVStreamerCtorFn)
-        S = SPIRVStreamerCtorFn(T, Ctx, std::move(TAB), std::move(OW),
-                                std::move(Emitter));
-      else
-        S = createSPIRVStreamer(Ctx, std::move(TAB), std::move(OW),
-                                std::move(Emitter));
-      break;
-    case Triple::DXContainer:
-      if (DXContainerStreamerCtorFn)
-        S = DXContainerStreamerCtorFn(T, Ctx, std::move(TAB), std::move(OW),
-                                      std::move(Emitter));
-      else
-        S = createDXContainerStreamer(Ctx, std::move(TAB), std::move(OW),
-                                      std::move(Emitter));
-      break;
-    }
-    if (ObjectTargetStreamerCtorFn)
-      ObjectTargetStreamerCtorFn(*S, STI);
-    return S;
-  }
+                                     const MCSubtargetInfo &STI, bool, bool,
+                                     bool) const;
 
   MCStreamer *createAsmStreamer(MCContext &Ctx,
                                 std::unique_ptr<formatted_raw_ostream> OS,
-                                bool IsVerboseAsm, bool UseDwarfDirectory,
-                                MCInstPrinter *InstPrint,
-                                std::unique_ptr<MCCodeEmitter> &&CE,
-                                std::unique_ptr<MCAsmBackend> &&TAB,
-                                bool ShowInst) const {
-    formatted_raw_ostream &OSRef = *OS;
-    MCStreamer *S = llvm::createAsmStreamer(
-        Ctx, std::move(OS), IsVerboseAsm, UseDwarfDirectory, InstPrint,
-        std::move(CE), std::move(TAB), ShowInst);
-    createAsmTargetStreamer(*S, OSRef, InstPrint, IsVerboseAsm);
-    return S;
-  }
+                                MCInstPrinter *IP,
+                                std::unique_ptr<MCCodeEmitter> CE,
+                                std::unique_ptr<MCAsmBackend> TAB) const;
+  MCStreamer *
+  createAsmStreamer(MCContext &Ctx, std::unique_ptr<formatted_raw_ostream> OS,
+                    bool IsVerboseAsm, bool UseDwarfDirectory,
+                    MCInstPrinter *IP, std::unique_ptr<MCCodeEmitter> &&CE,
+                    std::unique_ptr<MCAsmBackend> &&TAB, bool ShowInst) const;
 
   MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S,
                                             formatted_raw_ostream &OS,
@@ -1011,10 +911,6 @@ struct TargetRegistry {
     T.COFFStreamerCtorFn = Fn;
   }
 
-  static void RegisterGOFFStreamer(Target &T, Target::GOFFStreamerCtorTy Fn) {
-    T.GOFFStreamerCtorFn = Fn;
-  }
-
   static void RegisterMachOStreamer(Target &T, Target::MachOStreamerCtorTy Fn) {
     T.MachOStreamerCtorFn = Fn;
   }
@@ -1023,18 +919,6 @@ struct TargetRegistry {
     T.ELFStreamerCtorFn = Fn;
   }
 
-  static void RegisterSPIRVStreamer(Target &T, Target::SPIRVStreamerCtorTy Fn) {
-    T.SPIRVStreamerCtorFn = Fn;
-  }
-
-  static void RegisterDXContainerStreamer(Target &T, Target::DXContainerStreamerCtorTy Fn) {
-    T.DXContainerStreamerCtorFn = Fn;
-  }
-
-  static void RegisterWasmStreamer(Target &T, Target::WasmStreamerCtorTy Fn) {
-    T.WasmStreamerCtorFn = Fn;
-  }
-
   static void RegisterXCOFFStreamer(Target &T, Target::XCOFFStreamerCtorTy Fn) {
     T.XCOFFStreamerCtorFn = Fn;
   }
diff --git a/llvm/include/llvm/SandboxIR/SandboxIR.h b/llvm/include/llvm/SandboxIR/SandboxIR.h
index f168fdf8b1056..dfffe5c96f1cf 100644
--- a/llvm/include/llvm/SandboxIR/SandboxIR.h
+++ b/llvm/include/llvm/SandboxIR/SandboxIR.h
@@ -76,6 +76,7 @@ class Context;
 class Function;
 class Instruction;
 class LoadInst;
+class StoreInst;
 class User;
 class Value;
 
@@ -172,10 +173,11 @@ class Value {
   /// order.
   llvm::Value *Val = nullptr;
 
-  friend class Context;  // For getting `Val`.
-  friend class User;     // For getting `Val`.
-  friend class Use;      // For getting `Val`.
-  friend class LoadInst; // For getting `Val`.
+  friend class Context;   // For getting `Val`.
+  friend class User;      // For getting `Val`.
+  friend class Use;       // For getting `Val`.
+  friend class LoadInst;  // For getting `Val`.
+  friend class StoreInst; // For getting `Val`.
 
   /// All values point to the context.
   Context &Ctx;
@@ -495,7 +497,8 @@ class Instruction : public sandboxir::User {
   /// A SandboxIR Instruction may map to multiple LLVM IR Instruction. This
   /// returns its topmost LLVM IR instruction.
   llvm::Instruction *getTopmostLLVMInstruction() const;
-  friend class LoadInst; // For getTopmostLLVMInstruction().
+  friend class LoadInst;  // For getTopmostLLVMInstruction().
+  friend class StoreInst; // For getTopmostLLVMInstruction().
 
   /// \Returns the LLVM IR Instructions that this SandboxIR maps to in program
   /// order.
@@ -599,6 +602,43 @@ class LoadInst final : public Instruction {
 #endif
 };
 
+class StoreInst final : public Instruction {
+  /// Use StoreInst::create().
+  StoreInst(llvm::StoreInst *SI, Context &Ctx)
+      : Instruction(ClassID::Store, Opcode::Store, SI, Ctx) {}
+  friend Context; // for StoreInst()
+  Use getOperandUseInternal(unsigned OpIdx, bool Verify) const final {
+    return getOperandUseDefault(OpIdx, Verify);
+  }
+  SmallVector<llvm::Instruction *, 1> getLLVMInstrs() const final {
+    return {cast<llvm::Instruction>(Val)};
+  }
+
+public:
+  unsigned getUseOperandNo(const Use &Use) const final {
+    return getUseOperandNoDefault(Use);
+  }
+  unsigned getNumOfIRInstrs() const final { return 1u; }
+  static StoreInst *create(Value *V, Value *Ptr, MaybeAlign Align,
+                           Instruction *InsertBefore, Context &Ctx);
+  static StoreInst *create(Value *V, Value *Ptr, MaybeAlign Align,
+                           BasicBlock *InsertAtEnd, Context &Ctx);
+  /// For isa/dyn_cast.
+  static bool classof(const Value *From);
+  Value *getValueOperand() const;
+  Value *getPointerOperand() const;
+  Align getAlign() const { return cast<llvm::StoreInst>(Val)->getAlign(); }
+  bool isSimple() const { return cast<llvm::StoreInst>(Val)->isSimple(); }
+  bool isUnordered() const { return cast<llvm::StoreInst>(Val)->isUnordered(); }
+#ifndef NDEBUG
+  void verify() const final {
+    assert(isa<llvm::StoreInst>(Val) && "Expected StoreInst!");
+  }
+  void dump(raw_ostream &OS) const override;
+  LLVM_DUMP_METHOD void dump() const override;
+#endif
+};
+
 /// An LLLVM Instruction that has no SandboxIR equivalent class gets mapped to
 /// an OpaqueInstr.
 class OpaqueInst : public sandboxir::Instruction {
@@ -734,6 +774,8 @@ class Context {
 
   LoadInst *createLoadInst(llvm::LoadInst *LI);
   friend LoadInst; // For createLoadInst()
+  StoreInst *createStoreInst(llvm::StoreInst *SI);
+  friend StoreInst; // For createStoreInst()
 
 public:
   Context(LLVMContext &LLVMCtx)
diff --git a/llvm/include/llvm/SandboxIR/SandboxIRValues.def b/llvm/include/llvm/SandboxIR/SandboxIRValues.def
index e1ed3cdac6bba..90365ca7a1c45 100644
--- a/llvm/include/llvm/SandboxIR/SandboxIRValues.def
+++ b/llvm/include/llvm/SandboxIR/SandboxIRValues.def
@@ -26,6 +26,7 @@ DEF_USER(Constant, Constant)
 //       ClassID, Opcode(s),  Class
 DEF_INSTR(Opaque, OP(Opaque), OpaqueInst)
 DEF_INSTR(Load, OP(Load), LoadInst)
+DEF_INSTR(Store, OP(Store), StoreInst)
 
 #ifdef DEF_VALUE
 #undef DEF_VALUE
diff --git a/llvm/include/llvm/Support/TimeProfiler.h b/llvm/include/llvm/Support/TimeProfiler.h
index 6eb92930b36fd..31f7df10916db 100644
--- a/llvm/include/llvm/Support/TimeProfiler.h
+++ b/llvm/include/llvm/Support/TimeProfiler.h
@@ -83,28 +83,16 @@ namespace llvm {
 
 class raw_pwrite_stream;
 
-struct TimeTraceMetadata {
-  std::string Detail;
-  // Source file and line number information for the event.
-  std::string File;
-  int Line;
-
-  bool isEmpty() const { return Detail.empty() && File.empty(); }
-};
-
 struct TimeTraceProfiler;
 TimeTraceProfiler *getTimeTraceProfilerInstance();
 
-bool isTimeTraceVerbose();
-
 struct TimeTraceProfilerEntry;
 
 /// Initialize the time trace profiler.
 /// This sets up the global \p TimeTraceProfilerInstance
 /// variable to be the profiler instance.
 void timeTraceProfilerInitialize(unsigned TimeTraceGranularity,
-                                 StringRef ProcName,
-                                 bool TimeTraceVerbose = false);
+                                 StringRef ProcName);
 
 /// Cleanup the time trace profiler, if it was initialized.
 void timeTraceProfilerCleanup();
@@ -140,10 +128,6 @@ TimeTraceProfilerEntry *
 timeTraceProfilerBegin(StringRef Name,
                        llvm::function_ref<std::string()> Detail);
 
-TimeTraceProfilerEntry *
-timeTraceProfilerBegin(StringRef Name,
-                       llvm::function_ref<TimeTraceMetadata()> MetaData);
-
 /// Manually begin a time section, with the given \p Name and \p Detail.
 /// This starts Async Events having \p Name as a category which is shown
 /// separately from other traces. See
@@ -180,11 +164,6 @@ class TimeTraceScope {
     if (getTimeTraceProfilerInstance() != nullptr)
       Entry = timeTraceProfilerBegin(Name, Detail);
   }
-  TimeTraceScope(StringRef Name,
-                 llvm::function_ref<TimeTraceMetadata()> Metadata) {
-    if (getTimeTraceProfilerInstance() != nullptr)
-      Entry = timeTraceProfilerBegin(Name, Metadata);
-  }
   ~TimeTraceScope() {
     if (getTimeTraceProfilerInstance() != nullptr)
       timeTraceProfilerEnd(Entry);
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def
index 0e4ad873e3639..92798cbe4b4c1 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.def
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.def
@@ -175,8 +175,12 @@ X86_FEATURE_COMPAT(AVX512BF16,      "avx512bf16",            34)
 X86_FEATURE_COMPAT(AVX512VP2INTERSECT, "avx512vp2intersect", 35)
 // Below Features has some missings comparing to gcc, it's because gcc has some
 // not one-to-one mapped in llvm.
-X86_FEATURE_COMPAT(3DNOW,           "3dnow",                  0)
-X86_FEATURE       (3DNOWA,          "3dnowa")
+
+// FIXME: dummy features were added to keep the numeric values of later features
+// stable. Since the values need to be ABI stable, they should be changed to
+// have explicitly assigned values, and then these dummy features removed.
+X86_FEATURE       (DUMMYFEATURE1,   "__dummyfeature1")
+X86_FEATURE       (DUMMYFEATURE2,   "__dummyfeature2")
 X86_FEATURE_COMPAT(ADX,             "adx",                    0)
 X86_FEATURE       (64BIT,           "64bit")
 X86_FEATURE_COMPAT(CLDEMOTE,        "cldemote",               0)
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index b46a6d348413b..91b5703944f3d 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1669,8 +1669,8 @@ void AsmPrinter::emitPCSections(const MachineFunction &MF) {
 }
 
 /// Returns true if function begin and end labels should be emitted.
-static bool needFuncLabels(const MachineFunction &MF) {
-  MachineModuleInfo &MMI = MF.getMMI();
+static bool needFuncLabels(const MachineFunction &MF,
+                           const MachineModuleInfo &MMI) {
   if (!MF.getLandingPads().empty() || MF.hasEHFunclets() ||
       MMI.hasDebugInfo() ||
       MF.getFunction().hasMetadata(LLVMContext::MD_pcsections))
@@ -1944,7 +1944,7 @@ void AsmPrinter::emitFunctionBody() {
   // are automatically sized.
   bool EmitFunctionSize = MAI->hasDotTypeDotSizeDirective() && !TT.isWasm();
 
-  if (needFuncLabels(*MF) || EmitFunctionSize) {
+  if (needFuncLabels(*MF, *MMI) || EmitFunctionSize) {
     // Create a symbol for the end of function.
     CurrentFnEnd = createTempSymbol("func_end");
     OutStreamer->emitLabel(CurrentFnEnd);
@@ -2587,8 +2587,9 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
   bool NeedsLocalForSize = MAI->needsLocalForSize();
   if (F.hasFnAttribute("patchable-function-entry") ||
       F.hasFnAttribute("function-instrument") ||
-      F.hasFnAttribute("xray-instruction-threshold") || needFuncLabels(MF) ||
-      NeedsLocalForSize || MF.getTarget().Options.EmitStackSizeSection ||
+      F.hasFnAttribute("xray-instruction-threshold") ||
+      needFuncLabels(MF, *MMI) || NeedsLocalForSize ||
+      MF.getTarget().Options.EmitStackSizeSection ||
       MF.getTarget().Options.BBAddrMap || MF.hasBBLabels()) {
     CurrentFnBegin = createTempSymbol("func_begin");
     if (NeedsLocalForSize)
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 49f3fc1a1fa59..087ee02a7f2b3 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -90,7 +90,7 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
   shouldEmitLSDA = shouldEmitPersonality &&
     LSDAEncoding != dwarf::DW_EH_PE_omit;
 
-  const MCAsmInfo &MAI = *MF->getMMI().getContext().getAsmInfo();
+  const MCAsmInfo &MAI = *MF->getContext().getAsmInfo();
   if (MAI.getExceptionHandlingType() != ExceptionHandling::None)
     shouldEmitCFI =
         MAI.usesCFIForEH() && (shouldEmitPersonality || shouldEmitMoves);
diff --git a/llvm/lib/CodeGen/CFGuardLongjmp.cpp b/llvm/lib/CodeGen/CFGuardLongjmp.cpp
index b5d88a7432b17..04de011400568 100644
--- a/llvm/lib/CodeGen/CFGuardLongjmp.cpp
+++ b/llvm/lib/CodeGen/CFGuardLongjmp.cpp
@@ -62,7 +62,7 @@ FunctionPass *llvm::createCFGuardLongjmpPass() { return new CFGuardLongjmp(); }
 bool CFGuardLongjmp::runOnMachineFunction(MachineFunction &MF) {
 
   // Skip modules for which the cfguard flag is not set.
-  if (!MF.getMMI().getModule()->getModuleFlag("cfguard"))
+  if (!MF.getFunction().getParent()->getModuleFlag("cfguard"))
     return false;
 
   // Skip functions that do not have calls to _setjmp.
diff --git a/llvm/lib/CodeGen/EHContGuardCatchret.cpp b/llvm/lib/CodeGen/EHContGuardCatchret.cpp
index f7c6580a73da5..cd1cdb0653618 100644
--- a/llvm/lib/CodeGen/EHContGuardCatchret.cpp
+++ b/llvm/lib/CodeGen/EHContGuardCatchret.cpp
@@ -62,7 +62,7 @@ FunctionPass *llvm::createEHContGuardCatchretPass() {
 bool EHContGuardCatchret::runOnMachineFunction(MachineFunction &MF) {
 
   // Skip modules for which the ehcontguard flag is not set.
-  if (!MF.getMMI().getModule()->getModuleFlag("ehcontguard"))
+  if (!MF.getFunction().getParent()->getModuleFlag("ehcontguard"))
     return false;
 
   // Skip functions that do not have catchret
diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index 4cb1d01f3e8ca..9a27728dcb4dd 100644
--- a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -62,14 +62,8 @@ INITIALIZE_PASS_END(InstructionSelect, DEBUG_TYPE,
                     "Select target instructions out of generic instructions",
                     false, false)
 
-InstructionSelect::InstructionSelect(CodeGenOptLevel OL)
-    : MachineFunctionPass(ID), OptLevel(OL) {}
-
-// In order not to crash when calling getAnalysis during testing with -run-pass
-// we use the default opt level here instead of None, so that the addRequired()
-// calls are made in getAnalysisUsage().
-InstructionSelect::InstructionSelect()
-    : MachineFunctionPass(ID), OptLevel(CodeGenOptLevel::Default) {}
+InstructionSelect::InstructionSelect(CodeGenOptLevel OL, char &PassID)
+    : MachineFunctionPass(PassID), OptLevel(OL) {}
 
 void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<TargetPassConfig>();
diff --git a/llvm/lib/CodeGen/KCFI.cpp b/llvm/lib/CodeGen/KCFI.cpp
index 91c6ac2618279..af19319bc1bb8 100644
--- a/llvm/lib/CodeGen/KCFI.cpp
+++ b/llvm/lib/CodeGen/KCFI.cpp
@@ -89,7 +89,7 @@ bool KCFI::emitCheck(MachineBasicBlock &MBB,
 }
 
 bool KCFI::runOnMachineFunction(MachineFunction &MF) {
-  const Module *M = MF.getMMI().getModule();
+  const Module *M = MF.getFunction().getParent();
   if (!M->getModuleFlag("kcfi"))
     return false;
 
diff --git a/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index 1d13173632833..819187c129c3a 100644
--- a/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -167,25 +167,12 @@ Expected<std::unique_ptr<MCStreamer>> LLVMTargetMachine::createMCStreamer(
     if (Options.MCOptions.ShowMCEncoding)
       MCE.reset(getTarget().createMCCodeEmitter(MII, Context));
 
-    bool UseDwarfDirectory = false;
-    switch (Options.MCOptions.MCUseDwarfDirectory) {
-    case MCTargetOptions::DisableDwarfDirectory:
-      UseDwarfDirectory = false;
-      break;
-    case MCTargetOptions::EnableDwarfDirectory:
-      UseDwarfDirectory = true;
-      break;
-    case MCTargetOptions::DefaultDwarfDirectory:
-      UseDwarfDirectory = MAI.enableDwarfFileDirectoryDefault();
-      break;
-    }
-
     std::unique_ptr<MCAsmBackend> MAB(
         getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions));
     auto FOut = std::make_unique<formatted_raw_ostream>(Out);
     MCStreamer *S = getTarget().createAsmStreamer(
-        Context, std::move(FOut), Options.MCOptions.AsmVerbose,
-        UseDwarfDirectory, InstPrinter, std::move(MCE), std::move(MAB),
+        Context, std::move(FOut), Options.MCOptions.AsmVerbose, true,
+        InstPrinter, std::move(MCE), std::move(MAB),
         Options.MCOptions.ShowMCInst);
     AsmStreamer.reset(S);
     break;
@@ -208,9 +195,7 @@ Expected<std::unique_ptr<MCStreamer>> LLVMTargetMachine::createMCStreamer(
         T, Context, std::unique_ptr<MCAsmBackend>(MAB),
         DwoOut ? MAB->createDwoObjectWriter(Out, *DwoOut)
                : MAB->createObjectWriter(Out),
-        std::unique_ptr<MCCodeEmitter>(MCE), STI, Options.MCOptions.MCRelaxAll,
-        Options.MCOptions.MCIncrementalLinkerCompatible,
-        /*DWARFMustBeAtTheEnd*/ true));
+        std::unique_ptr<MCCodeEmitter>(MCE), STI));
     break;
   }
   case CodeGenFileType::Null:
@@ -276,17 +261,15 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
   const MCRegisterInfo &MRI = *getMCRegisterInfo();
   std::unique_ptr<MCCodeEmitter> MCE(
       getTarget().createMCCodeEmitter(*getMCInstrInfo(), *Ctx));
-  std::unique_ptr<MCAsmBackend> MAB(
-      getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions));
+  MCAsmBackend *MAB =
+      getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions);
   if (!MCE || !MAB)
     return true;
 
   const Triple &T = getTargetTriple();
   std::unique_ptr<MCStreamer> AsmStreamer(getTarget().createMCObjectStreamer(
-      T, *Ctx, std::move(MAB), MAB->createObjectWriter(Out), std::move(MCE),
-      STI, Options.MCOptions.MCRelaxAll,
-      Options.MCOptions.MCIncrementalLinkerCompatible,
-      /*DWARFMustBeAtTheEnd*/ true));
+      T, *Ctx, std::unique_ptr<MCAsmBackend>(MAB), MAB->createObjectWriter(Out),
+      std::move(MCE), STI));
 
   // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
   FunctionPass *Printer =
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 8040f1eeae810..2be7fc90a0e75 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -574,6 +574,39 @@ RTLIB::Libcall RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
   }
 }
 
+void RTLIB::initCmpLibcallCCs(ISD::CondCode *CmpLibcallCCs) {
+  std::fill(CmpLibcallCCs, CmpLibcallCCs + RTLIB::UNKNOWN_LIBCALL,
+            ISD::SETCC_INVALID);
+  CmpLibcallCCs[RTLIB::OEQ_F32] = ISD::SETEQ;
+  CmpLibcallCCs[RTLIB::OEQ_F64] = ISD::SETEQ;
+  CmpLibcallCCs[RTLIB::OEQ_F128] = ISD::SETEQ;
+  CmpLibcallCCs[RTLIB::OEQ_PPCF128] = ISD::SETEQ;
+  CmpLibcallCCs[RTLIB::UNE_F32] = ISD::SETNE;
+  CmpLibcallCCs[RTLIB::UNE_F64] = ISD::SETNE;
+  CmpLibcallCCs[RTLIB::UNE_F128] = ISD::SETNE;
+  CmpLibcallCCs[RTLIB::UNE_PPCF128] = ISD::SETNE;
+  CmpLibcallCCs[RTLIB::OGE_F32] = ISD::SETGE;
+  CmpLibcallCCs[RTLIB::OGE_F64] = ISD::SETGE;
+  CmpLibcallCCs[RTLIB::OGE_F128] = ISD::SETGE;
+  CmpLibcallCCs[RTLIB::OGE_PPCF128] = ISD::SETGE;
+  CmpLibcallCCs[RTLIB::OLT_F32] = ISD::SETLT;
+  CmpLibcallCCs[RTLIB::OLT_F64] = ISD::SETLT;
+  CmpLibcallCCs[RTLIB::OLT_F128] = ISD::SETLT;
+  CmpLibcallCCs[RTLIB::OLT_PPCF128] = ISD::SETLT;
+  CmpLibcallCCs[RTLIB::OLE_F32] = ISD::SETLE;
+  CmpLibcallCCs[RTLIB::OLE_F64] = ISD::SETLE;
+  CmpLibcallCCs[RTLIB::OLE_F128] = ISD::SETLE;
+  CmpLibcallCCs[RTLIB::OLE_PPCF128] = ISD::SETLE;
+  CmpLibcallCCs[RTLIB::OGT_F32] = ISD::SETGT;
+  CmpLibcallCCs[RTLIB::OGT_F64] = ISD::SETGT;
+  CmpLibcallCCs[RTLIB::OGT_F128] = ISD::SETGT;
+  CmpLibcallCCs[RTLIB::OGT_PPCF128] = ISD::SETGT;
+  CmpLibcallCCs[RTLIB::UO_F32] = ISD::SETNE;
+  CmpLibcallCCs[RTLIB::UO_F64] = ISD::SETNE;
+  CmpLibcallCCs[RTLIB::UO_F128] = ISD::SETNE;
+  CmpLibcallCCs[RTLIB::UO_PPCF128] = ISD::SETNE;
+}
+
 /// NOTE: The TargetMachine owns TLOF.
 TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm)
     : TM(tm), Libcalls(TM.getTargetTriple()) {
@@ -608,6 +641,8 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm)
 
   MinCmpXchgSizeInBits = 0;
   SupportsUnalignedAtomics = false;
+
+  RTLIB::initCmpLibcallCCs(CmpLibcallCCs);
 }
 
 void TargetLoweringBase::initActions() {
diff --git a/llvm/lib/DWARFLinker/Classic/DWARFStreamer.cpp b/llvm/lib/DWARFLinker/Classic/DWARFStreamer.cpp
index 74df2eb9d68ae..45a62daffb033 100644
--- a/llvm/lib/DWARFLinker/Classic/DWARFStreamer.cpp
+++ b/llvm/lib/DWARFLinker/Classic/DWARFStreamer.cpp
@@ -62,6 +62,8 @@ Error DwarfStreamer::init(Triple TheTriple,
                              TripleName.c_str());
 
   MCTargetOptions MCOptions = mc::InitMCTargetOptionsFromFlags();
+  MCOptions.AsmVerbose = true;
+  MCOptions.MCUseDwarfDirectory = MCTargetOptions::EnableDwarfDirectory;
   MAI.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
   if (!MAI)
     return createStringError(std::errc::invalid_argument,
@@ -110,8 +112,7 @@ Error DwarfStreamer::init(Triple TheTriple,
     MS = TheTarget->createMCObjectStreamer(
         TheTriple, *MC, std::unique_ptr<MCAsmBackend>(MAB),
         MAB->createObjectWriter(OutFile), std::unique_ptr<MCCodeEmitter>(MCE),
-        *MSTI, MCOptions.MCRelaxAll, MCOptions.MCIncrementalLinkerCompatible,
-        /*DWARFMustBeAtTheEnd*/ false);
+        *MSTI);
     break;
   }
   }
diff --git a/llvm/lib/DWARFLinker/Parallel/DWARFEmitterImpl.cpp b/llvm/lib/DWARFLinker/Parallel/DWARFEmitterImpl.cpp
index b9edcb63a3401..f790766348b53 100644
--- a/llvm/lib/DWARFLinker/Parallel/DWARFEmitterImpl.cpp
+++ b/llvm/lib/DWARFLinker/Parallel/DWARFEmitterImpl.cpp
@@ -41,6 +41,8 @@ Error DwarfEmitterImpl::init(Triple TheTriple,
                              TripleName.c_str());
 
   MCTargetOptions MCOptions = mc::InitMCTargetOptionsFromFlags();
+  MCOptions.AsmVerbose = true;
+  MCOptions.MCUseDwarfDirectory = MCTargetOptions::EnableDwarfDirectory;
   MAI.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
   if (!MAI)
     return createStringError(std::errc::invalid_argument,
@@ -89,8 +91,7 @@ Error DwarfEmitterImpl::init(Triple TheTriple,
     MS = TheTarget->createMCObjectStreamer(
         TheTriple, *MC, std::unique_ptr<MCAsmBackend>(MAB),
         MAB->createObjectWriter(OutFile), std::unique_ptr<MCCodeEmitter>(MCE),
-        *MSTI, MCOptions.MCRelaxAll, MCOptions.MCIncrementalLinkerCompatible,
-        /*DWARFMustBeAtTheEnd*/ false);
+        *MSTI);
     break;
   }
   }
diff --git a/llvm/lib/IR/CMakeLists.txt b/llvm/lib/IR/CMakeLists.txt
index 8bf199f0f44c9..91e0e0cc65f36 100644
--- a/llvm/lib/IR/CMakeLists.txt
+++ b/llvm/lib/IR/CMakeLists.txt
@@ -82,7 +82,6 @@ add_llvm_component_library(LLVMCore
   ${LLVM_PTHREAD_LIB}
 
   DEPENDS
-  vt_gen
   intrinsics_gen
 
   LINK_COMPONENTS
diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp
index de3db557d8b50..8ce0caadbafd1 100644
--- a/llvm/lib/IR/RuntimeLibcalls.cpp
+++ b/llvm/lib/IR/RuntimeLibcalls.cpp
@@ -344,36 +344,3 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT) {
     }
   }
 }
-
-void RuntimeLibcallsInfo::initCmpLibcallCCs() {
-  std::fill(CmpLibcallCCs, CmpLibcallCCs + RTLIB::UNKNOWN_LIBCALL,
-            ISD::SETCC_INVALID);
-  CmpLibcallCCs[RTLIB::OEQ_F32] = ISD::SETEQ;
-  CmpLibcallCCs[RTLIB::OEQ_F64] = ISD::SETEQ;
-  CmpLibcallCCs[RTLIB::OEQ_F128] = ISD::SETEQ;
-  CmpLibcallCCs[RTLIB::OEQ_PPCF128] = ISD::SETEQ;
-  CmpLibcallCCs[RTLIB::UNE_F32] = ISD::SETNE;
-  CmpLibcallCCs[RTLIB::UNE_F64] = ISD::SETNE;
-  CmpLibcallCCs[RTLIB::UNE_F128] = ISD::SETNE;
-  CmpLibcallCCs[RTLIB::UNE_PPCF128] = ISD::SETNE;
-  CmpLibcallCCs[RTLIB::OGE_F32] = ISD::SETGE;
-  CmpLibcallCCs[RTLIB::OGE_F64] = ISD::SETGE;
-  CmpLibcallCCs[RTLIB::OGE_F128] = ISD::SETGE;
-  CmpLibcallCCs[RTLIB::OGE_PPCF128] = ISD::SETGE;
-  CmpLibcallCCs[RTLIB::OLT_F32] = ISD::SETLT;
-  CmpLibcallCCs[RTLIB::OLT_F64] = ISD::SETLT;
-  CmpLibcallCCs[RTLIB::OLT_F128] = ISD::SETLT;
-  CmpLibcallCCs[RTLIB::OLT_PPCF128] = ISD::SETLT;
-  CmpLibcallCCs[RTLIB::OLE_F32] = ISD::SETLE;
-  CmpLibcallCCs[RTLIB::OLE_F64] = ISD::SETLE;
-  CmpLibcallCCs[RTLIB::OLE_F128] = ISD::SETLE;
-  CmpLibcallCCs[RTLIB::OLE_PPCF128] = ISD::SETLE;
-  CmpLibcallCCs[RTLIB::OGT_F32] = ISD::SETGT;
-  CmpLibcallCCs[RTLIB::OGT_F64] = ISD::SETGT;
-  CmpLibcallCCs[RTLIB::OGT_F128] = ISD::SETGT;
-  CmpLibcallCCs[RTLIB::OGT_PPCF128] = ISD::SETGT;
-  CmpLibcallCCs[RTLIB::UO_F32] = ISD::SETNE;
-  CmpLibcallCCs[RTLIB::UO_F64] = ISD::SETNE;
-  CmpLibcallCCs[RTLIB::UO_F128] = ISD::SETNE;
-  CmpLibcallCCs[RTLIB::UO_PPCF128] = ISD::SETNE;
-}
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index d303f228aa72c..bb3c9f7acdb8e 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -1360,7 +1360,6 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) {
 
 SmallVector<const char *> LTO::getRuntimeLibcallSymbols(const Triple &TT) {
   RTLIB::RuntimeLibcallsInfo Libcalls(TT);
-
   SmallVector<const char *> LibcallSymbols;
   copy_if(Libcalls.getLibcallNames(), std::back_inserter(LibcallSymbols),
           [](const char *Name) { return Name; });
diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp
index 24209e456b5e2..db93a33bbe3f7 100644
--- a/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/llvm/lib/MC/MCAsmStreamer.cpp
@@ -55,9 +55,9 @@ class MCAsmStreamer final : public MCStreamer {
   raw_svector_ostream CommentStream;
   raw_null_ostream NullStream;
 
-  unsigned IsVerboseAsm : 1;
-  unsigned ShowInst : 1;
-  unsigned UseDwarfDirectory : 1;
+  bool IsVerboseAsm = false;
+  bool ShowInst = false;
+  bool UseDwarfDirectory = false;
 
   void EmitRegisterName(int64_t Register);
   void PrintQuotedString(StringRef Data, raw_ostream &OS) const;
@@ -72,24 +72,40 @@ class MCAsmStreamer final : public MCStreamer {
 
 public:
   MCAsmStreamer(MCContext &Context, std::unique_ptr<formatted_raw_ostream> os,
-                bool isVerboseAsm, bool useDwarfDirectory,
                 MCInstPrinter *printer, std::unique_ptr<MCCodeEmitter> emitter,
-                std::unique_ptr<MCAsmBackend> asmbackend, bool showInst)
+                std::unique_ptr<MCAsmBackend> asmbackend)
       : MCStreamer(Context), OSOwner(std::move(os)), OS(*OSOwner),
         MAI(Context.getAsmInfo()), InstPrinter(printer),
         Assembler(std::make_unique<MCAssembler>(
             Context, std::move(asmbackend), std::move(emitter),
             (asmbackend) ? asmbackend->createObjectWriter(NullStream)
                          : nullptr)),
-        CommentStream(CommentToEmit), IsVerboseAsm(isVerboseAsm),
-        ShowInst(showInst), UseDwarfDirectory(useDwarfDirectory) {
+        CommentStream(CommentToEmit) {
     assert(InstPrinter);
-    if (IsVerboseAsm)
-        InstPrinter->setCommentStream(CommentStream);
     if (Assembler->getBackendPtr())
       setAllowAutoPadding(Assembler->getBackend().allowAutoPadding());
 
     Context.setUseNamesOnTempLabels(true);
+
+    auto *TO = Context.getTargetOptions();
+    if (!TO)
+      return;
+    IsVerboseAsm = TO->AsmVerbose;
+    if (IsVerboseAsm)
+      InstPrinter->setCommentStream(CommentStream);
+    ShowInst = TO->ShowMCInst;
+    switch (TO->MCUseDwarfDirectory) {
+    case MCTargetOptions::DisableDwarfDirectory:
+      UseDwarfDirectory = false;
+      break;
+    case MCTargetOptions::EnableDwarfDirectory:
+      UseDwarfDirectory = true;
+      break;
+    case MCTargetOptions::DefaultDwarfDirectory:
+      UseDwarfDirectory =
+          Context.getAsmInfo()->enableDwarfFileDirectoryDefault();
+      break;
+    }
   }
 
   MCAssembler &getAssembler() { return *Assembler; }
@@ -2646,7 +2662,6 @@ MCStreamer *llvm::createAsmStreamer(MCContext &Context,
                                     std::unique_ptr<MCCodeEmitter> &&CE,
                                     std::unique_ptr<MCAsmBackend> &&MAB,
                                     bool ShowInst) {
-  return new MCAsmStreamer(Context, std::move(OS), isVerboseAsm,
-                           useDwarfDirectory, IP, std::move(CE), std::move(MAB),
-                           ShowInst);
+  return new MCAsmStreamer(Context, std::move(OS), IP, std::move(CE),
+                           std::move(MAB));
 }
diff --git a/llvm/lib/MC/MCMachOStreamer.cpp b/llvm/lib/MC/MCMachOStreamer.cpp
index c8bc819129850..5231d10626f85 100644
--- a/llvm/lib/MC/MCMachOStreamer.cpp
+++ b/llvm/lib/MC/MCMachOStreamer.cpp
@@ -54,9 +54,6 @@ class MCMachOStreamer : public MCObjectStreamer {
   /// need for local relocations. False by default.
   bool LabelSections;
 
-  bool DWARFMustBeAtTheEnd;
-  bool CreatedADWARFSection;
-
   /// HasSectionLabel - map of which sections have already had a non-local
   /// label emitted to them. Used so we don't emit extraneous linker local
   /// labels in the middle of the section.
@@ -70,16 +67,13 @@ class MCMachOStreamer : public MCObjectStreamer {
 public:
   MCMachOStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> MAB,
                   std::unique_ptr<MCObjectWriter> OW,
-                  std::unique_ptr<MCCodeEmitter> Emitter,
-                  bool DWARFMustBeAtTheEnd, bool label)
+                  std::unique_ptr<MCCodeEmitter> Emitter, bool label)
       : MCObjectStreamer(Context, std::move(MAB), std::move(OW),
                          std::move(Emitter)),
-        LabelSections(label), DWARFMustBeAtTheEnd(DWARFMustBeAtTheEnd),
-        CreatedADWARFSection(false) {}
+        LabelSections(label) {}
 
   /// state management
   void reset() override {
-    CreatedADWARFSection = false;
     HasSectionLabel.clear();
     MCObjectStreamer::reset();
   }
@@ -141,48 +135,9 @@ class MCMachOStreamer : public MCObjectStreamer {
 
 } // end anonymous namespace.
 
-static bool canGoAfterDWARF(const MCSectionMachO &MSec) {
-  // These sections are created by the assembler itself after the end of
-  // the .s file.
-  StringRef SegName = MSec.getSegmentName();
-  StringRef SecName = MSec.getName();
-
-  if (SegName == "__LD" && SecName == "__compact_unwind")
-    return true;
-
-  if (SegName == "__IMPORT") {
-    if (SecName == "__jump_table")
-      return true;
-
-    if (SecName == "__pointers")
-      return true;
-  }
-
-  if (SegName == "__TEXT" && SecName == "__eh_frame")
-    return true;
-
-  if (SegName == "__DATA" &&
-      (SecName == "__llvm_addrsig" || SecName == "__nl_symbol_ptr" ||
-       SecName == "__thread_ptr"))
-    return true;
-  if (SegName == "__LLVM" && (SecName == "__cg_profile"))
-    return true;
-
-  if (SegName == "__DATA" && SecName == "__auth_ptr")
-    return true;
-
-  return false;
-}
-
 void MCMachOStreamer::changeSection(MCSection *Section, uint32_t Subsection) {
   // Change the section normally.
-  bool Created = changeSectionImpl(Section, Subsection);
-  const MCSectionMachO &MSec = *cast<MCSectionMachO>(Section);
-  StringRef SegName = MSec.getSegmentName();
-  if (SegName == "__DWARF")
-    CreatedADWARFSection = true;
-  else if (Created && DWARFMustBeAtTheEnd && !canGoAfterDWARF(MSec))
-    assert(!CreatedADWARFSection && "Creating regular section after DWARF");
+  changeSectionImpl(Section, Subsection);
 
   // Output a linker-local symbol so we don't need section-relative local
   // relocations. The linker hates us when we do that.
@@ -576,9 +531,8 @@ MCStreamer *llvm::createMachOStreamer(MCContext &Context,
                                       std::unique_ptr<MCCodeEmitter> &&CE,
                                       bool DWARFMustBeAtTheEnd,
                                       bool LabelSections) {
-  MCMachOStreamer *S =
-      new MCMachOStreamer(Context, std::move(MAB), std::move(OW), std::move(CE),
-                          DWARFMustBeAtTheEnd, LabelSections);
+  MCMachOStreamer *S = new MCMachOStreamer(
+      Context, std::move(MAB), std::move(OW), std::move(CE), LabelSections);
   const Triple &Target = Context.getTargetTriple();
   S->emitVersionForTarget(
       Target, Context.getObjectFileInfo()->getSDKVersion(),
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index d05712bca73cd..992b69f1c5f32 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -658,7 +658,7 @@ class AsmParser : public MCAsmParser {
 
   bool parseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm"
 
-  bool parseDirectiveAbort(); // ".abort"
+  bool parseDirectiveAbort(SMLoc DirectiveLoc); // ".abort"
   bool parseDirectiveInclude(); // ".include"
   bool parseDirectiveIncbin(); // ".incbin"
 
@@ -2120,7 +2120,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
     case DK_LCOMM:
       return parseDirectiveComm(/*IsLocal=*/true);
     case DK_ABORT:
-      return parseDirectiveAbort();
+      return parseDirectiveAbort(IDLoc);
     case DK_INCLUDE:
       return parseDirectiveInclude();
     case DK_INCBIN:
@@ -5095,21 +5095,17 @@ bool AsmParser::parseDirectiveComm(bool IsLocal) {
 
 /// parseDirectiveAbort
 ///  ::= .abort [... message ...]
-bool AsmParser::parseDirectiveAbort() {
-  // FIXME: Use loc from directive.
-  SMLoc Loc = getLexer().getLoc();
-
+bool AsmParser::parseDirectiveAbort(SMLoc DirectiveLoc) {
   StringRef Str = parseStringToEndOfStatement();
   if (parseEOL())
     return true;
 
   if (Str.empty())
-    return Error(Loc, ".abort detected. Assembly stopping.");
-  else
-    return Error(Loc, ".abort '" + Str + "' detected. Assembly stopping.");
-  // FIXME: Actually abort assembly here.
+    return Error(DirectiveLoc, ".abort detected. Assembly stopping");
 
-  return false;
+  // FIXME: Actually abort assembly here.
+  return Error(DirectiveLoc,
+               ".abort '" + Str + "' detected. Assembly stopping");
 }
 
 /// parseDirectiveInclude
diff --git a/llvm/lib/MC/MCWinCOFFStreamer.cpp b/llvm/lib/MC/MCWinCOFFStreamer.cpp
index 538244cb8ba6a..a14d3bcf37f3f 100644
--- a/llvm/lib/MC/MCWinCOFFStreamer.cpp
+++ b/llvm/lib/MC/MCWinCOFFStreamer.cpp
@@ -27,6 +27,7 @@
 #include "llvm/MC/MCObjectWriter.h"
 #include "llvm/MC/MCSectionCOFF.h"
 #include "llvm/MC/MCSymbolCOFF.h"
+#include "llvm/MC/MCTargetOptions.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
@@ -45,7 +46,11 @@ MCWinCOFFStreamer::MCWinCOFFStreamer(MCContext &Context,
                                      std::unique_ptr<MCCodeEmitter> CE,
                                      std::unique_ptr<MCObjectWriter> OW)
     : MCObjectStreamer(Context, std::move(MAB), std::move(OW), std::move(CE)),
-      CurSymbol(nullptr) {}
+      CurSymbol(nullptr) {
+  auto *TO = Context.getTargetOptions();
+  if (TO && TO->MCIncrementalLinkerCompatible)
+    getAssembler().setIncrementalLinkerCompatible(true);
+}
 
 void MCWinCOFFStreamer::emitInstToData(const MCInst &Inst,
                                        const MCSubtargetInfo &STI) {
diff --git a/llvm/lib/MC/MCXCOFFStreamer.cpp b/llvm/lib/MC/MCXCOFFStreamer.cpp
index 175d7d6b6c31a..9cd46e504b554 100644
--- a/llvm/lib/MC/MCXCOFFStreamer.cpp
+++ b/llvm/lib/MC/MCXCOFFStreamer.cpp
@@ -159,15 +159,6 @@ void MCXCOFFStreamer::emitInstToData(const MCInst &Inst,
   DF->getContents().append(Code.begin(), Code.end());
 }
 
-MCStreamer *llvm::createXCOFFStreamer(MCContext &Context,
-                                      std::unique_ptr<MCAsmBackend> &&MAB,
-                                      std::unique_ptr<MCObjectWriter> &&OW,
-                                      std::unique_ptr<MCCodeEmitter> &&CE) {
-  MCXCOFFStreamer *S = new MCXCOFFStreamer(Context, std::move(MAB),
-                                           std::move(OW), std::move(CE));
-  return S;
-}
-
 void MCXCOFFStreamer::emitXCOFFLocalCommonSymbol(MCSymbol *LabelSym,
                                                  uint64_t Size,
                                                  MCSymbol *CsectSym,
diff --git a/llvm/lib/MC/TargetRegistry.cpp b/llvm/lib/MC/TargetRegistry.cpp
index 0aa48916c7d25..4190117c1e264 100644
--- a/llvm/lib/MC/TargetRegistry.cpp
+++ b/llvm/lib/MC/TargetRegistry.cpp
@@ -9,6 +9,12 @@
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCTargetOptions.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
 #include <vector>
@@ -17,6 +23,95 @@ using namespace llvm;
 // Clients are responsible for avoid race conditions in registration.
 static Target *FirstTarget = nullptr;
 
+MCStreamer *Target::createMCObjectStreamer(
+    const Triple &T, MCContext &Ctx, std::unique_ptr<MCAsmBackend> TAB,
+    std::unique_ptr<MCObjectWriter> OW, std::unique_ptr<MCCodeEmitter> Emitter,
+    const MCSubtargetInfo &STI) const {
+  MCStreamer *S = nullptr;
+  switch (T.getObjectFormat()) {
+  case Triple::UnknownObjectFormat:
+    llvm_unreachable("Unknown object format");
+  case Triple::COFF:
+    assert((T.isOSWindows() || T.isUEFI()) &&
+           "only Windows and UEFI COFF are supported");
+    S = COFFStreamerCtorFn(Ctx, std::move(TAB), std::move(OW),
+                           std::move(Emitter));
+    break;
+  case Triple::MachO:
+    if (MachOStreamerCtorFn)
+      S = MachOStreamerCtorFn(Ctx, std::move(TAB), std::move(OW),
+                              std::move(Emitter));
+    else
+      S = createMachOStreamer(Ctx, std::move(TAB), std::move(OW),
+                              std::move(Emitter), false);
+    break;
+  case Triple::ELF:
+    if (ELFStreamerCtorFn)
+      S = ELFStreamerCtorFn(T, Ctx, std::move(TAB), std::move(OW),
+                            std::move(Emitter));
+    else
+      S = createELFStreamer(Ctx, std::move(TAB), std::move(OW),
+                            std::move(Emitter));
+    break;
+  case Triple::Wasm:
+    S = createWasmStreamer(Ctx, std::move(TAB), std::move(OW),
+                           std::move(Emitter));
+    break;
+  case Triple::GOFF:
+    S = createGOFFStreamer(Ctx, std::move(TAB), std::move(OW),
+                           std::move(Emitter));
+    break;
+  case Triple::XCOFF:
+    S = XCOFFStreamerCtorFn(T, Ctx, std::move(TAB), std::move(OW),
+                            std::move(Emitter));
+    break;
+  case Triple::SPIRV:
+    S = createSPIRVStreamer(Ctx, std::move(TAB), std::move(OW),
+                            std::move(Emitter));
+    break;
+  case Triple::DXContainer:
+    S = createDXContainerStreamer(Ctx, std::move(TAB), std::move(OW),
+                                  std::move(Emitter));
+    break;
+  }
+  if (ObjectTargetStreamerCtorFn)
+    ObjectTargetStreamerCtorFn(*S, STI);
+  return S;
+}
+
+MCStreamer *Target::createMCObjectStreamer(
+    const Triple &T, MCContext &Ctx, std::unique_ptr<MCAsmBackend> &&TAB,
+    std::unique_ptr<MCObjectWriter> &&OW,
+    std::unique_ptr<MCCodeEmitter> &&Emitter, const MCSubtargetInfo &STI, bool,
+    bool, bool) const {
+  return createMCObjectStreamer(T, Ctx, std::move(TAB), std::move(OW),
+                                std::move(Emitter), STI);
+}
+
+MCStreamer *Target::createAsmStreamer(MCContext &Ctx,
+                                      std::unique_ptr<formatted_raw_ostream> OS,
+                                      MCInstPrinter *IP,
+                                      std::unique_ptr<MCCodeEmitter> CE,
+                                      std::unique_ptr<MCAsmBackend> TAB) const {
+  formatted_raw_ostream &OSRef = *OS;
+  MCStreamer *S = llvm::createAsmStreamer(Ctx, std::move(OS), false, false, IP,
+                                          std::move(CE), std::move(TAB), false);
+  auto *TO = Ctx.getTargetOptions();
+  createAsmTargetStreamer(*S, OSRef, IP, TO && TO->AsmVerbose);
+  return S;
+}
+
+MCStreamer *Target::createAsmStreamer(MCContext &Ctx,
+                                      std::unique_ptr<formatted_raw_ostream> OS,
+                                      bool IsVerboseAsm, bool UseDwarfDirectory,
+                                      MCInstPrinter *IP,
+                                      std::unique_ptr<MCCodeEmitter> &&CE,
+                                      std::unique_ptr<MCAsmBackend> &&TAB,
+                                      bool ShowInst) const {
+  return createAsmStreamer(Ctx, std::move(OS), IP, std::move(CE),
+                           std::move(TAB));
+}
+
 iterator_range<TargetRegistry::iterator> TargetRegistry::targets() {
   return make_range(iterator(FirstTarget), iterator());
 }
diff --git a/llvm/lib/SandboxIR/SandboxIR.cpp b/llvm/lib/SandboxIR/SandboxIR.cpp
index f392704a6d27e..209b677bafbb5 100644
--- a/llvm/lib/SandboxIR/SandboxIR.cpp
+++ b/llvm/lib/SandboxIR/SandboxIR.cpp
@@ -496,6 +496,50 @@ void LoadInst::dump() const {
   dump(dbgs());
   dbgs() << "\n";
 }
+#endif // NDEBUG
+StoreInst *StoreInst::create(Value *V, Value *Ptr, MaybeAlign Align,
+                             Instruction *InsertBefore, Context &Ctx) {
+  llvm::Instruction *BeforeIR = InsertBefore->getTopmostLLVMInstruction();
+  auto &Builder = Ctx.getLLVMIRBuilder();
+  Builder.SetInsertPoint(BeforeIR);
+  auto *NewSI =
+      Builder.CreateAlignedStore(V->Val, Ptr->Val, Align, /*isVolatile=*/false);
+  auto *NewSBI = Ctx.createStoreInst(NewSI);
+  return NewSBI;
+}
+StoreInst *StoreInst::create(Value *V, Value *Ptr, MaybeAlign Align,
+                             BasicBlock *InsertAtEnd, Context &Ctx) {
+  auto *InsertAtEndIR = cast<llvm::BasicBlock>(InsertAtEnd->Val);
+  auto &Builder = Ctx.getLLVMIRBuilder();
+  Builder.SetInsertPoint(InsertAtEndIR);
+  auto *NewSI =
+      Builder.CreateAlignedStore(V->Val, Ptr->Val, Align, /*isVolatile=*/false);
+  auto *NewSBI = Ctx.createStoreInst(NewSI);
+  return NewSBI;
+}
+
+bool StoreInst::classof(const Value *From) {
+  return From->getSubclassID() == ClassID::Store;
+}
+
+Value *StoreInst::getValueOperand() const {
+  return Ctx.getValue(cast<llvm::StoreInst>(Val)->getValueOperand());
+}
+
+Value *StoreInst::getPointerOperand() const {
+  return Ctx.getValue(cast<llvm::StoreInst>(Val)->getPointerOperand());
+}
+
+#ifndef NDEBUG
+void StoreInst::dump(raw_ostream &OS) const {
+  dumpCommonPrefix(OS);
+  dumpCommonSuffix(OS);
+}
+
+void StoreInst::dump() const {
+  dump(dbgs());
+  dbgs() << "\n";
+}
 
 void OpaqueInst::dump(raw_ostream &OS) const {
   dumpCommonPrefix(OS);
@@ -581,7 +625,8 @@ Value *Context::registerValue(std::unique_ptr<Value> &&VPtr) {
   assert(VPtr->getSubclassID() != Value::ClassID::User &&
          "Can't register a user!");
   Value *V = VPtr.get();
-  auto Pair = LLVMValueToValueMap.insert({VPtr->Val, std::move(VPtr)});
+  [[maybe_unused]] auto Pair =
+         LLVMValueToValueMap.insert({VPtr->Val, std::move(VPtr)});
   assert(Pair.second && "Already exists!");
   return V;
 }
@@ -618,6 +663,11 @@ Value *Context::getOrCreateValueInternal(llvm::Value *LLVMV, llvm::User *U) {
     It->second = std::unique_ptr<LoadInst>(new LoadInst(LLVMLd, *this));
     return It->second.get();
   }
+  case llvm::Instruction::Store: {
+    auto *LLVMSt = cast<llvm::StoreInst>(LLVMV);
+    It->second = std::unique_ptr<StoreInst>(new StoreInst(LLVMSt, *this));
+    return It->second.get();
+  }
   default:
     break;
   }
@@ -641,6 +691,11 @@ LoadInst *Context::createLoadInst(llvm::LoadInst *LI) {
   return cast<LoadInst>(registerValue(std::move(NewPtr)));
 }
 
+StoreInst *Context::createStoreInst(llvm::StoreInst *SI) {
+  auto NewPtr = std::unique_ptr<StoreInst>(new StoreInst(SI, *this));
+  return cast<StoreInst>(registerValue(std::move(NewPtr)));
+}
+
 Value *Context::getValue(llvm::Value *V) const {
   auto It = LLVMValueToValueMap.find(V);
   if (It != LLVMValueToValueMap.end())
diff --git a/llvm/lib/Support/TimeProfiler.cpp b/llvm/lib/Support/TimeProfiler.cpp
index c2014028ddadc..9612db7d30f98 100644
--- a/llvm/lib/Support/TimeProfiler.cpp
+++ b/llvm/lib/Support/TimeProfiler.cpp
@@ -73,20 +73,12 @@ struct llvm::TimeTraceProfilerEntry {
   const TimePointType Start;
   TimePointType End;
   const std::string Name;
-  TimeTraceMetadata Metadata;
-
+  const std::string Detail;
   const bool AsyncEvent = false;
   TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N,
                          std::string &&Dt, bool Ae)
-      : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), Metadata(),
-        AsyncEvent(Ae) {
-    Metadata.Detail = std::move(Dt);
-  }
-
-  TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N,
-                         TimeTraceMetadata &&Mt, bool Ae)
       : Start(std::move(S)), End(std::move(E)), Name(std::move(N)),
-        Metadata(std::move(Mt)), AsyncEvent(Ae) {}
+        Detail(std::move(Dt)), AsyncEvent(Ae) {}
 
   // Calculate timings for FlameGraph. Cast time points to microsecond precision
   // rather than casting duration. This avoids truncation issues causing inner
@@ -105,12 +97,10 @@ struct llvm::TimeTraceProfilerEntry {
 };
 
 struct llvm::TimeTraceProfiler {
-  TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "",
-                    bool TimeTraceVerbose = false)
+  TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "")
       : BeginningOfTime(system_clock::now()), StartTime(ClockType::now()),
         ProcName(ProcName), Pid(sys::Process::getProcessId()),
-        Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity),
-        TimeTraceVerbose(TimeTraceVerbose) {
+        Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity) {
     llvm::get_thread_name(ThreadName);
   }
 
@@ -123,15 +113,6 @@ struct llvm::TimeTraceProfiler {
     return Stack.back().get();
   }
 
-  TimeTraceProfilerEntry *
-  begin(std::string Name, llvm::function_ref<TimeTraceMetadata()> Metadata,
-        bool AsyncEvent = false) {
-    Stack.emplace_back(std::make_unique<TimeTraceProfilerEntry>(
-        ClockType::now(), TimePointType(), std::move(Name), Metadata(),
-        AsyncEvent));
-    return Stack.back().get();
-  }
-
   void end() {
     assert(!Stack.empty() && "Must call begin() first");
     end(*Stack.back());
@@ -203,15 +184,8 @@ struct llvm::TimeTraceProfiler {
           J.attribute("dur", DurUs);
         }
         J.attribute("name", E.Name);
-        if (!E.Metadata.isEmpty()) {
-          J.attributeObject("args", [&] {
-            if (!E.Metadata.Detail.empty())
-              J.attribute("detail", E.Metadata.Detail);
-            if (!E.Metadata.File.empty())
-              J.attribute("file", E.Metadata.File);
-            if (E.Metadata.Line > 0)
-              J.attribute("line", E.Metadata.Line);
-          });
+        if (!E.Detail.empty()) {
+          J.attributeObject("args", [&] { J.attribute("detail", E.Detail); });
         }
       });
 
@@ -333,25 +307,14 @@ struct llvm::TimeTraceProfiler {
 
   // Minimum time granularity (in microseconds)
   const unsigned TimeTraceGranularity;
-
-  // Make time trace capture verbose event details (e.g. source filenames). This
-  // can increase the size of the output by 2-3 times.
-  const bool TimeTraceVerbose;
 };
 
-bool llvm::isTimeTraceVerbose() {
-  return getTimeTraceProfilerInstance() &&
-         getTimeTraceProfilerInstance()->TimeTraceVerbose;
-}
-
 void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity,
-                                       StringRef ProcName,
-                                       bool TimeTraceVerbose) {
+                                       StringRef ProcName) {
   assert(TimeTraceProfilerInstance == nullptr &&
          "Profiler should not be initialized");
   TimeTraceProfilerInstance = new TimeTraceProfiler(
-      TimeTraceGranularity, llvm::sys::path::filename(ProcName),
-      TimeTraceVerbose);
+      TimeTraceGranularity, llvm::sys::path::filename(ProcName));
 }
 
 // Removes all TimeTraceProfilerInstances.
@@ -418,14 +381,6 @@ llvm::timeTraceProfilerBegin(StringRef Name,
   return nullptr;
 }
 
-TimeTraceProfilerEntry *
-llvm::timeTraceProfilerBegin(StringRef Name,
-                             llvm::function_ref<TimeTraceMetadata()> Metadata) {
-  if (TimeTraceProfilerInstance != nullptr)
-    return TimeTraceProfilerInstance->begin(std::string(Name), Metadata, false);
-  return nullptr;
-}
-
 TimeTraceProfilerEntry *llvm::timeTraceAsyncProfilerBegin(StringRef Name,
                                                           StringRef Detail) {
   if (TimeTraceProfilerInstance != nullptr)
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 63358c1568a35..1e60ce9c40df8 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -1728,8 +1728,7 @@ void AArch64AsmPrinter::LowerLOADauthptrstatic(const MachineInstr &MI) {
     assert(GAOp.getOffset() == 0 &&
            "non-zero offset for $auth_ptr$ stub slots is not supported");
     const MCSymbol *GASym = TM.getSymbol(GAOp.getGlobal());
-    AuthPtrStubSym =
-        TLOF.getAuthPtrSlotSymbol(TM, &MF->getMMI(), GASym, Key, Disc);
+    AuthPtrStubSym = TLOF.getAuthPtrSlotSymbol(TM, MMI, GASym, Key, Disc);
   } else {
     assert(TM.getTargetTriple().isOSBinFormatMachO() &&
            "LOADauthptrstatic is implemented only for MachO/ELF");
@@ -1740,8 +1739,7 @@ void AArch64AsmPrinter::LowerLOADauthptrstatic(const MachineInstr &MI) {
     assert(GAOp.getOffset() == 0 &&
            "non-zero offset for $auth_ptr$ stub slots is not supported");
     const MCSymbol *GASym = TM.getSymbol(GAOp.getGlobal());
-    AuthPtrStubSym =
-        TLOF.getAuthPtrSlotSymbol(TM, &MF->getMMI(), GASym, Key, Disc);
+    AuthPtrStubSym = TLOF.getAuthPtrSlotSymbol(TM, MMI, GASym, Key, Disc);
   }
 
   MachineOperand StubMOHi =
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 6f885f4588c4b..2bbb4997d56a5 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -17,6 +17,11 @@ class CCIfBigEndian<CCAction A> :
 class CCIfILP32<CCAction A> :
   CCIf<"State.getMachineFunction().getDataLayout().getPointerSize() == 4", A>;
 
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>
+    : CCIf<!strconcat("State.getMachineFunction()"
+                      ".getSubtarget<AArch64Subtarget>().", F),
+           A>;
 
 //===----------------------------------------------------------------------===//
 // ARM AAPCS64 Calling Convention
@@ -496,36 +501,44 @@ def CC_AArch64_GHC : CallingConv<[
 
 let Entry = 1 in
 def CC_AArch64_Preserve_None : CallingConv<[
-    // We can pass arguments in all general registers, except:
-    // - X8, used for sret
-    // - X16/X17, used by the linker as IP0/IP1
-    // - X18, the platform register
-    // - X19, the base pointer
-    // - X29, the frame pointer
-    // - X30, the link register
-    // General registers are not preserved with the exception of
-    // FP, LR, and X18
-    // Non-volatile registers are used first, so functions may call
-    // normal functions without saving and reloading arguments.
-    // X9 is assigned last as it is used in FrameLowering as the first
-    // choice for a scratch register.
-    CCIfType<[i32], CCAssignToReg<[W20, W21, W22, W23,
-                                   W24, W25, W26, W27, W28,
-                                   W0, W1, W2, W3, W4, W5,
-                                   W6, W7, W10, W11,
-                                   W12, W13, W14, W9]>>,
-    CCIfType<[i64], CCAssignToReg<[X20, X21, X22, X23,
-                                   X24, X25, X26, X27, X28,
-                                   X0, X1, X2, X3, X4, X5,
-                                   X6, X7, X10, X11,
-                                   X12, X13, X14, X9]>>,
-
-    // Windows uses X15 for stack allocation
-    CCIf<"!State.getMachineFunction().getSubtarget<AArch64Subtarget>().isTargetWindows()",
-        CCIfType<[i32], CCAssignToReg<[W15]>>>,
-    CCIf<"!State.getMachineFunction().getSubtarget<AArch64Subtarget>().isTargetWindows()",
-        CCIfType<[i64], CCAssignToReg<[X15]>>>,
-    CCDelegateTo<CC_AArch64_AAPCS>
+  // VarArgs are only supported using the C calling convention.
+  // This handles the non-variadic parameter case. Variadic parameters
+  // are handled in CCAssignFnForCall.
+  CCIfVarArg<CCIfSubtarget<"isTargetDarwin()", CCDelegateTo<CC_AArch64_DarwinPCS>>>,
+  CCIfVarArg<CCIfSubtarget<"isTargetWindows()", CCDelegateTo<CC_AArch64_Win64PCS>>>,
+  CCIfVarArg<CCDelegateTo<CC_AArch64_AAPCS>>,
+
+  // We can pass arguments in all general registers, except:
+  // - X8, used for sret
+  // - X16/X17, used by the linker as IP0/IP1
+  // - X18, the platform register
+  // - X19, the base pointer
+  // - X29, the frame pointer
+  // - X30, the link register
+  // General registers are not preserved with the exception of
+  // FP, LR, and X18
+  // Non-volatile registers are used first, so functions may call
+  // normal functions without saving and reloading arguments.
+  // X9 is assigned last as it is used in FrameLowering as the first
+  // choice for a scratch register.
+  CCIfType<[i32], CCAssignToReg<[W20, W21, W22, W23,
+                                 W24, W25, W26, W27, W28,
+                                 W0, W1, W2, W3, W4, W5,
+                                 W6, W7, W10, W11,
+                                 W12, W13, W14, W9]>>,
+  CCIfType<[i64], CCAssignToReg<[X20, X21, X22, X23,
+                                 X24, X25, X26, X27, X28,
+                                 X0, X1, X2, X3, X4, X5,
+                                 X6, X7, X10, X11,
+                                 X12, X13, X14, X9]>>,
+
+  // Windows uses X15 for stack allocation
+  CCIf<"!State.getMachineFunction().getSubtarget<AArch64Subtarget>().isTargetWindows()",
+    CCIfType<[i32], CCAssignToReg<[W15]>>>,
+  CCIf<"!State.getMachineFunction().getSubtarget<AArch64Subtarget>().isTargetWindows()",
+    CCIfType<[i64], CCAssignToReg<[X15]>>>,
+
+  CCDelegateTo<CC_AArch64_AAPCS>
 ]>;
 
 // The order of the callee-saves in this file is important, because the
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index dac03bc3c1d9e..b1b83e27c5592 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1714,7 +1714,6 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
   const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
 
-  MachineModuleInfo &MMI = MF.getMMI();
   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
   bool EmitCFI = AFI->needsDwarfUnwindInfo(MF);
   bool EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
@@ -1882,7 +1881,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
                       MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
       if (EmitCFI) {
         // Label used to tie together the PROLOG_LABEL and the MachineMoves.
-        MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
+        MCSymbol *FrameLabel = MF.getContext().createTempSymbol();
         // Encode the stack size of the leaf function.
         unsigned CFIIndex = MF.addFrameInst(
             MCCFIInstruction::cfiDefCfaOffset(FrameLabel, NumBytes));
@@ -1901,8 +1900,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
     return;
   }
 
-  bool IsWin64 =
-      Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
+  bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
   unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
 
   auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
@@ -2308,8 +2306,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   // How much of the stack used by incoming arguments this function is expected
   // to restore in this particular epilogue.
   int64_t ArgumentStackToRestore = getArgumentStackToRestore(MF, MBB);
-  bool IsWin64 =
-      Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
+  bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv(),
+                                              MF.getFunction().isVarArg());
   unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
 
   int64_t AfterCSRPopSize = ArgumentStackToRestore;
@@ -2615,8 +2613,8 @@ static StackOffset getFPOffset(const MachineFunction &MF,
                                int64_t ObjectOffset) {
   const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
   const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
-  bool IsWin64 =
-      Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
+  const Function &F = MF.getFunction();
+  bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
   unsigned FixedObject =
       getFixedObjectSize(MF, AFI, IsWin64, /*IsFunclet=*/false);
   int64_t CalleeSaveSize = AFI->getCalleeSavedStackSize(MF.getFrameInfo());
@@ -2722,9 +2720,9 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
         // via the frame pointer, so we have to use the FP in the parent
         // function.
         (void) Subtarget;
-        assert(
-            Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()) &&
-            "Funclets should only be present on Win64");
+        assert(Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv(),
+                                            MF.getFunction().isVarArg()) &&
+               "Funclets should only be present on Win64");
         UseFP = true;
       } else {
         // We have the choice between FP and (SP or BP).
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 84de1ee8f8923..bf205b1706a6c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -7109,7 +7109,13 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
   case CallingConv::GHC:
     return CC_AArch64_GHC;
   case CallingConv::PreserveNone:
-    return CC_AArch64_Preserve_None;
+    // The VarArg implementation makes assumptions about register
+    // argument passing that do not hold for preserve_none, so we
+    // instead fall back to C argument passing.
+    // The non-vararg case is handled in the CC function itself.
+    if (!IsVarArg)
+      return CC_AArch64_Preserve_None;
+    [[fallthrough]];
   case CallingConv::C:
   case CallingConv::Fast:
   case CallingConv::PreserveMost:
@@ -7182,7 +7188,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
   MachineFunction &MF = DAG.getMachineFunction();
   const Function &F = MF.getFunction();
   MachineFrameInfo &MFI = MF.getFrameInfo();
-  bool IsWin64 = Subtarget->isCallingConvWin64(F.getCallingConv());
+  bool IsWin64 =
+      Subtarget->isCallingConvWin64(F.getCallingConv(), F.isVarArg());
   bool StackViaX4 = CallConv == CallingConv::ARM64EC_Thunk_X64 ||
                     (isVarArg && Subtarget->isWindowsArm64EC());
   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
@@ -7634,7 +7641,9 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
   MachineFrameInfo &MFI = MF.getFrameInfo();
   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
   auto PtrVT = getPointerTy(DAG.getDataLayout());
-  bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
+  Function &F = MF.getFunction();
+  bool IsWin64 =
+      Subtarget->isCallingConvWin64(F.getCallingConv(), F.isVarArg());
 
   SmallVector<SDValue, 8> MemOps;
 
@@ -7805,6 +7814,21 @@ static bool mayTailCallThisCC(CallingConv::ID CC) {
   }
 }
 
+/// Return true if the calling convention supports varargs.
+/// Currently only conventions that pass varargs the same way
+/// the C calling convention does are eligible.
+/// Every calling convention listed here must also be handled
+/// properly in AArch64Subtarget::isCallingConvWin64.
+static bool callConvSupportsVarArgs(CallingConv::ID CC) {
+  switch (CC) {
+  case CallingConv::C:
+  case CallingConv::PreserveNone:
+    return true;
+  default:
+    return false;
+  }
+}
+
 static void analyzeCallOperands(const AArch64TargetLowering &TLI,
                                 const AArch64Subtarget *Subtarget,
                                 const TargetLowering::CallLoweringInfo &CLI,
@@ -7813,7 +7837,7 @@ static void analyzeCallOperands(const AArch64TargetLowering &TLI,
   CallingConv::ID CalleeCC = CLI.CallConv;
   bool IsVarArg = CLI.IsVarArg;
   const SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
-  bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC);
+  bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC, IsVarArg);
 
   // For Arm64EC thunks, allocate 32 extra bytes at the bottom of the stack
   // for the shadow store.
@@ -7941,8 +7965,8 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
 
   // I want anyone implementing a new calling convention to think long and hard
   // about this assert.
-  assert((!IsVarArg || CalleeCC == CallingConv::C) &&
-         "Unexpected variadic calling convention");
+  if (IsVarArg && !callConvSupportsVarArgs(CalleeCC))
+    report_fatal_error("Unsupported variadic calling convention");
 
   LLVMContext &C = *DAG.getContext();
   // Check that the call results are passed in the same way.
@@ -10872,8 +10896,9 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
 SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
                                             SelectionDAG &DAG) const {
   MachineFunction &MF = DAG.getMachineFunction();
+  Function &F = MF.getFunction();
 
-  if (Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()))
+  if (Subtarget->isCallingConvWin64(F.getCallingConv(), F.isVarArg()))
     return LowerWin64_VASTART(Op, DAG);
   else if (Subtarget->isTargetDarwin())
     return LowerDarwin_VASTART(Op, DAG);
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 1e069f4790c53..435cc18cdea62 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -611,7 +611,8 @@ bool AArch64RegisterInfo::isArgumentRegister(const MachineFunction &MF,
                                              MCRegister Reg) const {
   CallingConv::ID CC = MF.getFunction().getCallingConv();
   const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
-  bool IsVarArg = STI.isCallingConvWin64(MF.getFunction().getCallingConv());
+  bool IsVarArg = STI.isCallingConvWin64(MF.getFunction().getCallingConv(),
+                                         MF.getFunction().isVarArg());
 
   auto HasReg = [](ArrayRef<MCRegister> RegList, MCRegister Reg) {
     return llvm::is_contained(RegList, Reg);
@@ -623,7 +624,9 @@ bool AArch64RegisterInfo::isArgumentRegister(const MachineFunction &MF,
   case CallingConv::GHC:
     return HasReg(CC_AArch64_GHC_ArgRegs, Reg);
   case CallingConv::PreserveNone:
-    return HasReg(CC_AArch64_Preserve_None_ArgRegs, Reg);
+    if (!MF.getFunction().isVarArg())
+      return HasReg(CC_AArch64_Preserve_None_ArgRegs, Reg);
+    [[fallthrough]];
   case CallingConv::C:
   case CallingConv::Fast:
   case CallingConv::PreserveMost:
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 4b840b24ba134..12c3d25d32ee7 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -322,13 +322,15 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
 
   std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;
 
-  bool isCallingConvWin64(CallingConv::ID CC) const {
+  bool isCallingConvWin64(CallingConv::ID CC, bool IsVarArg) const {
     switch (CC) {
     case CallingConv::C:
     case CallingConv::Fast:
     case CallingConv::Swift:
     case CallingConv::SwiftTail:
       return isTargetWindows();
+    case CallingConv::PreserveNone:
+      return IsVarArg && isTargetWindows();
     case CallingConv::Win64:
       return true;
     default:
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 5206ba46260ed..b4d2a3388c1df 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -117,7 +117,9 @@ struct AArch64OutgoingValueAssigner
                  CCValAssign::LocInfo LocInfo,
                  const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
                  CCState &State) override {
-    bool IsCalleeWin = Subtarget.isCallingConvWin64(State.getCallingConv());
+    const Function &F = State.getMachineFunction().getFunction();
+    bool IsCalleeWin =
+        Subtarget.isCallingConvWin64(State.getCallingConv(), F.isVarArg());
     bool UseVarArgsCCForFixed = IsCalleeWin && State.isVarArg();
 
     bool Res;
@@ -557,8 +559,8 @@ void AArch64CallLowering::saveVarArgRegisters(
   MachineFrameInfo &MFI = MF.getFrameInfo();
   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
   auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
-  bool IsWin64CC =
-      Subtarget.isCallingConvWin64(CCInfo.getCallingConv());
+  bool IsWin64CC = Subtarget.isCallingConvWin64(CCInfo.getCallingConv(),
+                                                MF.getFunction().isVarArg());
   const LLT p0 = LLT::pointer(0, 64);
   const LLT s64 = LLT::scalar(64);
 
@@ -653,7 +655,9 @@ bool AArch64CallLowering::lowerFormalArguments(
       F.getCallingConv() == CallingConv::ARM64EC_Thunk_X64)
     return false;
 
-  bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv()) && !Subtarget.isWindowsArm64EC();
+  bool IsWin64 =
+      Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg()) &&
+      !Subtarget.isWindowsArm64EC();
 
   SmallVector<ArgInfo, 8> SplitArgs;
   SmallVector<std::pair<Register, Register>> BoolArgs;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 0d3f6d9e353ba..009928a8a7488 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2006,7 +2006,7 @@ bool AArch64InstructionSelector::selectVaStartDarwin(
 
   int FrameIdx = FuncInfo->getVarArgsStackIndex();
   if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
-          MF.getFunction().getCallingConv())) {
+          MF.getFunction().getCallingConv(), MF.getFunction().isVarArg())) {
     FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
                    ? FuncInfo->getVarArgsGPRIndex()
                    : FuncInfo->getVarArgsStackIndex();
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index f05e5e6df7f8e..97c5f96388abe 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -386,24 +386,21 @@ static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx,
                                   std::move(Emitter));
 }
 
-static MCStreamer *createMachOStreamer(MCContext &Ctx,
-                                       std::unique_ptr<MCAsmBackend> &&TAB,
-                                       std::unique_ptr<MCObjectWriter> &&OW,
-                                       std::unique_ptr<MCCodeEmitter> &&Emitter,
-                                       bool DWARFMustBeAtTheEnd) {
+static MCStreamer *
+createMachOStreamer(MCContext &Ctx, std::unique_ptr<MCAsmBackend> &&TAB,
+                    std::unique_ptr<MCObjectWriter> &&OW,
+                    std::unique_ptr<MCCodeEmitter> &&Emitter) {
   return createMachOStreamer(Ctx, std::move(TAB), std::move(OW),
-                             std::move(Emitter), DWARFMustBeAtTheEnd,
+                             std::move(Emitter), /*ignore=*/false,
                              /*LabelSections*/ true);
 }
 
 static MCStreamer *
 createWinCOFFStreamer(MCContext &Ctx, std::unique_ptr<MCAsmBackend> &&TAB,
                       std::unique_ptr<MCObjectWriter> &&OW,
-                      std::unique_ptr<MCCodeEmitter> &&Emitter,
-                      bool IncrementalLinkerCompatible) {
+                      std::unique_ptr<MCCodeEmitter> &&Emitter) {
   return createAArch64WinCOFFStreamer(Ctx, std::move(TAB), std::move(OW),
-                                      std::move(Emitter),
-                                      IncrementalLinkerCompatible);
+                                      std::move(Emitter));
 }
 
 namespace {
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
index c25cc2e99adca..208d43502cb88 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
@@ -291,12 +291,11 @@ void AArch64TargetWinCOFFStreamer::emitARM64WinCFISaveAnyRegQPX(unsigned Reg,
   emitARM64WinUnwindCode(Win64EH::UOP_SaveAnyRegQPX, Reg, Offset);
 }
 
-MCWinCOFFStreamer *llvm::createAArch64WinCOFFStreamer(
-    MCContext &Context, std::unique_ptr<MCAsmBackend> MAB,
-    std::unique_ptr<MCObjectWriter> OW, std::unique_ptr<MCCodeEmitter> Emitter,
-    bool IncrementalLinkerCompatible) {
-  auto *S = new AArch64WinCOFFStreamer(Context, std::move(MAB),
-                                       std::move(Emitter), std::move(OW));
-  S->getAssembler().setIncrementalLinkerCompatible(IncrementalLinkerCompatible);
-  return S;
+MCWinCOFFStreamer *
+llvm::createAArch64WinCOFFStreamer(MCContext &Context,
+                                   std::unique_ptr<MCAsmBackend> MAB,
+                                   std::unique_ptr<MCObjectWriter> OW,
+                                   std::unique_ptr<MCCodeEmitter> Emitter) {
+  return new AArch64WinCOFFStreamer(Context, std::move(MAB), std::move(Emitter),
+                                    std::move(OW));
 }
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h
index a13b1a451be5f..5caf520a3aa37 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h
@@ -20,8 +20,7 @@ namespace llvm {
 
 MCWinCOFFStreamer *createAArch64WinCOFFStreamer(
     MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
-    std::unique_ptr<MCObjectWriter> OW, std::unique_ptr<MCCodeEmitter> Emitter,
-    bool IncrementalLinkerCompatible);
+    std::unique_ptr<MCObjectWriter> OW, std::unique_ptr<MCCodeEmitter> Emitter);
 } // end llvm namespace
 
 #endif
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index da3e8c0a62b08..63048c7d1a0a1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1870,6 +1870,8 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
       VDataIn = MI.getOperand(1).getReg();
       VDataTy = MRI->getType(VDataIn);
       NumVDataDwords = (VDataTy.getSizeInBits() + 31) / 32;
+    } else if (BaseOpcode->NoReturn) {
+      NumVDataDwords = 0;
     } else {
       VDataOut = MI.getOperand(0).getReg();
       VDataTy = MRI->getType(VDataOut);
@@ -3616,6 +3618,7 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
     return selectG_INSERT_VECTOR_ELT(I);
   case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
   case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
+  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
   case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
   case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
     const AMDGPU::ImageDimIntrinsicInfo *Intr =
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 88e40da110555..89ef0f299feab 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -6334,8 +6334,13 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
   const LLT V2S16 = LLT::fixed_vector(2, 16);
 
   unsigned DMask = 0;
-  Register VData = MI.getOperand(NumDefs == 0 ? 1 : 0).getReg();
-  LLT Ty = MRI->getType(VData);
+  Register VData;
+  LLT Ty;
+
+  if (!BaseOpcode->NoReturn || BaseOpcode->Store) {
+    VData = MI.getOperand(NumDefs == 0 ? 1 : 0).getReg();
+    Ty = MRI->getType(VData);
+  }
 
   const bool IsAtomicPacked16Bit =
       (BaseOpcode->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_F16 ||
@@ -6373,7 +6378,11 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
                                      : AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE;
   const unsigned LoadOpcode = IsD16 ? AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16
                                     : AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD;
-  unsigned NewOpcode = NumDefs == 0 ? StoreOpcode : LoadOpcode;
+  unsigned NewOpcode = LoadOpcode;
+  if (BaseOpcode->Store)
+    NewOpcode = StoreOpcode;
+  else if (BaseOpcode->NoReturn)
+    NewOpcode = AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET;
 
   // Track that we legalized this
   MI.setDesc(B.getTII().get(NewOpcode));
@@ -6503,7 +6512,7 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
     Flags |= 2;
   MI.addOperand(MachineOperand::CreateImm(Flags));
 
-  if (BaseOpcode->Store) { // No TFE for stores?
+  if (BaseOpcode->NoReturn) { // No TFE for stores or no-return samples?
     // TODO: Handle dmask trim
     if (!Ty.isVector() || !IsD16)
       return true;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 73796edb5d3e3..68f4767458703 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3172,6 +3172,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
   }
   case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
   case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
+  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
   case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
   case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
     const AMDGPU::RsrcIntrinsic *RSrcIntrin =
@@ -4842,6 +4843,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   }
   case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
   case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
+  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
   case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
   case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
     auto IntrID = AMDGPU::getIntrinsicID(MI);
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 217487b2cc7e6..92c3b26ca4d6f 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -3868,7 +3868,8 @@ bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
 
-  assert(VDataIdx != -1);
+  if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
+    return true;
 
   if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
     return true;
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 15fd36ebd10a4..b4e58cfd98a23 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -51,6 +51,7 @@ class MIMGBaseOpcode : PredicateControl {
   bit MSAA = 0;
   bit BVH = 0;
   bit A16 = 0;
+  bit NoReturn = 0;
 }
 
 def MIMGBaseOpcode : GenericEnum {
@@ -62,7 +63,7 @@ def MIMGBaseOpcodesTable : GenericTable {
   let CppTypeName = "MIMGBaseOpcodeInfo";
   let Fields = ["BaseOpcode", "Store", "Atomic", "AtomicX2", "Sampler",
                 "Gather4", "NumExtraArgs", "Gradients", "G16", "Coordinates",
-                "LodOrClampOrMip", "HasD16", "MSAA", "BVH", "A16"];
+                "LodOrClampOrMip", "HasD16", "MSAA", "BVH", "A16", "NoReturn"];
   string TypeOf_BaseOpcode = "MIMGBaseOpcode";
 
   let PrimaryKey = ["BaseOpcode"];
@@ -521,6 +522,25 @@ class VSAMPLE_Sampler_gfx12<mimgopc op, string opcode, RegisterClass DataRC,
                     #!if(BaseOpcode.HasD16, "$d16", "");
 }
 
+class VSAMPLE_Sampler_nortn_gfx12<mimgopc op, string opcode,
+                            int num_addrs, RegisterClass Addr3RC = VGPR_32,
+                            string dns="">
+  : VSAMPLE_gfx12<op.GFX12, (outs), num_addrs, dns, Addr3RC> {
+  let InOperandList = !con(AddrIns,
+                           (ins SReg_256:$rsrc),
+                           !if(BaseOpcode.Sampler, (ins SReg_128:$samp), (ins)),
+                           (ins DMask:$dmask, Dim:$dim, UNorm:$unorm,
+                                CPol:$cpol, R128A16:$r128, A16:$a16, TFE:$tfe,
+                                LWE:$lwe),
+                           !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
+  let AsmString = opcode#" off, "#AddrAsm#", $rsrc"
+                    #!if(BaseOpcode.Sampler, ", $samp", "")
+                    #"$dmask$dim$unorm$cpol$r128$a16$tfe$lwe"
+                    #!if(BaseOpcode.HasD16, "$d16", "");
+  // Force vdata to VGPR0 as no result will be returned.
+  let vdata = 0;
+}
+
 multiclass MIMG_NoSampler_Src_Helper <mimgopc op, string asm,
                                       RegisterClass dst_rc, bit enableDisasm,
                                       bit ExtendedImageInst = 1,
@@ -835,6 +855,7 @@ multiclass MIMG_Store <mimgopc op, string asm, bit has_d16, bit mip = 0> {
     let Store = 1;
     let LodOrClampOrMip = mip;
     let HasD16 = has_d16;
+    let NoReturn = 1;
   }
 
   let BaseOpcode = !cast<MIMGBaseOpcode>(NAME) in {
@@ -1136,44 +1157,62 @@ class MIMG_Sampler_gfx90a<mimgopc op, string asm, RegisterClass dst_rc,
                       #!if(BaseOpcode.HasD16, "$d16", "");
 }
 
+class MIMG_Sampler_OpList_gfx10p<dag OpPrefix, bit HasD16> {
+  dag ret = !con(OpPrefix,
+                 (ins SReg_256:$srsrc, SReg_128:$ssamp,
+                  DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
+                  R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
+                 !if(HasD16, (ins D16:$d16), (ins)));
+}
+
+class MIMG_Sampler_Asm_gfx10p<string opcode, string AsmPrefix, bit HasD16> {
+  string ret = opcode#" "#AsmPrefix#", $srsrc, $ssamp$dmask$dim$unorm"
+               #"$cpol$r128$a16$tfe$lwe"
+               #!if(HasD16, "$d16", "");
+}
+
 class MIMG_Sampler_gfx10<mimgopc op, string opcode,
                          RegisterClass DataRC, RegisterClass AddrRC,
                          string dns="">
   : MIMG_gfx10<op.GFX10M, (outs DataRC:$vdata), dns> {
-  let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, SReg_128:$ssamp,
-                                DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
-                                R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
-                           !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
-  let AsmString = opcode#" $vdata, $vaddr0, $srsrc, $ssamp$dmask$dim$unorm"
-                    #"$cpol$r128$a16$tfe$lwe"
-                    #!if(BaseOpcode.HasD16, "$d16", "");
+  let InOperandList = MIMG_Sampler_OpList_gfx10p<(ins AddrRC:$vaddr0), BaseOpcode.HasD16>.ret;
+  let AsmString = MIMG_Sampler_Asm_gfx10p<opcode, "$vdata, $vaddr0", BaseOpcode.HasD16>.ret;
 }
 
 class MIMG_Sampler_nsa_gfx10<mimgopc op, string opcode,
                              RegisterClass DataRC, int num_addrs,
                              string dns="">
   : MIMG_nsa_gfx10<op.GFX10M, (outs DataRC:$vdata), num_addrs, dns> {
-  let InOperandList = !con(AddrIns,
-                           (ins SReg_256:$srsrc, SReg_128:$ssamp, DMask:$dmask,
-                                Dim:$dim, UNorm:$unorm, CPol:$cpol,
-                                R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
-                           !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
-  let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc, $ssamp$dmask$dim$unorm"
-                    #"$cpol$r128$a16$tfe$lwe"
-                    #!if(BaseOpcode.HasD16, "$d16", "");
+  let InOperandList = MIMG_Sampler_OpList_gfx10p<AddrIns, BaseOpcode.HasD16>.ret;
+  let AsmString = MIMG_Sampler_Asm_gfx10p<opcode, " $vdata, "#AddrAsm, BaseOpcode.HasD16>.ret;
+}
+
+class MIMG_Sampler_nortn_gfx10<mimgopc op, string opcode,
+                         RegisterClass AddrRC,
+                         string dns="">
+  : MIMG_gfx10<op.GFX10M, (outs), dns> {
+  let InOperandList = MIMG_Sampler_OpList_gfx10p<(ins AddrRC:$vaddr0), BaseOpcode.HasD16>.ret;
+  let AsmString = MIMG_Sampler_Asm_gfx10p<opcode, "off, $vaddr0", BaseOpcode.HasD16>.ret;
+  // Force vdata to VGPR0 as no result will be returned.
+  let vdata = 0;
+}
+
+class MIMG_Sampler_nortn_nsa_gfx10<mimgopc op, string opcode,
+                         int num_addrs,
+                         string dns="">
+  : MIMG_nsa_gfx10<op.GFX10M, (outs), num_addrs, dns> {
+  let InOperandList = MIMG_Sampler_OpList_gfx10p<AddrIns, BaseOpcode.HasD16>.ret;
+  let AsmString = MIMG_Sampler_Asm_gfx10p<opcode, " off, "#AddrAsm, BaseOpcode.HasD16>.ret;
+  // Force vdata to VGPR0 as no result will be returned.
+  let vdata = 0;
 }
 
 class MIMG_Sampler_gfx11<mimgopc op, string opcode,
                          RegisterClass DataRC, RegisterClass AddrRC,
                          string dns="">
   : MIMG_gfx11<op.GFX11, (outs DataRC:$vdata), dns> {
-  let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, SReg_128:$ssamp,
-                                DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
-                                R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
-                           !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
-  let AsmString = opcode#" $vdata, $vaddr0, $srsrc, $ssamp$dmask$dim$unorm"
-                    #"$cpol$r128$a16$tfe$lwe"
-                    #!if(BaseOpcode.HasD16, "$d16", "");
+  let InOperandList = MIMG_Sampler_OpList_gfx10p<(ins AddrRC:$vaddr0), BaseOpcode.HasD16>.ret;
+  let AsmString = MIMG_Sampler_Asm_gfx10p<opcode, "$vdata, $vaddr0", BaseOpcode.HasD16>.ret;
 }
 
 class MIMG_Sampler_nsa_gfx11<mimgopc op, string opcode,
@@ -1181,14 +1220,26 @@ class MIMG_Sampler_nsa_gfx11<mimgopc op, string opcode,
                              RegisterClass LastVAddrSize, string dns="">
   : MIMG_nsa_gfx11<op.GFX11, (outs DataRC:$vdata), num_addrs, dns, [],
                    LastVAddrSize> {
-  let InOperandList = !con(AddrIns,
-                           (ins SReg_256:$srsrc, SReg_128:$ssamp, DMask:$dmask,
-                                Dim:$dim, UNorm:$unorm, CPol:$cpol,
-                                R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
-                           !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
-  let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc, $ssamp$dmask$dim$unorm"
-                    #"$cpol$r128$a16$tfe$lwe"
-                    #!if(BaseOpcode.HasD16, "$d16", "");
+  let InOperandList = MIMG_Sampler_OpList_gfx10p<AddrIns, BaseOpcode.HasD16>.ret;
+  let AsmString = MIMG_Sampler_Asm_gfx10p<opcode, " $vdata, "#AddrAsm, BaseOpcode.HasD16>.ret;
+}
+
+class MIMG_Sampler_nortn_gfx11<mimgopc op, string opcode,
+                                  RegisterClass AddrRC,
+                                  string dns="">
+  : MIMG_gfx11<op.GFX11, (outs), dns> {
+  let InOperandList = MIMG_Sampler_OpList_gfx10p<(ins AddrRC:$vaddr0), BaseOpcode.HasD16>.ret;
+  let AsmString = MIMG_Sampler_Asm_gfx10p<opcode, "off, $vaddr0", BaseOpcode.HasD16>.ret;
+  let vdata = 0; // Force vdata to VGPR0 as no result will be returned.
+}
+
+class MIMG_Sampler_nortn_nsa_gfx11<mimgopc op, string opcode,
+                                      int num_addrs,
+                                      RegisterClass LastVAddrSize, string dns="">
+  : MIMG_nsa_gfx11<op.GFX11, (outs), num_addrs, dns, [], LastVAddrSize> {
+  let InOperandList = MIMG_Sampler_OpList_gfx10p<AddrIns, BaseOpcode.HasD16>.ret;
+  let AsmString = MIMG_Sampler_Asm_gfx10p<opcode, "off, "#AddrAsm, BaseOpcode.HasD16>.ret;
+  let vdata = 0; // Force vdata to VGPR0 as no result will be returned.
 }
 class MIMGAddrSize<int dw, bit enable_disasm, int AddrDW = dw> {
@@ -1366,6 +1417,57 @@ class MIMG_Sampler_BaseOpcode<AMDGPUSampleVariant sample>
   let LodOrClampOrMip = !ne(sample.LodOrClamp, "");
 }
 
+multiclass MIMG_Sampler_NoReturn <mimgopc op, AMDGPUSampleVariant sample, bit wqm = 0, bit isG16, string asm> {
+  def "" : MIMG_Sampler_BaseOpcode<sample> {
+    let HasD16 = 1;
+    let G16 = isG16;
+    let NoReturn = 1;
+  }
+
+  let BaseOpcode = !cast<MIMGBaseOpcode>(NAME), WQM = wqm,
+      mayLoad = 1, mayStore = 1, VDataDwords = 0 in {
+    foreach addr = MIMG_Sampler_AddrSizes<sample, isG16>.MachineInstrs in {
+      let VAddrDwords = addr.NumWords in {
+        if op.HAS_GFX10M then {
+          def _V # addr.NumWords # _gfx10
+            : MIMG_Sampler_nortn_gfx10 <op, asm, addr.RegClass>;
+        }
+        if op.HAS_GFX11 then {
+          def _V # addr.NumWords # _gfx11
+            : MIMG_Sampler_nortn_gfx11 <op, asm, addr.RegClass>;
+        }
+      }
+    }
+
+    foreach addr = MIMG_Sampler_AddrSizes<sample, isG16>.NSAInstrs in {
+      let VAddrDwords = addr.NumWords in {
+        if op.HAS_GFX10M then {
+          def _V # addr.NumWords # _nsa_gfx10
+            : MIMG_Sampler_nortn_nsa_gfx10<op, asm, addr.NumWords>;
+        }
+      }
+    }
+
+    foreach addr = MIMG_Sampler_AddrSizes<sample, isG16, 5/*MaxNSASize*/>.PartialNSAInstrs in {
+      let VAddrDwords = addr.NumWords in {
+        if op.HAS_GFX11 then {
+          def _V # addr.NumWords # _nsa_gfx11
+            : MIMG_Sampler_nortn_nsa_gfx11<op, asm, addr.NumWords, addr.RegClass>;
+        }
+      }
+    }
+
+    foreach addr = MIMG_Sampler_AddrSizes<sample, isG16, 4/*MaxNSASize*/, 1>.PartialNSAInstrs in {
+      let VAddrDwords = addr.NumWords in {
+        if op.HAS_GFX12 then {
+          def _V # addr.NumWords # _gfx12
+            : VSAMPLE_Sampler_nortn_gfx12<op, asm, addr.NumWords, addr.RegClass>;
+        }
+      }
+    }
+  }
+}
+
 multiclass MIMG_Sampler <mimgopc op, AMDGPUSampleVariant sample, bit wqm = 0,
                          bit isG16 = 0, bit isGetLod = 0,
                          string asm = "image_sample"#sample.LowerCaseMod#!if(isG16, "_g16", ""),
@@ -1388,6 +1490,9 @@ multiclass MIMG_Sampler <mimgopc op, AMDGPUSampleVariant sample, bit wqm = 0,
     let VDataDwords = 5 in
     defm _V5 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_160, 0, ExtendedImageInst, isG16>;
   }
+
+  if !not(isGetLod) then
+  defm "_nortn" : MIMG_Sampler_NoReturn <op, sample, wqm, isG16, asm>;
 }
 
 multiclass MIMG_Sampler_WQM <mimgopc op, AMDGPUSampleVariant sample>
@@ -1755,6 +1860,10 @@ def : MIMGLZMapping<IMAGE_GATHER4_L, IMAGE_GATHER4_LZ>;
 def : MIMGLZMapping<IMAGE_GATHER4_C_L, IMAGE_GATHER4_C_LZ>;
 def : MIMGLZMapping<IMAGE_GATHER4_L_O, IMAGE_GATHER4_LZ_O>;
 def : MIMGLZMapping<IMAGE_GATHER4_C_L_O, IMAGE_GATHER4_C_LZ_O>;
+def : MIMGLZMapping<IMAGE_SAMPLE_L_nortn, IMAGE_SAMPLE_LZ_nortn>;
+def : MIMGLZMapping<IMAGE_SAMPLE_C_L_nortn, IMAGE_SAMPLE_C_LZ_nortn>;
+def : MIMGLZMapping<IMAGE_SAMPLE_L_O_nortn, IMAGE_SAMPLE_LZ_O_nortn>;
+def : MIMGLZMapping<IMAGE_SAMPLE_C_L_O_nortn, IMAGE_SAMPLE_C_LZ_O_nortn>;
 
 // MIP to NONMIP Optimization Mapping
 def : MIMGMIPMapping<IMAGE_LOAD_MIP, IMAGE_LOAD>;
@@ -1777,6 +1886,14 @@ def : MIMGBiasMapping<IMAGE_GATHER4_B_O, IMAGE_GATHER4_O>;
 def : MIMGBiasMapping<IMAGE_GATHER4_B_CL_O, IMAGE_GATHER4_CL_O>;
 def : MIMGBiasMapping<IMAGE_GATHER4_C_B_O, IMAGE_GATHER4_C_O>;
 def : MIMGBiasMapping<IMAGE_GATHER4_C_B_CL_O, IMAGE_GATHER4_C_CL_O>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_B_nortn, IMAGE_SAMPLE_nortn>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_B_CL_nortn, IMAGE_SAMPLE_CL_nortn>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_C_B_nortn, IMAGE_SAMPLE_C_nortn>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_C_B_CL_nortn, IMAGE_SAMPLE_C_CL_nortn>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_B_O_nortn, IMAGE_SAMPLE_O_nortn>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_B_CL_O_nortn, IMAGE_SAMPLE_CL_O_nortn>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_C_B_O_nortn, IMAGE_SAMPLE_C_O_nortn>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_C_B_CL_O_nortn, IMAGE_SAMPLE_C_CL_O_nortn>;
 
 // Offset to NoOffset Optimization Mapping
 def : MIMGOffsetMapping<IMAGE_SAMPLE_O, IMAGE_SAMPLE>;
@@ -1819,6 +1936,34 @@ def : MIMGOffsetMapping<IMAGE_SAMPLE_CD_O_G16, IMAGE_SAMPLE_CD_G16>;
 def : MIMGOffsetMapping<IMAGE_SAMPLE_CD_CL_O_G16, IMAGE_SAMPLE_CD_CL_G16>;
 def : MIMGOffsetMapping<IMAGE_SAMPLE_C_CD_O_G16, IMAGE_SAMPLE_C_CD_G16>;
 def : MIMGOffsetMapping<IMAGE_SAMPLE_C_CD_CL_O_G16, IMAGE_SAMPLE_C_CD_CL_G16>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_O_nortn, IMAGE_SAMPLE_nortn>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_CL_O_nortn, IMAGE_SAMPLE_CL_nortn>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_D_O_nortn, IMAGE_SAMPLE_D_nortn>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_D_CL_O_nortn, IMAGE_SAMPLE_D_CL_nortn>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_D_O_G16_nortn, IMAGE_SAMPLE_D_G16_nortn>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_D_CL_O_G16_nortn, IMAGE_SAMPLE_D_CL_G16_nortn>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_L_O_nortn, IMAGE_SAMPLE_L_nortn>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_B_O_nortn, IMAGE_SAMPLE_B_nortn>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_B_CL_O_nortn, IMAGE_SAMPLE_B_CL_nortn>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_LZ_O_nortn, IMAGE_SAMPLE_LZ_nortn>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_O_nortn, IMAGE_SAMPLE_C_nortn>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_CL_O_nortn, IMAGE_SAMPLE_C_CL_nortn>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_D_O_nortn, IMAGE_SAMPLE_C_D_nortn>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_D_CL_O_nortn, IMAGE_SAMPLE_C_D_CL_nortn>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_D_O_G16_nortn, IMAGE_SAMPLE_C_D_G16_nortn>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_D_CL_O_G16_nortn, IMAGE_SAMPLE_C_D_CL_G16_nortn>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_L_O_nortn, IMAGE_SAMPLE_C_L_nortn>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_B_CL_O_nortn, IMAGE_SAMPLE_C_B_CL_nortn>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_B_O_nortn, IMAGE_SAMPLE_C_B_nortn>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_LZ_O_nortn, IMAGE_SAMPLE_C_LZ_nortn>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_CD_O_nortn, IMAGE_SAMPLE_CD_nortn>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_CD_CL_O_nortn, IMAGE_SAMPLE_CD_CL_nortn>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_CD_O_nortn, IMAGE_SAMPLE_C_CD_nortn>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_CD_CL_O_nortn, IMAGE_SAMPLE_C_CD_CL_nortn>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_CD_O_G16_nortn, IMAGE_SAMPLE_CD_G16_nortn>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_CD_CL_O_G16_nortn, IMAGE_SAMPLE_CD_CL_G16_nortn>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_CD_O_G16_nortn, IMAGE_SAMPLE_C_CD_G16_nortn>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_CD_CL_O_G16_nortn, IMAGE_SAMPLE_C_CD_CL_G16_nortn>;
 
 // G to G16 Optimization Mapping
 def : MIMGG16Mapping<IMAGE_SAMPLE_D, IMAGE_SAMPLE_D_G16>;
@@ -1837,3 +1982,19 @@ def : MIMGG16Mapping<IMAGE_SAMPLE_CD_O, IMAGE_SAMPLE_CD_O_G16>;
 def : MIMGG16Mapping<IMAGE_SAMPLE_CD_CL_O, IMAGE_SAMPLE_CD_CL_O_G16>;
 def : MIMGG16Mapping<IMAGE_SAMPLE_C_CD_O, IMAGE_SAMPLE_C_CD_O_G16>;
 def : MIMGG16Mapping<IMAGE_SAMPLE_C_CD_CL_O, IMAGE_SAMPLE_C_CD_CL_O_G16>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_D_nortn, IMAGE_SAMPLE_D_G16_nortn>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_D_CL_nortn, IMAGE_SAMPLE_D_CL_G16_nortn>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_C_D_nortn, IMAGE_SAMPLE_C_D_G16_nortn>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_C_D_CL_nortn, IMAGE_SAMPLE_C_D_CL_G16_nortn>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_D_O_nortn, IMAGE_SAMPLE_D_O_G16_nortn>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_D_CL_O_nortn, IMAGE_SAMPLE_D_CL_O_G16_nortn>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_C_D_O_nortn, IMAGE_SAMPLE_C_D_O_G16_nortn>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_C_D_CL_O_nortn, IMAGE_SAMPLE_C_D_CL_O_G16_nortn>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_CD_nortn, IMAGE_SAMPLE_CD_G16_nortn>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_CD_CL_nortn, IMAGE_SAMPLE_CD_CL_G16_nortn>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_C_CD_nortn, IMAGE_SAMPLE_C_CD_G16_nortn>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_C_CD_CL_nortn, IMAGE_SAMPLE_C_CD_CL_G16_nortn>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_CD_O_nortn, IMAGE_SAMPLE_CD_O_G16_nortn>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_CD_CL_O_nortn, IMAGE_SAMPLE_CD_CL_O_G16_nortn>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_C_CD_O_nortn, IMAGE_SAMPLE_C_CD_O_G16_nortn>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_C_CD_CL_O_nortn, IMAGE_SAMPLE_C_CD_CL_O_G16_nortn>;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index d5ffb4478bee1..a09e0ad2c0c29 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1190,8 +1190,13 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     // TODO: Should images get their own address space?
     Info.fallbackAddressSpace = AMDGPUAS::BUFFER_RESOURCE;
 
-    if (RsrcIntr->IsImage)
+    const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = nullptr;
+    if (RsrcIntr->IsImage) {
+      const AMDGPU::ImageDimIntrinsicInfo *Intr =
+          AMDGPU::getImageDimIntrinsicInfo(IntrID);
+      BaseOpcode = AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
       Info.align.reset();
+    }
 
     Value *RsrcArg = CI.getArgOperand(RsrcIntr->RsrcArg);
     if (auto *RsrcPtrTy = dyn_cast<PointerType>(RsrcArg->getType())) {
@@ -1212,11 +1217,6 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
       if (RsrcIntr->IsImage) {
         unsigned MaxNumLanes = 4;
 
-        const AMDGPU::ImageDimIntrinsicInfo *Intr
-          = AMDGPU::getImageDimIntrinsicInfo(IntrID);
-        const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
-          AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
-
         if (!BaseOpcode->Gather4) {
           // If this isn't a gather, we may have excess loaded elements in the
           // IR type. Check the dmask for the real number of elements loaded.
@@ -1250,7 +1250,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
 
       Info.flags |= MachineMemOperand::MOStore;
     } else {
-      // Atomic
+      // Atomic or NoReturn Sampler
       Info.opc = CI.getType()->isVoidTy() ? ISD::INTRINSIC_VOID :
                                             ISD::INTRINSIC_W_CHAIN;
       Info.flags |= MachineMemOperand::MOLoad |
@@ -1259,9 +1259,14 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
 
       switch (IntrID) {
       default:
-        Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType());
-        // XXX - Should this be volatile without known ordering?
-        Info.flags |= MachineMemOperand::MOVolatile;
+        if (RsrcIntr->IsImage && BaseOpcode->NoReturn) {
+          // Fake memory access type for no return sampler intrinsics
+          Info.memVT = MVT::i32;
+        } else {
+          // XXX - Should this be volatile without known ordering?
+          Info.flags |= MachineMemOperand::MOVolatile;
+          Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType());
+        }
         break;
       case Intrinsic::amdgcn_raw_buffer_load_lds:
       case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
@@ -7900,7 +7905,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
   bool IsG16 = false;
   bool IsA16 = false;
   SDValue VData;
-  int NumVDataDwords;
+  int NumVDataDwords = 0;
   bool AdjustRetType = false;
   bool IsAtomicPacked16Bit = false;
 
@@ -7949,7 +7954,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
       }
 
       NumVDataDwords = (VData.getValueType().getSizeInBits() + 31) / 32;
-    } else {
+    } else if (!BaseOpcode->NoReturn) {
       // Work out the num dwords based on the dmask popcount and underlying type
       // and whether packing is supported.
       MVT LoadVT = ResultTypes[0].getSimpleVT();
@@ -8242,7 +8247,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
     DAG.ExtractVectorElements(SDValue(NewNode, 0), Elt, 0, 1);
     return DAG.getMergeValues({Elt[0], SDValue(NewNode, 1)}, DL);
   }
-  if (BaseOpcode->Store)
+  if (BaseOpcode->NoReturn)
     return SDValue(NewNode, 0);
   return constructRetValue(DAG, NewNode, OrigResultTypes, IsTexFail,
                            Subtarget->hasUnpackedD16VMem(), IsD16, DMaskLanes,
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index a18da72b02ebe..1315aa0855788 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -699,7 +699,8 @@ class SIInsertWaitcnts : public MachineFunctionPass {
     // these should use VM_CNT.
     if (!ST->hasVscnt() || SIInstrInfo::mayWriteLDSThroughDMA(Inst))
       return VMEM_ACCESS;
-    if (Inst.mayStore() && !SIInstrInfo::isAtomicRet(Inst)) {
+    if (Inst.mayStore() &&
+        (!Inst.mayLoad() || SIInstrInfo::isAtomicNoRet(Inst))) {
       // FLAT and SCRATCH instructions may access scratch. Other VMEM
       // instructions do not.
       if (SIInstrInfo::isFLAT(Inst) && mayAccessScratchThroughFlat(Inst))
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 52044791e6c66..463737f645d45 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -472,6 +472,8 @@ bool SIInstrInfo::getMemOperandsWithOffsetWidth(
     Offset = 0;
     // Get appropriate operand, and compute width accordingly.
     DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
+    if (DataOpIdx == -1)
+      return false; // no return sampler
     Width = getOpSize(LdSt, DataOpIdx);
     return true;
   }
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 2e617e5646c59..15078bc941292 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3953,6 +3953,14 @@ def G_AMDGPU_INTRIN_IMAGE_LOAD_D16 : AMDGPUGenericInstruction {
   let mayStore = 1;
 }
 
+def G_AMDGPU_INTRIN_IMAGE_LOAD_NORET : AMDGPUGenericInstruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins unknown:$intrin, variable_ops);
+  let hasSideEffects = 0;
+  let mayLoad = 1;
+  let mayStore = 1;
+}
+
 // This is equivalent to the G_INTRINSIC*, but the operands may have
 // been legalized depending on the subtarget requirements.
 def G_AMDGPU_INTRIN_IMAGE_STORE : AMDGPUGenericInstruction {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index af2f0bc1a6306..429c3ad335d21 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -404,6 +404,7 @@ struct MIMGBaseOpcodeInfo {
   bool MSAA;
   bool BVH;
   bool A16;
+  bool NoReturn;
 };
 
 LLVM_READONLY
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 20603b6cf1b0b..cf4fc37f84553 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -369,10 +369,9 @@ static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx,
 static MCStreamer *
 createARMMachOStreamer(MCContext &Ctx, std::unique_ptr<MCAsmBackend> &&MAB,
                        std::unique_ptr<MCObjectWriter> &&OW,
-                       std::unique_ptr<MCCodeEmitter> &&Emitter,
-                       bool DWARFMustBeAtTheEnd) {
+                       std::unique_ptr<MCCodeEmitter> &&Emitter) {
   return createMachOStreamer(Ctx, std::move(MAB), std::move(OW),
-                             std::move(Emitter), DWARFMustBeAtTheEnd);
+                             std::move(Emitter), false);
 }
 
 static MCInstPrinter *createARMMCInstPrinter(const Triple &T,
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index a673d590419ec..66f19237f275f 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -93,8 +93,7 @@ MCAsmBackend *createARMBEAsmBackend(const Target &T, const MCSubtargetInfo &STI,
 MCStreamer *createARMWinCOFFStreamer(MCContext &Context,
                                      std::unique_ptr<MCAsmBackend> &&MAB,
                                      std::unique_ptr<MCObjectWriter> &&OW,
-                                     std::unique_ptr<MCCodeEmitter> &&Emitter,
-                                     bool IncrementalLinkerCompatible);
+                                     std::unique_ptr<MCCodeEmitter> &&Emitter);
 
 /// Construct an ELF Mach-O object writer.
 std::unique_ptr<MCObjectTargetWriter> createARMELFObjectWriter(uint8_t OSABI);
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
index 0fcf6eb1a5abb..e66059c2a0e09 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
@@ -70,12 +70,9 @@ MCStreamer *
 llvm::createARMWinCOFFStreamer(MCContext &Context,
                                std::unique_ptr<MCAsmBackend> &&MAB,
                                std::unique_ptr<MCObjectWriter> &&OW,
-                               std::unique_ptr<MCCodeEmitter> &&Emitter,
-                               bool IncrementalLinkerCompatible) {
-  auto *S = new ARMWinCOFFStreamer(Context, std::move(MAB), std::move(Emitter),
-                                   std::move(OW));
-  S->getAssembler().setIncrementalLinkerCompatible(IncrementalLinkerCompatible);
-  return S;
+                               std::unique_ptr<MCCodeEmitter> &&Emitter) {
+  return new ARMWinCOFFStreamer(Context, std::move(MAB), std::move(Emitter),
+                                std::move(OW));
 }
 
 namespace {
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 6072e5e244263..ba6be85c7f2e8 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -335,7 +335,6 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
   setTargetDAGCombine(ISD::AND);
   setTargetDAGCombine(ISD::OR);
   setTargetDAGCombine(ISD::SRL);
-  setTargetDAGCombine(ISD::SETCC);
 
   // Set DAG combine for 'LSX' feature.
 
@@ -2529,165 +2528,6 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
-static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
-  ExtType = ISD::NON_EXTLOAD;
-
-  switch (V.getNode()->getOpcode()) {
-  case ISD::LOAD: {
-    LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
-    if ((LoadNode->getMemoryVT() == MVT::i8) ||
-        (LoadNode->getMemoryVT() == MVT::i16)) {
-      ExtType = LoadNode->getExtensionType();
-      return true;
-    }
-    return false;
-  }
-  case ISD::AssertSext: {
-    VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
-    if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
-      ExtType = ISD::SEXTLOAD;
-      return true;
-    }
-    return false;
-  }
-  case ISD::AssertZext: {
-    VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
-    if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
-      ExtType = ISD::ZEXTLOAD;
-      return true;
-    }
-    return false;
-  }
-  default:
-    return false;
-  }
-
-  return false;
-}
-
-// Eliminate redundant truncation and zero-extension nodes.
-// * Case 1:
-//  +------------+ +------------+ +------------+
-//  |   Input1   | |   Input2   | |     CC     |
-//  +------------+ +------------+ +------------+
-//         |              |              |
-//         V              V              +----+
-//  +------------+ +------------+             |
-//  |  TRUNCATE  | |  TRUNCATE  |             |
-//  +------------+ +------------+             |
-//         |              |                   |
-//         V              V                   |
-//  +------------+ +------------+             |
-//  |  ZERO_EXT  | |  ZERO_EXT  |             |
-//  +------------+ +------------+             |
-//         |              |                   |
-//         |              +-------------+     |
-//         V              V             |     |
-//        +----------------+            |     |
-//        |      AND       |            |     |
-//        +----------------+            |     |
-//                |                     |     |
-//                +---------------+     |     |
-//                                |     |     |
-//                                V     V     V
-//                               +-------------+
-//                               |     CMP     |
-//                               +-------------+
-// * Case 2:
-//  +------------+ +------------+ +-------------+ +------------+ +------------+
-//  |   Input1   | |   Input2   | | Constant -1 | | Constant 0 | |     CC     |
-//  +------------+ +------------+ +-------------+ +------------+ +------------+
-//         |              |             |               |               |
-//         V              |             |               |               |
-//  +------------+        |             |               |               |
-//  |     XOR    |<---------------------+               |               |
-//  +------------+        |                             |               |
-//         |              |                             |               |
-//         V              V             +---------------+               |
-//  +------------+ +------------+       |                               |
-//  |  TRUNCATE  | |  TRUNCATE  |       |     +-------------------------+
-//  +------------+ +------------+       |     |
-//         |              |             |     |
-//         V              V             |     |
-//  +------------+ +------------+       |     |
-//  |  ZERO_EXT  | |  ZERO_EXT  |       |     |
-//  +------------+ +------------+       |     |
-//         |              |             |     |
-//         V              V             |     |
-//        +----------------+            |     |
-//        |      AND       |            |     |
-//        +----------------+            |     |
-//                |                     |     |
-//                +---------------+     |     |
-//                                |     |     |
-//                                V     V     V
-//                               +-------------+
-//                               |     CMP     |
-//                               +-------------+
-static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
-                                   TargetLowering::DAGCombinerInfo &DCI,
-                                   const LoongArchSubtarget &Subtarget) {
-  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
-
-  SDNode *AndNode = N->getOperand(0).getNode();
-  if (AndNode->getOpcode() != ISD::AND)
-    return SDValue();
-
-  SDValue AndInputValue2 = AndNode->getOperand(1);
-  if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
-    return SDValue();
-
-  SDValue CmpInputValue = N->getOperand(1);
-  SDValue AndInputValue1 = AndNode->getOperand(0);
-  if (AndInputValue1.getOpcode() == ISD::XOR) {
-    if (CC != ISD::SETEQ && CC != ISD::SETNE)
-      return SDValue();
-    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
-    if (!CN || CN->getSExtValue() != -1)
-      return SDValue();
-    CN = dyn_cast<ConstantSDNode>(CmpInputValue);
-    if (!CN || CN->getSExtValue() != 0)
-      return SDValue();
-    AndInputValue1 = AndInputValue1.getOperand(0);
-    if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
-      return SDValue();
-  } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
-    if (AndInputValue2 != CmpInputValue)
-      return SDValue();
-  } else {
-    return SDValue();
-  }
-
-  SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
-  if (TruncValue1.getOpcode() != ISD::TRUNCATE)
-    return SDValue();
-
-  SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
-  if (TruncValue2.getOpcode() != ISD::TRUNCATE)
-    return SDValue();
-
-  SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
-  SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
-  ISD::LoadExtType ExtType1;
-  ISD::LoadExtType ExtType2;
-
-  if (!checkValueWidth(TruncInputValue1, ExtType1) ||
-      !checkValueWidth(TruncInputValue2, ExtType2))
-    return SDValue();
-
-  if ((ExtType2 != ISD::ZEXTLOAD) &&
-      ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
-    return SDValue();
-
-  // These truncation and zero-extension nodes are not necessary, remove them.
-  SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
-                               TruncInputValue1, TruncInputValue2);
-  SDValue NewSetCC =
-      DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
-  DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
-  return SDValue(N, 0);
-}
-
 // Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
                                       TargetLowering::DAGCombinerInfo &DCI,
@@ -3315,8 +3155,6 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
     return performANDCombine(N, DAG, DCI, Subtarget);
   case ISD::OR:
     return performORCombine(N, DAG, DCI, Subtarget);
-  case ISD::SETCC:
-    return performSETCCCombine(N, DAG, DCI, Subtarget);
   case ISD::SRL:
     return performSRLCombine(N, DAG, DCI, Subtarget);
   case LoongArchISD::BITREV_W:
diff --git a/llvm/lib/Target/M68k/M68kFrameLowering.cpp b/llvm/lib/Target/M68k/M68kFrameLowering.cpp
index 36443f9d33451..c548346f35a2f 100644
--- a/llvm/lib/Target/M68k/M68kFrameLowering.cpp
+++ b/llvm/lib/Target/M68k/M68kFrameLowering.cpp
@@ -246,9 +246,7 @@ MachineBasicBlock::iterator M68kFrameLowering::eliminateCallFramePseudoInstr(
     unsigned StackAlign = getStackAlignment();
     Amount = alignTo(Amount, StackAlign);
 
-    MachineModuleInfo &MMI = MF.getMMI();
-    const auto &Fn = MF.getFunction();
-    bool DwarfCFI = MMI.hasDebugInfo() || Fn.needsUnwindTableEntry();
+    bool DwarfCFI = MF.needsFrameMoves();
 
     // If we have any exception handlers in this function, and we adjust
     // the SP before calls, we may need to indicate this to the unwinder
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 11193c11ede3b..8516bc1bef83e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -3752,8 +3752,14 @@ bool NVPTXDAGToDAGISel::SelectADDRri_imp(
         Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
       else
         Base = Addr.getOperand(0);
-      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
-                                         mvt);
+
+      // Offset must fit in a 32-bit signed int in PTX [register+offset] address
+      // mode
+      if (!CN->getAPIntValue().isSignedIntN(32))
+        return false;
+
+      Offset = CurDAG->getTargetConstant(CN->getSExtValue(), SDLoc(OpNode),
+                                         MVT::i32);
       return true;
     }
   }
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index bc23998455a68..44c1a2e50486c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -5167,9 +5167,12 @@ bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL,
   // - [areg+immoff]
   // - [immAddr]
 
-  if (AM.BaseGV) {
+  // immoff must fit in a signed 32-bit int
+  if (!APInt(64, AM.BaseOffs).isSignedIntN(32))
+    return false;
+
+  if (AM.BaseGV)
     return !AM.BaseOffs && !AM.HasBaseReg && !AM.Scale;
-  }
 
   switch (AM.Scale) {
   case 0: // "r", "r+i" or "i" is allowed
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp
index 74ebaa9d0c004..2f302ed4c9e83 100644
--- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp
@@ -49,15 +49,6 @@ createSPIRVMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
   return createSPIRVMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
 }
 
-static MCStreamer *
-createSPIRVMCStreamer(const Triple &T, MCContext &Ctx,
-                      std::unique_ptr<MCAsmBackend> &&MAB,
-                      std::unique_ptr<MCObjectWriter> &&OW,
-                      std::unique_ptr<MCCodeEmitter> &&Emitter) {
-  return createSPIRVStreamer(Ctx, std::move(MAB), std::move(OW),
-                             std::move(Emitter));
-}
-
 static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S,
                                                  formatted_raw_ostream &,
                                                  MCInstPrinter *, bool) {
@@ -94,7 +85,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSPIRVTargetMC() {
     TargetRegistry::RegisterMCInstrInfo(*T, createSPIRVMCInstrInfo);
     TargetRegistry::RegisterMCRegInfo(*T, createSPIRVMCRegisterInfo);
     TargetRegistry::RegisterMCSubtargetInfo(*T, createSPIRVMCSubtargetInfo);
-    TargetRegistry::RegisterSPIRVStreamer(*T, createSPIRVMCStreamer);
     TargetRegistry::RegisterMCInstPrinter(*T, createSPIRVMCInstPrinter);
     TargetRegistry::RegisterMCInstrAnalysis(*T, createSPIRVInstrAnalysis);
     TargetRegistry::RegisterMCCodeEmitter(*T, createSPIRVMCCodeEmitter);
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 3cd0af0c7f546..6553e1cc4a930 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -691,9 +691,12 @@ void X86MCCodeEmitter::emitMemModRMByte(
 
   unsigned BaseRegNo = BaseReg ? getX86RegNum(Base) : -1U;
 
+  bool IsAdSize16 = STI.hasFeature(X86::Is32Bit) &&
+                    (TSFlags & X86II::AdSizeMask) == X86II::AdSize16;
+
   // 16-bit addressing forms of the ModR/M byte have a different encoding for
   // the R/M field and are far more limited in which registers can be used.
-  if (X86_MC::is16BitMemOperand(MI, Op, STI)) {
+  if (IsAdSize16 || X86_MC::is16BitMemOperand(MI, Op, STI)) {
     if (BaseReg) {
       // For 32-bit addressing, the row and column values in Table 2-2 are
       // basically the same. It's AX/CX/DX/BX/SP/BP/SI/DI in that order, with
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index 96a7823b04ad8..4e83e7e437aeb 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -128,8 +128,7 @@ MCTargetStreamer *createX86ObjectTargetStreamer(MCStreamer &S,
 MCStreamer *createX86WinCOFFStreamer(MCContext &C,
                                      std::unique_ptr<MCAsmBackend> &&AB,
                                      std::unique_ptr<MCObjectWriter> &&OW,
-                                     std::unique_ptr<MCCodeEmitter> &&CE,
-                                     bool IncrementalLinkerCompatible);
+                                     std::unique_ptr<MCCodeEmitter> &&CE);
 
 MCStreamer *createX86ELFStreamer(const Triple &T, MCContext &Context,
                                  std::unique_ptr<MCAsmBackend> &&MAB,
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
index b1e5362c5d24b..1ef10928c05d8 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
@@ -72,14 +72,9 @@ void X86WinCOFFStreamer::finishImpl() {
 }
 } // namespace
 
-MCStreamer *llvm::createX86WinCOFFStreamer(MCContext &C,
-                                           std::unique_ptr<MCAsmBackend> &&AB,
-                                           std::unique_ptr<MCObjectWriter> &&OW,
-                                           std::unique_ptr<MCCodeEmitter> &&CE,
-                                           bool IncrementalLinkerCompatible) {
-  X86WinCOFFStreamer *S =
-      new X86WinCOFFStreamer(C, std::move(AB), std::move(CE), std::move(OW));
-  S->getAssembler().setIncrementalLinkerCompatible(IncrementalLinkerCompatible);
-  return S;
+MCStreamer *
+llvm::createX86WinCOFFStreamer(MCContext &C, std::unique_ptr<MCAsmBackend> &&AB,
+                               std::unique_ptr<MCObjectWriter> &&OW,
+                               std::unique_ptr<MCCodeEmitter> &&CE) {
+  return new X86WinCOFFStreamer(C, std::move(AB), std::move(CE), std::move(OW));
 }
-
diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp
index 3395a13545e45..0c2c6bf7f8b70 100644
--- a/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -66,11 +66,10 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   CodeEmitter.reset(TM.getTarget().createMCCodeEmitter(
       *Subtarget->getInstrInfo(), MF.getContext()));
 
-  EmitFPOData =
-      Subtarget->isTargetWin32() && MF.getMMI().getModule()->getCodeViewFlag();
+  const Module *M = MF.getFunction().getParent();
+  EmitFPOData = Subtarget->isTargetWin32() && M->getCodeViewFlag();
 
-  IndCSPrefix =
-      MF.getMMI().getModule()->getModuleFlag("indirect_branch_cs_prefix");
+  IndCSPrefix = M->getModuleFlag("indirect_branch_cs_prefix");
 
   SetupMachineFunction(MF);
 
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 89801783e9280..0ff50d8ef678e 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -1530,7 +1530,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
   MachineBasicBlock::iterator MBBI = MBB.begin();
   MachineFrameInfo &MFI = MF.getFrameInfo();
   const Function &Fn = MF.getFunction();
-  MachineModuleInfo &MMI = MF.getMMI();
   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
   uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
   uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate.
@@ -1545,8 +1544,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
   bool IsWin64Prologue = isWin64Prologue(MF);
   bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry();
   // FIXME: Emit FPO data for EH funclets.
-  bool NeedsWinFPO =
-      !IsFunclet && STI.isTargetWin32() && MMI.getModule()->getCodeViewFlag();
+  bool NeedsWinFPO = !IsFunclet && STI.isTargetWin32() &&
+                     MF.getFunction().getParent()->getCodeViewFlag();
   bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO;
   bool NeedsDwarfCFI = needsDwarfCFI(MF);
   Register FramePtr = TRI->getFrameRegister(MF);
@@ -3521,7 +3520,7 @@ void X86FrameLowering::adjustForHiPEPrologue(
 
   // HiPE-specific values
   NamedMDNode *HiPELiteralsMD =
-      MF.getMMI().getModule()->getNamedMetadata("hipe.literals");
+      MF.getFunction().getParent()->getNamedMetadata("hipe.literals");
   if (!HiPELiteralsMD)
     report_fatal_error(
         "Can't generate HiPE prologue without runtime parameters");
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index c91bd576dc9f6..74804e5c9783d 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -922,7 +922,8 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
       if (Imm == EndbrImm || isEndbrImm64(Imm)) {
         // Check that the cf-protection-branch is enabled.
         Metadata *CFProtectionBranch =
-          MF->getMMI().getModule()->getModuleFlag("cf-protection-branch");
+            MF->getFunction().getParent()->getModuleFlag(
+                "cf-protection-branch");
         if (CFProtectionBranch || IndirectBranchTracking) {
           SDLoc dl(N);
           SDValue Complement = CurDAG->getConstant(~Imm, dl, VT, false, true);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 890728ba088dc..2959902c78675 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -35882,7 +35882,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
     MIB.addMBB(restoreMBB);
   MIB.setMemRefs(MMOs);
 
-  if (MF->getMMI().getModule()->getModuleFlag("cf-protection-return")) {
+  if (MF->getFunction().getParent()->getModuleFlag("cf-protection-return")) {
     emitSetJmpShadowStackFix(MI, thisMBB);
   }
 
@@ -36158,7 +36158,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
   MachineBasicBlock *thisMBB = MBB;
 
   // When CET and shadow stack is enabled, we need to fix the Shadow Stack.
-  if (MF->getMMI().getModule()->getModuleFlag("cf-protection-return")) {
+  if (MF->getFunction().getParent()->getModuleFlag("cf-protection-return")) {
     thisMBB = emitLongJmpShadowStackFix(MI, thisMBB);
   }
 
@@ -57981,7 +57981,7 @@ SDValue X86TargetLowering::expandIndirectJTBranch(const SDLoc &dl,
                                                   SDValue Value, SDValue Addr,
                                                   int JTI,
                                                   SelectionDAG &DAG) const {
-  const Module *M = DAG.getMachineFunction().getMMI().getModule();
+  const Module *M = DAG.getMachineFunction().getFunction().getParent();
   Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
   if (IsCFProtectionSupported) {
     // In case control-flow branch protection is enabled, we need to add
diff --git a/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp b/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
index 381286a3bbfc0..7740a174af4f3 100644
--- a/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
+++ b/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
@@ -116,7 +116,7 @@ static bool needsPrologueENDBR(MachineFunction &MF, const Module *M) {
 bool X86IndirectBranchTrackingPass::runOnMachineFunction(MachineFunction &MF) {
   const X86Subtarget &SubTarget = MF.getSubtarget<X86Subtarget>();
 
-  const Module *M = MF.getMMI().getModule();
+  const Module *M = MF.getFunction().getParent();
   // Check that the cf-protection-branch is enabled.
   Metadata *isCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
 
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index df20ecd1b9b21..77ddd2366e629 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -147,7 +147,7 @@ X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
       AsmPrinter(asmprinter) {}
 
 MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
-  return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>();
+  return AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoMachO>();
 }
 
 /// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
@@ -203,7 +203,7 @@ MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const {
     break;
   case X86II::MO_COFFSTUB: {
     MachineModuleInfoCOFF &MMICOFF =
-        MF.getMMI().getObjFileInfo<MachineModuleInfoCOFF>();
+        AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoCOFF>();
     MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym);
     if (!StubSym.getPointer()) {
       assert(MO.isGlobal() && "Extern symbol not handled yet");
diff --git a/llvm/lib/Target/X86/X86ReturnThunks.cpp b/llvm/lib/Target/X86/X86ReturnThunks.cpp
index fe89238f26f97..c40b4f371fb31 100644
--- a/llvm/lib/Target/X86/X86ReturnThunks.cpp
+++ b/llvm/lib/Target/X86/X86ReturnThunks.cpp
@@ -78,7 +78,7 @@ bool X86ReturnThunks::runOnMachineFunction(MachineFunction &MF) {
         Rets.push_back(&Term);
 
   bool IndCS =
-      MF.getMMI().getModule()->getModuleFlag("indirect_branch_cs_prefix");
+      MF.getFunction().getParent()->getModuleFlag("indirect_branch_cs_prefix");
   const MCInstrDesc &CS = ST.getInstrInfo()->get(X86::CS_PREFIX);
   const MCInstrDesc &JMP = ST.getInstrInfo()->get(X86::TAILJMPd);
 
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index 141ecb936b708..dcf9130052ac1 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -171,14 +171,14 @@ constexpr FeatureBitset FeaturesClearwaterforest =
 
 // Geode Processor.
 constexpr FeatureBitset FeaturesGeode =
-    FeatureX87 | FeatureCMPXCHG8B | FeatureMMX | Feature3DNOW | Feature3DNOWA;
+    FeatureX87 | FeatureCMPXCHG8B | FeatureMMX | FeaturePRFCHW;
 
 // K6 processor.
 constexpr FeatureBitset FeaturesK6 = FeatureX87 | FeatureCMPXCHG8B | FeatureMMX;
 
 // K7 and K8 architecture processors.
 constexpr FeatureBitset FeaturesAthlon =
-    FeatureX87 | FeatureCMPXCHG8B | FeatureMMX | Feature3DNOW | Feature3DNOWA;
+    FeatureX87 | FeatureCMPXCHG8B | FeatureMMX | FeaturePRFCHW;
 constexpr FeatureBitset FeaturesAthlonXP =
     FeaturesAthlon | FeatureFXSR | FeatureSSE;
 constexpr FeatureBitset FeaturesK8 =
@@ -256,8 +256,8 @@ constexpr ProcInfo Processors[] = {
   // i486-generation processors.
   { {"i486"}, CK_i486, ~0U, FeatureX87, '\0', false },
   { {"winchip-c6"}, CK_WinChipC6, ~0U, FeaturesPentiumMMX, '\0', false },
-  { {"winchip2"}, CK_WinChip2, ~0U, FeaturesPentiumMMX | Feature3DNOW, '\0', false },
-  { {"c3"}, CK_C3, ~0U, FeaturesPentiumMMX | Feature3DNOW, '\0', false },
+  { {"winchip2"}, CK_WinChip2, ~0U, FeaturesPentiumMMX | FeaturePRFCHW, '\0', false },
+  { {"c3"}, CK_C3, ~0U, FeaturesPentiumMMX | FeaturePRFCHW, '\0', false },
   // i586-generation processors, P5 microarchitecture based.
   { {"i586"}, CK_i586, ~0U, FeatureX87 | FeatureCMPXCHG8B, '\0', false },
   { {"pentium"}, CK_Pentium, ~0U, FeatureX87 | FeatureCMPXCHG8B, 'B', false },
@@ -386,8 +386,8 @@ constexpr ProcInfo Processors[] = {
   { {"lakemont"}, CK_Lakemont, ~0U, FeatureCMPXCHG8B, '\0', false },
   // K6 architecture processors.
   { {"k6"}, CK_K6, ~0U, FeaturesK6, '\0', false },
-  { {"k6-2"}, CK_K6_2, ~0U, FeaturesK6 | Feature3DNOW, '\0', false },
-  { {"k6-3"}, CK_K6_3, ~0U, FeaturesK6 | Feature3DNOW, '\0', false },
+  { {"k6-2"}, CK_K6_2, ~0U, FeaturesK6 | FeaturePRFCHW, '\0', false },
+  { {"k6-3"}, CK_K6_3, ~0U, FeaturesK6 | FeaturePRFCHW, '\0', false },
   // K7 architecture processors.
   { {"athlon"}, CK_Athlon, ~0U, FeaturesAthlon, '\0', false },
   { {"athlon-tbird"}, CK_Athlon, ~0U, FeaturesAthlon, '\0', false },
@@ -493,6 +493,7 @@ constexpr FeatureBitset ImpliedFeaturesFXSR = {};
 constexpr FeatureBitset ImpliedFeaturesINVPCID = {};
 constexpr FeatureBitset ImpliedFeaturesLWP = {};
 constexpr FeatureBitset ImpliedFeaturesLZCNT = {};
+constexpr FeatureBitset ImpliedFeaturesMMX = {};
 constexpr FeatureBitset ImpliedFeaturesMWAITX = {};
 constexpr FeatureBitset ImpliedFeaturesMOVBE = {};
 constexpr FeatureBitset ImpliedFeaturesMOVDIR64B = {};
@@ -520,6 +521,8 @@ constexpr FeatureBitset ImpliedFeaturesWBNOINVD = {};
 constexpr FeatureBitset ImpliedFeaturesVZEROUPPER = {};
 constexpr FeatureBitset ImpliedFeaturesX87 = {};
 constexpr FeatureBitset ImpliedFeaturesXSAVE = {};
+constexpr FeatureBitset ImpliedFeaturesDUMMYFEATURE1 = {};
+constexpr FeatureBitset ImpliedFeaturesDUMMYFEATURE2 = {};
 
 // Not really CPU features, but need to be in the table because clang uses
 // target features to communicate them to the backend.
@@ -534,11 +537,6 @@ constexpr FeatureBitset ImpliedFeaturesXSAVEC = FeatureXSAVE;
 constexpr FeatureBitset ImpliedFeaturesXSAVEOPT = FeatureXSAVE;
 constexpr FeatureBitset ImpliedFeaturesXSAVES = FeatureXSAVE;
 
-// MMX->3DNOW->3DNOWA chain.
-constexpr FeatureBitset ImpliedFeaturesMMX = {};
-constexpr FeatureBitset ImpliedFeatures3DNOW = FeatureMMX;
-constexpr FeatureBitset ImpliedFeatures3DNOWA = Feature3DNOW;
-
 // SSE/AVX/AVX512F chain.
 constexpr FeatureBitset ImpliedFeaturesSSE = {};
 constexpr FeatureBitset ImpliedFeaturesSSE2 = FeatureSSE;
diff --git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
index 5aefcbf13182c..2ec5da4886839 100644
--- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
+++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
@@ -39,6 +39,7 @@
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Dominators.h"
+#include "llvm/IR/EHPersonalities.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
@@ -168,10 +169,24 @@ static bool mayExtractBlock(const BasicBlock &BB) {
   //
   // Resumes that are not reachable from a cleanup landing pad are considered to
   // be unreachable. It’s not safe to split them out either.
+
   if (BB.hasAddressTaken() || BB.isEHPad())
     return false;
   auto Term = BB.getTerminator();
-  return !isa<InvokeInst>(Term) && !isa<ResumeInst>(Term);
+  if (isa<InvokeInst>(Term) || isa<ResumeInst>(Term))
+    return false;
+
+  // Do not outline basic blocks that have token type instructions. e.g.,
+  // exception:
+  // %0 = cleanuppad within none []
+  // call void @"?terminate@@YAXXZ"() [ "funclet"(token %0) ]
+  // br label %continue-exception
+  if (llvm::any_of(
+          BB, [](const Instruction &I) { return I.getType()->isTokenTy(); })) {
+    return false;
+  }
+
+  return true;
 }
 
 /// Mark \p F cold. Based on this assumption, also optimize it for minimum size.
@@ -258,6 +273,11 @@ bool HotColdSplitting::shouldOutlineFrom(const Function &F) const {
       F.hasFnAttribute(Attribute::SanitizeMemory))
     return false;
 
+  // Do not outline scoped EH personality functions.
+  if (F.hasPersonalityFn())
+    if (isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
+      return false;
+
   return true;
 }
 
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index f994f8a62c320..09c56eb5fe6aa 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -945,6 +945,9 @@ Value *InstrLowerer::getCounterAddress(InstrProfCntrInstBase *I) {
     IRBuilder<> EntryBuilder(&Fn->getEntryBlock().front());
     auto *Bias = getOrCreateBiasVar(getInstrProfCounterBiasVarName());
     BiasLI = EntryBuilder.CreateLoad(Int64Ty, Bias, "profc_bias");
+    // Bias doesn't change after startup.
+    BiasLI->setMetadata(LLVMContext::MD_invariant_load,
+                        MDNode::get(M.getContext(), std::nullopt));
   }
   auto *Add = Builder.CreateAdd(Builder.CreatePtrToInt(Addr, Int64Ty), BiasLI);
   return Builder.CreateIntToPtr(Add, Addr->getType());
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index ff60bd894cd40..6d28b8fabe42e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -441,37 +441,6 @@ static std::optional<unsigned> getSmallBestKnownTC(ScalarEvolution &SE,
   return std::nullopt;
 }
 
-/// Return a vector containing interleaved elements from multiple
-/// smaller input vectors.
-static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
-                                const Twine &Name) {
-  unsigned Factor = Vals.size();
-  assert(Factor > 1 && "Tried to interleave invalid number of vectors");
-
-  VectorType *VecTy = cast<VectorType>(Vals[0]->getType());
-#ifndef NDEBUG
-  for (Value *Val : Vals)
-    assert(Val->getType() == VecTy && "Tried to interleave mismatched types");
-#endif
-
-  // Scalable vectors cannot use arbitrary shufflevectors (only splats), so
-  // must use intrinsics to interleave.
-  if (VecTy->isScalableTy()) {
-    VectorType *WideVecTy = VectorType::getDoubleElementsVectorType(VecTy);
-    return Builder.CreateIntrinsic(WideVecTy, Intrinsic::vector_interleave2,
-                                   Vals,
-                                   /*FMFSource=*/nullptr, Name);
-  }
-
-  // Fixed length. Start by concatenating all vectors into a wide vector.
-  Value *WideVec = concatenateVectors(Builder, Vals);
-
-  // Interleave the elements into the wide vector.
-  const unsigned NumElts = VecTy->getElementCount().getFixedValue();
-  return Builder.CreateShuffleVector(
-      WideVec, createInterleaveMask(NumElts, Factor), Name);
-}
-
 namespace {
 // Forward declare GeneratedRTChecks.
 class GeneratedRTChecks;
@@ -553,16 +522,6 @@ class InnerLoopVectorizer {
                             const VPIteration &Instance,
                             VPTransformState &State);
 
-  /// Try to vectorize interleaved access group \p Group with the base address
-  /// given in \p Addr, optionally masking the vector operations if \p
-  /// BlockInMask is non-null. Use \p State to translate given VPValues to IR
-  /// values in the vectorized loop.
-  void vectorizeInterleaveGroup(const InterleaveGroup<Instruction> *Group,
-                                ArrayRef<VPValue *> VPDefs,
-                                VPTransformState &State, VPValue *Addr,
-                                ArrayRef<VPValue *> StoredValues,
-                                VPValue *BlockInMask, bool NeedsMaskForGaps);
-
   /// Fix the non-induction PHIs in \p Plan.
   void fixNonInductionPHIs(VPlan &Plan, VPTransformState &State);
 
@@ -611,11 +570,6 @@ class InnerLoopVectorizer {
   /// Returns (and creates if needed) the trip count of the widened loop.
   Value *getOrCreateVectorTripCount(BasicBlock *InsertBlock);
 
-  /// Returns a bitcasted value to the requested vector type.
-  /// Also handles bitcasts of vector<float> <-> vector<pointer> types.
-  Value *createBitOrPointerCast(Value *V, VectorType *DstVTy,
-                                const DataLayout &DL);
-
   /// Emit a bypass check to see if the vector trip count is zero, including if
   /// it overflows.
   void emitIterationCountCheck(BasicBlock *Bypass);
@@ -2393,275 +2347,6 @@ static bool useMaskedInterleavedAccesses(const TargetTransformInfo &TTI) {
   return TTI.enableMaskedInterleavedAccessVectorization();
 }
 
-// Try to vectorize the interleave group that \p Instr belongs to.
-//
-// E.g. Translate following interleaved load group (factor = 3):
-//   for (i = 0; i < N; i+=3) {
-//     R = Pic[i];             // Member of index 0
-//     G = Pic[i+1];           // Member of index 1
-//     B = Pic[i+2];           // Member of index 2
-//     ... // do something to R, G, B
-//   }
-// To:
-//   %wide.vec = load <12 x i32>                       ; Read 4 tuples of R,G,B
-//   %R.vec = shuffle %wide.vec, poison, <0, 3, 6, 9>   ; R elements
-//   %G.vec = shuffle %wide.vec, poison, <1, 4, 7, 10>  ; G elements
-//   %B.vec = shuffle %wide.vec, poison, <2, 5, 8, 11>  ; B elements
-//
-// Or translate following interleaved store group (factor = 3):
-//   for (i = 0; i < N; i+=3) {
-//     ... do something to R, G, B
-//     Pic[i]   = R;           // Member of index 0
-//     Pic[i+1] = G;           // Member of index 1
-//     Pic[i+2] = B;           // Member of index 2
-//   }
-// To:
-//   %R_G.vec = shuffle %R.vec, %G.vec, <0, 1, 2, ..., 7>
-//   %B_U.vec = shuffle %B.vec, poison, <0, 1, 2, 3, u, u, u, u>
-//   %interleaved.vec = shuffle %R_G.vec, %B_U.vec,
-//        <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>    ; Interleave R,G,B elements
-//   store <12 x i32> %interleaved.vec              ; Write 4 tuples of R,G,B
-void InnerLoopVectorizer::vectorizeInterleaveGroup(
-    const InterleaveGroup<Instruction> *Group, ArrayRef<VPValue *> VPDefs,
-    VPTransformState &State, VPValue *Addr, ArrayRef<VPValue *> StoredValues,
-    VPValue *BlockInMask, bool NeedsMaskForGaps) {
-  Instruction *Instr = Group->getInsertPos();
-  const DataLayout &DL = Instr->getDataLayout();
-
-  // Prepare for the vector type of the interleaved load/store.
-  Type *ScalarTy = getLoadStoreType(Instr);
-  unsigned InterleaveFactor = Group->getFactor();
-  auto *VecTy = VectorType::get(ScalarTy, State.VF * InterleaveFactor);
-
-  // Prepare for the new pointers.
-  SmallVector<Value *, 2> AddrParts;
-  unsigned Index = Group->getIndex(Instr);
-
-  // TODO: extend the masked interleaved-group support to reversed access.
-  assert((!BlockInMask || !Group->isReverse()) &&
-         "Reversed masked interleave-group not supported.");
-
-  Value *Idx;
-  // If the group is reverse, adjust the index to refer to the last vector lane
-  // instead of the first. We adjust the index from the first vector lane,
-  // rather than directly getting the pointer for lane VF - 1, because the
-  // pointer operand of the interleaved access is supposed to be uniform. For
-  // uniform instructions, we're only required to generate a value for the
-  // first vector lane in each unroll iteration.
-  if (Group->isReverse()) {
-    Value *RuntimeVF = getRuntimeVF(Builder, Builder.getInt32Ty(), State.VF);
-    Idx = Builder.CreateSub(RuntimeVF, Builder.getInt32(1));
-    Idx = Builder.CreateMul(Idx, Builder.getInt32(Group->getFactor()));
-    Idx = Builder.CreateAdd(Idx, Builder.getInt32(Index));
-    Idx = Builder.CreateNeg(Idx);
-  } else
-    Idx = Builder.getInt32(-Index);
-
-  for (unsigned Part = 0; Part < State.UF; Part++) {
-    Value *AddrPart = State.get(Addr, VPIteration(Part, 0));
-    if (auto *I = dyn_cast<Instruction>(AddrPart))
-      State.setDebugLocFrom(I->getDebugLoc());
-
-    // Notice current instruction could be any index. Need to adjust the address
-    // to the member of index 0.
-    //
-    // E.g.  a = A[i+1];     // Member of index 1 (Current instruction)
-    //       b = A[i];       // Member of index 0
-    // Current pointer is pointed to A[i+1], adjust it to A[i].
-    //
-    // E.g.  A[i+1] = a;     // Member of index 1
-    //       A[i]   = b;     // Member of index 0
-    //       A[i+2] = c;     // Member of index 2 (Current instruction)
-    // Current pointer is pointed to A[i+2], adjust it to A[i].
-
-    bool InBounds = false;
-    if (auto *gep = dyn_cast<GetElementPtrInst>(AddrPart->stripPointerCasts()))
-      InBounds = gep->isInBounds();
-    AddrPart = Builder.CreateGEP(ScalarTy, AddrPart, Idx, "", InBounds);
-    AddrParts.push_back(AddrPart);
-  }
-
-  State.setDebugLocFrom(Instr->getDebugLoc());
-  Value *PoisonVec = PoisonValue::get(VecTy);
-
-  auto CreateGroupMask = [this, &BlockInMask, &State, &InterleaveFactor](
-                             unsigned Part, Value *MaskForGaps) -> Value * {
-    if (State.VF.isScalable()) {
-      assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");
-      assert(InterleaveFactor == 2 &&
-             "Unsupported deinterleave factor for scalable vectors");
-      auto *BlockInMaskPart = State.get(BlockInMask, Part);
-      SmallVector<Value *, 2> Ops = {BlockInMaskPart, BlockInMaskPart};
-      auto *MaskTy = VectorType::get(Builder.getInt1Ty(),
-                                     State.VF.getKnownMinValue() * 2, true);
-      return Builder.CreateIntrinsic(MaskTy, Intrinsic::vector_interleave2, Ops,
-                                     /*FMFSource=*/nullptr, "interleaved.mask");
-    }
-
-    if (!BlockInMask)
-      return MaskForGaps;
-
-    Value *BlockInMaskPart = State.get(BlockInMask, Part);
-    Value *ShuffledMask = Builder.CreateShuffleVector(
-        BlockInMaskPart,
-        createReplicatedMask(InterleaveFactor, State.VF.getKnownMinValue()),
-        "interleaved.mask");
-    return MaskForGaps ? Builder.CreateBinOp(Instruction::And, ShuffledMask,
-                                             MaskForGaps)
-                       : ShuffledMask;
-  };
-
-  // Vectorize the interleaved load group.
-  if (isa<LoadInst>(Instr)) {
-    Value *MaskForGaps = nullptr;
-    if (NeedsMaskForGaps) {
-      MaskForGaps =
-          createBitMaskForGaps(Builder, State.VF.getKnownMinValue(), *Group);
-      assert(MaskForGaps && "Mask for Gaps is required but it is null");
-    }
-
-    // For each unroll part, create a wide load for the group.
-    SmallVector<Value *, 2> NewLoads;
-    for (unsigned Part = 0; Part < State.UF; Part++) {
-      Instruction *NewLoad;
-      if (BlockInMask || MaskForGaps) {
-        assert(useMaskedInterleavedAccesses(*TTI) &&
-               "masked interleaved groups are not allowed.");
-        Value *GroupMask = CreateGroupMask(Part, MaskForGaps);
-        NewLoad =
-            Builder.CreateMaskedLoad(VecTy, AddrParts[Part], Group->getAlign(),
-                                     GroupMask, PoisonVec, "wide.masked.vec");
-      }
-      else
-        NewLoad = Builder.CreateAlignedLoad(VecTy, AddrParts[Part],
-                                            Group->getAlign(), "wide.vec");
-      Group->addMetadata(NewLoad);
-      NewLoads.push_back(NewLoad);
-    }
-
-    if (VecTy->isScalableTy()) {
-      assert(InterleaveFactor == 2 &&
-             "Unsupported deinterleave factor for scalable vectors");
-
-      for (unsigned Part = 0; Part < State.UF; ++Part) {
-        // Scalable vectors cannot use arbitrary shufflevectors (only splats),
-        // so must use intrinsics to deinterleave.
-        Value *DI = Builder.CreateIntrinsic(
-            Intrinsic::vector_deinterleave2, VecTy, NewLoads[Part],
-            /*FMFSource=*/nullptr, "strided.vec");
-        unsigned J = 0;
-        for (unsigned I = 0; I < InterleaveFactor; ++I) {
-          Instruction *Member = Group->getMember(I);
-
-          if (!Member)
-            continue;
-
-          Value *StridedVec = Builder.CreateExtractValue(DI, I);
-          // If this member has different type, cast the result type.
-          if (Member->getType() != ScalarTy) {
-            VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
-            StridedVec = createBitOrPointerCast(StridedVec, OtherVTy, DL);
-          }
-
-          if (Group->isReverse())
-            StridedVec = Builder.CreateVectorReverse(StridedVec, "reverse");
-
-          State.set(VPDefs[J], StridedVec, Part);
-          ++J;
-        }
-      }
-
-      return;
-    }
-
-    // For each member in the group, shuffle out the appropriate data from the
-    // wide loads.
-    unsigned J = 0;
-    for (unsigned I = 0; I < InterleaveFactor; ++I) {
-      Instruction *Member = Group->getMember(I);
-
-      // Skip the gaps in the group.
-      if (!Member)
-        continue;
-
-      auto StrideMask =
-          createStrideMask(I, InterleaveFactor, State.VF.getKnownMinValue());
-      for (unsigned Part = 0; Part < State.UF; Part++) {
-        Value *StridedVec = Builder.CreateShuffleVector(
-            NewLoads[Part], StrideMask, "strided.vec");
-
-        // If this member has different type, cast the result type.
-        if (Member->getType() != ScalarTy) {
-          assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
-          VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
-          StridedVec = createBitOrPointerCast(StridedVec, OtherVTy, DL);
-        }
-
-        if (Group->isReverse())
-          StridedVec = Builder.CreateVectorReverse(StridedVec, "reverse");
-
-        State.set(VPDefs[J], StridedVec, Part);
-      }
-      ++J;
-    }
-    return;
-  }
-
-  // The sub vector type for current instruction.
-  auto *SubVT = VectorType::get(ScalarTy, State.VF);
-
-  // Vectorize the interleaved store group.
-  Value *MaskForGaps =
-      createBitMaskForGaps(Builder, State.VF.getKnownMinValue(), *Group);
-  assert((!MaskForGaps || useMaskedInterleavedAccesses(*TTI)) &&
-         "masked interleaved groups are not allowed.");
-  assert((!MaskForGaps || !State.VF.isScalable()) &&
-         "masking gaps for scalable vectors is not yet supported.");
-  for (unsigned Part = 0; Part < State.UF; Part++) {
-    // Collect the stored vector from each member.
-    SmallVector<Value *, 4> StoredVecs;
-    unsigned StoredIdx = 0;
-    for (unsigned i = 0; i < InterleaveFactor; i++) {
-      assert((Group->getMember(i) || MaskForGaps) &&
-             "Fail to get a member from an interleaved store group");
-      Instruction *Member = Group->getMember(i);
-
-      // Skip the gaps in the group.
-      if (!Member) {
-        Value *Undef = PoisonValue::get(SubVT);
-        StoredVecs.push_back(Undef);
-        continue;
-      }
-
-      Value *StoredVec = State.get(StoredValues[StoredIdx], Part);
-      ++StoredIdx;
-
-      if (Group->isReverse())
-        StoredVec = Builder.CreateVectorReverse(StoredVec, "reverse");
-
-      // If this member has different type, cast it to a unified type.
-
-      if (StoredVec->getType() != SubVT)
-        StoredVec = createBitOrPointerCast(StoredVec, SubVT, DL);
-
-      StoredVecs.push_back(StoredVec);
-    }
-
-    // Interleave all the smaller vectors into one wider vector.
-    Value *IVec = interleaveVectors(Builder, StoredVecs, "interleaved.vec");
-    Instruction *NewStoreInstr;
-    if (BlockInMask || MaskForGaps) {
-      Value *GroupMask = CreateGroupMask(Part, MaskForGaps);
-      NewStoreInstr = Builder.CreateMaskedStore(IVec, AddrParts[Part],
-                                                Group->getAlign(), GroupMask);
-    } else
-      NewStoreInstr =
-          Builder.CreateAlignedStore(IVec, AddrParts[Part], Group->getAlign());
-
-    Group->addMetadata(NewStoreInstr);
-  }
-}
-
 void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
                                                VPReplicateRecipe *RepRecipe,
                                                const VPIteration &Instance,
@@ -2769,36 +2454,6 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) {
   return VectorTripCount;
 }
 
-Value *InnerLoopVectorizer::createBitOrPointerCast(Value *V, VectorType *DstVTy,
-                                                   const DataLayout &DL) {
-  // Verify that V is a vector type with same number of elements as DstVTy.
-  auto VF = DstVTy->getElementCount();
-  auto *SrcVecTy = cast<VectorType>(V->getType());
-  assert(VF == SrcVecTy->getElementCount() && "Vector dimensions do not match");
-  Type *SrcElemTy = SrcVecTy->getElementType();
-  Type *DstElemTy = DstVTy->getElementType();
-  assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) &&
-         "Vector elements must have same size");
-
-  // Do a direct cast if element types are castable.
-  if (CastInst::isBitOrNoopPointerCastable(SrcElemTy, DstElemTy, DL)) {
-    return Builder.CreateBitOrPointerCast(V, DstVTy);
-  }
-  // V cannot be directly casted to desired vector type.
-  // May happen when V is a floating point vector but DstVTy is a vector of
-  // pointers or vice-versa. Handle this using a two-step bitcast using an
-  // intermediate Integer type for the bitcast i.e. Ptr <-> Int <-> Float.
-  assert((DstElemTy->isPointerTy() != SrcElemTy->isPointerTy()) &&
-         "Only one type should be a pointer type");
-  assert((DstElemTy->isFloatingPointTy() != SrcElemTy->isFloatingPointTy()) &&
-         "Only one type should be a floating point type");
-  Type *IntTy =
-      IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcElemTy));
-  auto *VecIntTy = VectorType::get(IntTy, VF);
-  Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy);
-  return Builder.CreateBitOrPointerCast(CastVal, DstVTy);
-}
-
 void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
   Value *Count = getTripCount();
   // Reuse existing vector loop preheader for TC checks.
@@ -4594,15 +4249,12 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
 
     // Select the largest VF which doesn't require more registers than existing
     // ones.
-    for (int i = RUs.size() - 1; i >= 0; --i) {
-      bool Selected = true;
-      for (auto &pair : RUs[i].MaxLocalUsers) {
-        unsigned TargetNumRegisters = TTI.getNumberOfRegisters(pair.first);
-        if (pair.second > TargetNumRegisters)
-          Selected = false;
-      }
-      if (Selected) {
-        MaxVF = VFs[i];
+    for (int I = RUs.size() - 1; I >= 0; --I) {
+      const auto &MLU = RUs[I].MaxLocalUsers;
+      if (all_of(MLU, [&](decltype(MLU.front()) &LU) {
+            return LU.second <= TTI.getNumberOfRegisters(LU.first);
+          })) {
+        MaxVF = VFs[I];
         break;
       }
     }
@@ -8986,6 +8638,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
 
     bool NeedsMaskForGaps =
         IG->requiresScalarEpilogue() && !CM.isScalarEpilogueAllowed();
+    assert((!NeedsMaskForGaps || useMaskedInterleavedAccesses(CM.TTI)) &&
+           "masked interleaved groups are not allowed.");
     auto *VPIG = new VPInterleaveRecipe(IG, Recipe->getAddr(), StoredValues,
                                         Recipe->getMask(), NeedsMaskForGaps);
     VPIG->insertBefore(Recipe);
@@ -9397,37 +9051,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
   VPlanTransforms::clearReductionWrapFlags(*Plan);
 }
 
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent,
-                               VPSlotTracker &SlotTracker) const {
-  O << Indent << "INTERLEAVE-GROUP with factor " << IG->getFactor() << " at ";
-  IG->getInsertPos()->printAsOperand(O, false);
-  O << ", ";
-  getAddr()->printAsOperand(O, SlotTracker);
-  VPValue *Mask = getMask();
-  if (Mask) {
-    O << ", ";
-    Mask->printAsOperand(O, SlotTracker);
-  }
-
-  unsigned OpIdx = 0;
-  for (unsigned i = 0; i < IG->getFactor(); ++i) {
-    if (!IG->getMember(i))
-      continue;
-    if (getNumStoreOperands() > 0) {
-      O << "\n" << Indent << "  store ";
-      getOperand(1 + OpIdx)->printAsOperand(O, SlotTracker);
-      O << " to index " << i;
-    } else {
-      O << "\n" << Indent << "  ";
-      getVPValue(OpIdx)->printAsOperand(O, SlotTracker);
-      O << " = load from index " << i;
-    }
-    ++OpIdx;
-  }
-}
-#endif
-
 void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
   assert(IndDesc.getKind() == InductionDescriptor::IK_PtrInduction &&
          "Not a pointer induction according to InductionDescriptor!");
@@ -9511,13 +9134,6 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) {
   State.set(this, DerivedIV, VPIteration(0, 0));
 }
 
-void VPInterleaveRecipe::execute(VPTransformState &State) {
-  assert(!State.Instance && "Interleave group being replicated.");
-  State.ILV->vectorizeInterleaveGroup(IG, definedValues(), State, getAddr(),
-                                      getStoredValues(), getMask(),
-                                      NeedsMaskForGaps);
-}
-
 void VPReplicateRecipe::execute(VPTransformState &State) {
   Instruction *UI = getUnderlyingInstr();
   if (State.Instance) { // Generate a single instance.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 4b1ac79bbfdd4..1b787d0490672 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2037,6 +2037,373 @@ void VPWidenStoreEVLRecipe::print(raw_ostream &O, const Twine &Indent,
 }
 #endif
 
+static Value *createBitOrPointerCast(IRBuilderBase &Builder, Value *V,
+                                     VectorType *DstVTy, const DataLayout &DL) {
+  // Verify that V is a vector type with same number of elements as DstVTy.
+  auto VF = DstVTy->getElementCount();
+  auto *SrcVecTy = cast<VectorType>(V->getType());
+  assert(VF == SrcVecTy->getElementCount() && "Vector dimensions do not match");
+  Type *SrcElemTy = SrcVecTy->getElementType();
+  Type *DstElemTy = DstVTy->getElementType();
+  assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) &&
+         "Vector elements must have same size");
+
+  // Do a direct cast if element types are castable.
+  if (CastInst::isBitOrNoopPointerCastable(SrcElemTy, DstElemTy, DL)) {
+    return Builder.CreateBitOrPointerCast(V, DstVTy);
+  }
+  // V cannot be directly casted to desired vector type.
+  // May happen when V is a floating point vector but DstVTy is a vector of
+  // pointers or vice-versa. Handle this using a two-step bitcast using an
+  // intermediate Integer type for the bitcast i.e. Ptr <-> Int <-> Float.
+  assert((DstElemTy->isPointerTy() != SrcElemTy->isPointerTy()) &&
+         "Only one type should be a pointer type");
+  assert((DstElemTy->isFloatingPointTy() != SrcElemTy->isFloatingPointTy()) &&
+         "Only one type should be a floating point type");
+  Type *IntTy =
+      IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcElemTy));
+  auto *VecIntTy = VectorType::get(IntTy, VF);
+  Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy);
+  return Builder.CreateBitOrPointerCast(CastVal, DstVTy);
+}
+
+/// Return a vector containing interleaved elements from multiple
+/// smaller input vectors.
+static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
+                                const Twine &Name) {
+  unsigned Factor = Vals.size();
+  assert(Factor > 1 && "Tried to interleave invalid number of vectors");
+
+  VectorType *VecTy = cast<VectorType>(Vals[0]->getType());
+#ifndef NDEBUG
+  for (Value *Val : Vals)
+    assert(Val->getType() == VecTy && "Tried to interleave mismatched types");
+#endif
+
+  // Scalable vectors cannot use arbitrary shufflevectors (only splats), so
+  // must use intrinsics to interleave.
+  if (VecTy->isScalableTy()) {
+    VectorType *WideVecTy = VectorType::getDoubleElementsVectorType(VecTy);
+    return Builder.CreateIntrinsic(WideVecTy, Intrinsic::vector_interleave2,
+                                   Vals,
+                                   /*FMFSource=*/nullptr, Name);
+  }
+
+  // Fixed length. Start by concatenating all vectors into a wide vector.
+  Value *WideVec = concatenateVectors(Builder, Vals);
+
+  // Interleave the elements into the wide vector.
+  const unsigned NumElts = VecTy->getElementCount().getFixedValue();
+  return Builder.CreateShuffleVector(
+      WideVec, createInterleaveMask(NumElts, Factor), Name);
+}
+
+// Try to vectorize the interleave group that \p Instr belongs to.
+//
+// E.g. Translate following interleaved load group (factor = 3):
+//   for (i = 0; i < N; i+=3) {
+//     R = Pic[i];             // Member of index 0
+//     G = Pic[i+1];           // Member of index 1
+//     B = Pic[i+2];           // Member of index 2
+//     ... // do something to R, G, B
+//   }
+// To:
+//   %wide.vec = load <12 x i32>                       ; Read 4 tuples of R,G,B
+//   %R.vec = shuffle %wide.vec, poison, <0, 3, 6, 9>   ; R elements
+//   %G.vec = shuffle %wide.vec, poison, <1, 4, 7, 10>  ; G elements
+//   %B.vec = shuffle %wide.vec, poison, <2, 5, 8, 11>  ; B elements
+//
+// Or translate following interleaved store group (factor = 3):
+//   for (i = 0; i < N; i+=3) {
+//     ... do something to R, G, B
+//     Pic[i]   = R;           // Member of index 0
+//     Pic[i+1] = G;           // Member of index 1
+//     Pic[i+2] = B;           // Member of index 2
+//   }
+// To:
+//   %R_G.vec = shuffle %R.vec, %G.vec, <0, 1, 2, ..., 7>
+//   %B_U.vec = shuffle %B.vec, poison, <0, 1, 2, 3, u, u, u, u>
+//   %interleaved.vec = shuffle %R_G.vec, %B_U.vec,
+//        <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>    ; Interleave R,G,B elements
+//   store <12 x i32> %interleaved.vec              ; Write 4 tuples of R,G,B
+void VPInterleaveRecipe::execute(VPTransformState &State) {
+  assert(!State.Instance && "Interleave group being replicated.");
+  const InterleaveGroup<Instruction> *Group = IG;
+  Instruction *Instr = Group->getInsertPos();
+
+  // Prepare for the vector type of the interleaved load/store.
+  Type *ScalarTy = getLoadStoreType(Instr);
+  unsigned InterleaveFactor = Group->getFactor();
+  auto *VecTy = VectorType::get(ScalarTy, State.VF * InterleaveFactor);
+
+  // Prepare for the new pointers.
+  SmallVector<Value *, 2> AddrParts;
+  unsigned Index = Group->getIndex(Instr);
+
+  // TODO: extend the masked interleaved-group support to reversed access.
+  VPValue *BlockInMask = getMask();
+  assert((!BlockInMask || !Group->isReverse()) &&
+         "Reversed masked interleave-group not supported.");
+
+  Value *Idx;
+  // If the group is reverse, adjust the index to refer to the last vector lane
+  // instead of the first. We adjust the index from the first vector lane,
+  // rather than directly getting the pointer for lane VF - 1, because the
+  // pointer operand of the interleaved access is supposed to be uniform. For
+  // uniform instructions, we're only required to generate a value for the
+  // first vector lane in each unroll iteration.
+  if (Group->isReverse()) {
+    Value *RuntimeVF =
+        getRuntimeVF(State.Builder, State.Builder.getInt32Ty(), State.VF);
+    Idx = State.Builder.CreateSub(RuntimeVF, State.Builder.getInt32(1));
+    Idx = State.Builder.CreateMul(Idx,
+                                  State.Builder.getInt32(Group->getFactor()));
+    Idx = State.Builder.CreateAdd(Idx, State.Builder.getInt32(Index));
+    Idx = State.Builder.CreateNeg(Idx);
+  } else
+    Idx = State.Builder.getInt32(-Index);
+
+  VPValue *Addr = getAddr();
+  for (unsigned Part = 0; Part < State.UF; Part++) {
+    Value *AddrPart = State.get(Addr, VPIteration(Part, 0));
+    if (auto *I = dyn_cast<Instruction>(AddrPart))
+      State.setDebugLocFrom(I->getDebugLoc());
+
+    // Notice current instruction could be any index. Need to adjust the address
+    // to the member of index 0.
+    //
+    // E.g.  a = A[i+1];     // Member of index 1 (Current instruction)
+    //       b = A[i];       // Member of index 0
+    // Current pointer is pointed to A[i+1], adjust it to A[i].
+    //
+    // E.g.  A[i+1] = a;     // Member of index 1
+    //       A[i]   = b;     // Member of index 0
+    //       A[i+2] = c;     // Member of index 2 (Current instruction)
+    // Current pointer is pointed to A[i+2], adjust it to A[i].
+
+    bool InBounds = false;
+    if (auto *gep = dyn_cast<GetElementPtrInst>(AddrPart->stripPointerCasts()))
+      InBounds = gep->isInBounds();
+    AddrPart = State.Builder.CreateGEP(ScalarTy, AddrPart, Idx, "", InBounds);
+    AddrParts.push_back(AddrPart);
+  }
+
+  State.setDebugLocFrom(Instr->getDebugLoc());
+  Value *PoisonVec = PoisonValue::get(VecTy);
+
+  auto CreateGroupMask = [&BlockInMask, &State, &InterleaveFactor](
+                             unsigned Part, Value *MaskForGaps) -> Value * {
+    if (State.VF.isScalable()) {
+      assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");
+      assert(InterleaveFactor == 2 &&
+             "Unsupported deinterleave factor for scalable vectors");
+      auto *BlockInMaskPart = State.get(BlockInMask, Part);
+      SmallVector<Value *, 2> Ops = {BlockInMaskPart, BlockInMaskPart};
+      auto *MaskTy = VectorType::get(State.Builder.getInt1Ty(),
+                                     State.VF.getKnownMinValue() * 2, true);
+      return State.Builder.CreateIntrinsic(
+          MaskTy, Intrinsic::vector_interleave2, Ops,
+          /*FMFSource=*/nullptr, "interleaved.mask");
+    }
+
+    if (!BlockInMask)
+      return MaskForGaps;
+
+    Value *BlockInMaskPart = State.get(BlockInMask, Part);
+    Value *ShuffledMask = State.Builder.CreateShuffleVector(
+        BlockInMaskPart,
+        createReplicatedMask(InterleaveFactor, State.VF.getKnownMinValue()),
+        "interleaved.mask");
+    return MaskForGaps ? State.Builder.CreateBinOp(Instruction::And,
+                                                   ShuffledMask, MaskForGaps)
+                       : ShuffledMask;
+  };
+
+  const DataLayout &DL = Instr->getDataLayout();
+  // Vectorize the interleaved load group.
+  if (isa<LoadInst>(Instr)) {
+    Value *MaskForGaps = nullptr;
+    if (NeedsMaskForGaps) {
+      MaskForGaps = createBitMaskForGaps(State.Builder,
+                                         State.VF.getKnownMinValue(), *Group);
+      assert(MaskForGaps && "Mask for Gaps is required but it is null");
+    }
+
+    // For each unroll part, create a wide load for the group.
+    SmallVector<Value *, 2> NewLoads;
+    for (unsigned Part = 0; Part < State.UF; Part++) {
+      Instruction *NewLoad;
+      if (BlockInMask || MaskForGaps) {
+        Value *GroupMask = CreateGroupMask(Part, MaskForGaps);
+        NewLoad = State.Builder.CreateMaskedLoad(VecTy, AddrParts[Part],
+                                                 Group->getAlign(), GroupMask,
+                                                 PoisonVec, "wide.masked.vec");
+      } else
+        NewLoad = State.Builder.CreateAlignedLoad(
+            VecTy, AddrParts[Part], Group->getAlign(), "wide.vec");
+      Group->addMetadata(NewLoad);
+      NewLoads.push_back(NewLoad);
+    }
+
+    ArrayRef<VPValue *> VPDefs = definedValues();
+    const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
+    if (VecTy->isScalableTy()) {
+      assert(InterleaveFactor == 2 &&
+             "Unsupported deinterleave factor for scalable vectors");
+
+      for (unsigned Part = 0; Part < State.UF; ++Part) {
+        // Scalable vectors cannot use arbitrary shufflevectors (only splats),
+        // so must use intrinsics to deinterleave.
+        Value *DI = State.Builder.CreateIntrinsic(
+            Intrinsic::vector_deinterleave2, VecTy, NewLoads[Part],
+            /*FMFSource=*/nullptr, "strided.vec");
+        unsigned J = 0;
+        for (unsigned I = 0; I < InterleaveFactor; ++I) {
+          Instruction *Member = Group->getMember(I);
+
+          if (!Member)
+            continue;
+
+          Value *StridedVec = State.Builder.CreateExtractValue(DI, I);
+          // If this member has different type, cast the result type.
+          if (Member->getType() != ScalarTy) {
+            VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
+            StridedVec =
+                createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
+          }
+
+          if (Group->isReverse())
+            StridedVec =
+                State.Builder.CreateVectorReverse(StridedVec, "reverse");
+
+          State.set(VPDefs[J], StridedVec, Part);
+          ++J;
+        }
+      }
+
+      return;
+    }
+
+    // For each member in the group, shuffle out the appropriate data from the
+    // wide loads.
+    unsigned J = 0;
+    for (unsigned I = 0; I < InterleaveFactor; ++I) {
+      Instruction *Member = Group->getMember(I);
+
+      // Skip the gaps in the group.
+      if (!Member)
+        continue;
+
+      auto StrideMask =
+          createStrideMask(I, InterleaveFactor, State.VF.getKnownMinValue());
+      for (unsigned Part = 0; Part < State.UF; Part++) {
+        Value *StridedVec = State.Builder.CreateShuffleVector(
+            NewLoads[Part], StrideMask, "strided.vec");
+
+        // If this member has different type, cast the result type.
+        if (Member->getType() != ScalarTy) {
+          assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
+          VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
+          StridedVec =
+              createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
+        }
+
+        if (Group->isReverse())
+          StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");
+
+        State.set(VPDefs[J], StridedVec, Part);
+      }
+      ++J;
+    }
+    return;
+  }
+
+  // The sub vector type for current instruction.
+  auto *SubVT = VectorType::get(ScalarTy, State.VF);
+
+  // Vectorize the interleaved store group.
+  Value *MaskForGaps =
+      createBitMaskForGaps(State.Builder, State.VF.getKnownMinValue(), *Group);
+  assert((!MaskForGaps || !State.VF.isScalable()) &&
+         "masking gaps for scalable vectors is not yet supported.");
+  ArrayRef<VPValue *> StoredValues = getStoredValues();
+  for (unsigned Part = 0; Part < State.UF; Part++) {
+    // Collect the stored vector from each member.
+    SmallVector<Value *, 4> StoredVecs;
+    unsigned StoredIdx = 0;
+    for (unsigned i = 0; i < InterleaveFactor; i++) {
+      assert((Group->getMember(i) || MaskForGaps) &&
+             "Fail to get a member from an interleaved store group");
+      Instruction *Member = Group->getMember(i);
+
+      // Skip the gaps in the group.
+      if (!Member) {
+        Value *Undef = PoisonValue::get(SubVT);
+        StoredVecs.push_back(Undef);
+        continue;
+      }
+
+      Value *StoredVec = State.get(StoredValues[StoredIdx], Part);
+      ++StoredIdx;
+
+      if (Group->isReverse())
+        StoredVec = State.Builder.CreateVectorReverse(StoredVec, "reverse");
+
+      // If this member has different type, cast it to a unified type.
+
+      if (StoredVec->getType() != SubVT)
+        StoredVec = createBitOrPointerCast(State.Builder, StoredVec, SubVT, DL);
+
+      StoredVecs.push_back(StoredVec);
+    }
+
+    // Interleave all the smaller vectors into one wider vector.
+    Value *IVec =
+        interleaveVectors(State.Builder, StoredVecs, "interleaved.vec");
+    Instruction *NewStoreInstr;
+    if (BlockInMask || MaskForGaps) {
+      Value *GroupMask = CreateGroupMask(Part, MaskForGaps);
+      NewStoreInstr = State.Builder.CreateMaskedStore(
+          IVec, AddrParts[Part], Group->getAlign(), GroupMask);
+    } else
+      NewStoreInstr = State.Builder.CreateAlignedStore(IVec, AddrParts[Part],
+                                                       Group->getAlign());
+
+    Group->addMetadata(NewStoreInstr);
+  }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent,
+                               VPSlotTracker &SlotTracker) const {
+  O << Indent << "INTERLEAVE-GROUP with factor " << IG->getFactor() << " at ";
+  IG->getInsertPos()->printAsOperand(O, false);
+  O << ", ";
+  getAddr()->printAsOperand(O, SlotTracker);
+  VPValue *Mask = getMask();
+  if (Mask) {
+    O << ", ";
+    Mask->printAsOperand(O, SlotTracker);
+  }
+
+  unsigned OpIdx = 0;
+  for (unsigned i = 0; i < IG->getFactor(); ++i) {
+    if (!IG->getMember(i))
+      continue;
+    if (getNumStoreOperands() > 0) {
+      O << "\n" << Indent << "  store ";
+      getOperand(1 + OpIdx)->printAsOperand(O, SlotTracker);
+      O << " to index " << i;
+    } else {
+      O << "\n" << Indent << "  ";
+      getVPValue(OpIdx)->printAsOperand(O, SlotTracker);
+      O << " = load from index " << i;
+    }
+    ++OpIdx;
+  }
+}
+#endif
+
 void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
   Value *Start = getStartValue()->getLiveInIRValue();
   PHINode *EntryPart = PHINode::Create(Start->getType(), 2, "index");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index d668ae2aa5c08..c91fd0f118e31 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -278,6 +278,11 @@ static bool mergeReplicateRegionsIntoSuccessors(VPlan &Plan) {
         return UI && UI->getParent() == Then2;
       });
 
+      // Remove phi recipes that are unused after merging the regions.
+      if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
+        Phi1ToMove.eraseFromParent();
+        continue;
+      }
       Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
     }
 
diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_aapcs.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_aapcs.ll
new file mode 100644
index 0000000000000..48898719f40ce
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_aapcs.ll
@@ -0,0 +1,123 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+
+%va_list = type { ptr, ptr, ptr, i32, i32 }
+
+define preserve_nonecc i32 @callee(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, ...) nounwind noinline ssp {
+; CHECK-LABEL: callee:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub sp, sp, #192
+; CHECK-NEXT:    mov x8, #-24 // =0xffffffffffffffe8
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    add x10, sp, #136
+; CHECK-NEXT:    movk x8, #65408, lsl #32
+; CHECK-NEXT:    add x9, x9, #128
+; CHECK-NEXT:    stp x6, x7, [sp, #144]
+; CHECK-NEXT:    stp x9, x8, [sp, #176]
+; CHECK-NEXT:    add x9, x10, #24
+; CHECK-NEXT:    add x10, sp, #192
+; CHECK-NEXT:    mov w8, #-24 // =0xffffffe8
+; CHECK-NEXT:    str x5, [sp, #136]
+; CHECK-NEXT:    stp q0, q1, [sp]
+; CHECK-NEXT:    stp q2, q3, [sp, #32]
+; CHECK-NEXT:    stp q4, q5, [sp, #64]
+; CHECK-NEXT:    stp q6, q7, [sp, #96]
+; CHECK-NEXT:    stp x10, x9, [sp, #160]
+; CHECK-NEXT:    tbz w8, #31, .LBB0_3
+; CHECK-NEXT:  // %bb.1: // %maybe_reg
+; CHECK-NEXT:    add w9, w8, #8
+; CHECK-NEXT:    cmp w9, #0
+; CHECK-NEXT:    str w9, [sp, #184]
+; CHECK-NEXT:    b.gt .LBB0_3
+; CHECK-NEXT:  // %bb.2: // %in_reg
+; CHECK-NEXT:    ldr x9, [sp, #168]
+; CHECK-NEXT:    add x8, x9, w8, sxtw
+; CHECK-NEXT:    b .LBB0_4
+; CHECK-NEXT:  .LBB0_3: // %on_stack
+; CHECK-NEXT:    ldr x8, [sp, #160]
+; CHECK-NEXT:    add x9, x8, #8
+; CHECK-NEXT:    str x9, [sp, #160]
+; CHECK-NEXT:  .LBB0_4: // %end
+; CHECK-NEXT:    ldr w0, [x8]
+; CHECK-NEXT:    add sp, sp, #192
+; CHECK-NEXT:    ret
+entry:
+  %args = alloca %va_list, align 8
+  call void @llvm.va_start(ptr %args)
+  %gr_offs_p = getelementptr inbounds %va_list, ptr %args, i32 0, i32 3
+  %gr_offs = load i32, ptr %gr_offs_p, align 8
+  %0 = icmp sge i32 %gr_offs, 0
+  br i1 %0, label %on_stack, label %maybe_reg
+
+maybe_reg:
+  %new_reg_offs = add i32 %gr_offs, 8
+  store i32 %new_reg_offs, ptr %gr_offs_p, align 8
+  %inreg = icmp sle i32 %new_reg_offs, 0
+  br i1 %inreg, label %in_reg, label %on_stack
+
+in_reg:
+  %reg_top_p = getelementptr inbounds %va_list, ptr %args, i32 0, i32 1
+  %reg_top = load ptr, ptr %reg_top_p, align 8
+  %reg = getelementptr inbounds i8, ptr %reg_top, i32 %gr_offs
+  br label %end
+
+on_stack:
+  %stack_p = getelementptr inbounds %va_list, ptr %args, i32 0, i32 0
+  %stack = load ptr, ptr %stack_p, align 8
+  %new_stack = getelementptr inbounds i8, ptr %stack, i64 8
+  store ptr %new_stack, ptr %stack_p, align 8
+  br label %end
+
+end:
+  %p = phi ptr [ %reg, %in_reg ], [ %stack, %on_stack ]
+  %10 = load i32, ptr %p, align 8
+  call void @llvm.va_end.p0(ptr %args)
+  ret i32 %10
+}
+
+declare void @llvm.va_start(ptr) nounwind
+declare void @llvm.va_end(ptr) nounwind
+
+define i32 @caller() nounwind ssp {
+; CHECK-LABEL: caller:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #176
+; CHECK-NEXT:    mov w8, #10 // =0xa
+; CHECK-NEXT:    mov w9, #9 // =0x9
+; CHECK-NEXT:    mov w0, #1 // =0x1
+; CHECK-NEXT:    mov w1, #2 // =0x2
+; CHECK-NEXT:    mov w2, #3 // =0x3
+; CHECK-NEXT:    mov w3, #4 // =0x4
+; CHECK-NEXT:    mov w4, #5 // =0x5
+; CHECK-NEXT:    mov w5, #6 // =0x6
+; CHECK-NEXT:    mov w6, #7 // =0x7
+; CHECK-NEXT:    mov w7, #8 // =0x8
+; CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x28, x27, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x26, x25, [sp, #112] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x24, x23, [sp, #128] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x22, x21, [sp, #144] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #160] // 16-byte Folded Spill
+; CHECK-NEXT:    str w8, [sp, #8]
+; CHECK-NEXT:    str w9, [sp]
+; CHECK-NEXT:    bl callee
+; CHECK-NEXT:    ldp x20, x19, [sp, #160] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x22, x21, [sp, #144] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x24, x23, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x26, x25, [sp, #112] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x28, x27, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #176
+; CHECK-NEXT:    ret
+  %r = tail call preserve_nonecc i32 (i32, i32, i32, i32, i32, ...) @callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10)
+  ret i32 %r
+}
+
diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
new file mode 100644
index 0000000000000..e227f14542cc1
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-apple-darwin < %s | FileCheck %s
+
+define preserve_nonecc i32 @callee(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, ...) nounwind noinline ssp {
+; CHECK-LABEL: callee:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    add x8, sp, #16
+; CHECK-NEXT:    ldr w0, [sp, #16]
+; CHECK-NEXT:    orr x8, x8, #0x8
+; CHECK-NEXT:    str x8, [sp, #8]
+; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    ret
+  %args = alloca ptr, align 8
+  call void @llvm.va_start(ptr %args)
+  %10 = va_arg ptr %args, i32
+  call void @llvm.va_end(ptr %args)
+  ret i32 %10
+}
+
+declare void @llvm.va_start(ptr) nounwind
+declare void @llvm.va_end(ptr) nounwind
+
+define i32 @caller() nounwind ssp {
+; CHECK-LABEL: caller:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    sub sp, sp, #208
+; CHECK-NEXT:    mov w8, #10 ; =0xa
+; CHECK-NEXT:    mov w9, #9 ; =0x9
+; CHECK-NEXT:    mov w0, #1 ; =0x1
+; CHECK-NEXT:    stp x9, x8, [sp, #24]
+; CHECK-NEXT:    mov w8, #8 ; =0x8
+; CHECK-NEXT:    mov w9, #6 ; =0x6
+; CHECK-NEXT:    str x8, [sp, #16]
+; CHECK-NEXT:    mov w8, #7 ; =0x7
+; CHECK-NEXT:    mov w1, #2 ; =0x2
+; CHECK-NEXT:    mov w2, #3 ; =0x3
+; CHECK-NEXT:    mov w3, #4 ; =0x4
+; CHECK-NEXT:    mov w4, #5 ; =0x5
+; CHECK-NEXT:    stp d15, d14, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp d13, d12, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp d11, d10, [sp, #80] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp d9, d8, [sp, #96] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x28, x27, [sp, #112] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x26, x25, [sp, #128] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x24, x23, [sp, #144] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x22, x21, [sp, #160] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #176] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #192] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x9, x8, [sp]
+; CHECK-NEXT:    bl _callee
+; CHECK-NEXT:    ldp x29, x30, [sp, #192] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x20, x19, [sp, #176] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x22, x21, [sp, #160] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x24, x23, [sp, #144] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x26, x25, [sp, #128] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x28, x27, [sp, #112] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d9, d8, [sp, #96] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d11, d10, [sp, #80] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d13, d12, [sp, #64] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d15, d14, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #208
+; CHECK-NEXT:    ret
+  %r = tail call preserve_nonecc i32 (i32, i32, i32, i32, i32, ...) @callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10)
+  ret i32 %r
+}
+
diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll
new file mode 100644
index 0000000000000..83dd240a6540f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll
@@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-pc-windows < %s | FileCheck %s
+
+define preserve_nonecc i32 @callee(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, ...) nounwind noinline ssp {
+; CHECK-LABEL: callee:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    mov x0, x5
+; CHECK-NEXT:    add x8, sp, #24
+; CHECK-NEXT:    stp x6, x7, [sp, #32]
+; CHECK-NEXT:    str x5, [sp, #24]
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    str x8, [sp, #8]
+; CHECK-NEXT:    add sp, sp, #48
+; CHECK-NEXT:    ret
+  %args = alloca ptr, align 8
+  call void @llvm.va_start(ptr %args)
+  %p = load ptr, ptr %args, align 8
+  %10 = load i32, ptr %p, align 8
+  call void @llvm.va_end(ptr %args)
+  ret i32 %10
+}
+
+declare void @llvm.va_start(ptr) nounwind
+declare void @llvm.va_end(ptr) nounwind
+
+define i32 @caller() nounwind ssp {
+; CHECK-LABEL: caller:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #176
+; CHECK-NEXT:    mov w8, #10 // =0xa
+; CHECK-NEXT:    mov w9, #9 // =0x9
+; CHECK-NEXT:    mov w0, #1 // =0x1
+; CHECK-NEXT:    mov w1, #2 // =0x2
+; CHECK-NEXT:    mov w2, #3 // =0x3
+; CHECK-NEXT:    mov w3, #4 // =0x4
+; CHECK-NEXT:    mov w4, #5 // =0x5
+; CHECK-NEXT:    mov w5, #6 // =0x6
+; CHECK-NEXT:    mov w6, #7 // =0x7
+; CHECK-NEXT:    mov w7, #8 // =0x8
+; CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x28, x27, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x26, x25, [sp, #112] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x24, x23, [sp, #128] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x22, x21, [sp, #144] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #160] // 16-byte Folded Spill
+; CHECK-NEXT:    str w8, [sp, #8]
+; CHECK-NEXT:    str w9, [sp]
+; CHECK-NEXT:    bl callee
+; CHECK-NEXT:    ldp x20, x19, [sp, #160] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x22, x21, [sp, #144] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x24, x23, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x26, x25, [sp, #112] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x28, x27, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #176
+; CHECK-NEXT:    ret
+  %r = tail call preserve_nonecc i32 (i32, i32, i32, i32, i32, ...) @callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10)
+  ret i32 %r
+}
+
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.noret.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.noret.ll
new file mode 100644
index 0000000000000..90dfab501d0a4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.noret.ll
@@ -0,0 +1,479 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-SDAG,GFX10,GFX10-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-GISEL,GFX10,GFX10-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-SDAG,GFX11,GFX11-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-GISEL,GFX11,GFX11-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
+
+define amdgpu_ps void @sample_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; GFX10PLUS-LABEL: sample_1d_nortn:
+; GFX10PLUS:       ; %bb.0: ; %main_body
+; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10PLUS-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10PLUS-NEXT:    s_endpgm
+;
+; GFX12-LABEL: sample_1d_nortn:
+; GFX12:       ; %bb.0: ; %main_body
+; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX12-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT:    s_endpgm
+main_body:
+  call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @sample_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
+; GFX10PLUS-LABEL: sample_2d_nortn:
+; GFX10PLUS:       ; %bb.0: ; %main_body
+; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10PLUS-NEXT:    image_sample off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10PLUS-NEXT:    s_endpgm
+;
+; GFX12-LABEL: sample_2d_nortn:
+; GFX12:       ; %bb.0: ; %main_body
+; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX12-NEXT:    image_sample off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX12-NEXT:    s_endpgm
+main_body:
+  call void @llvm.amdgcn.image.sample.2d.nortn.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @sample_3d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %r) {
+; GFX10PLUS-LABEL: sample_3d_nortn:
+; GFX10PLUS:       ; %bb.0: ; %main_body
+; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10PLUS-NEXT:    image_sample off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
+; GFX10PLUS-NEXT:    s_endpgm
+;
+; GFX12-LABEL: sample_3d_nortn:
+; GFX12:       ; %bb.0: ; %main_body
+; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX12-NEXT:    image_sample off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
+; GFX12-NEXT:    s_endpgm
+main_body:
+  call void @llvm.amdgcn.image.sample.3d.nortn.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @sample_cube_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) {
+; GFX10PLUS-LABEL: sample_cube_nortn:
+; GFX10PLUS:       ; %bb.0: ; %main_body
+; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10PLUS-NEXT:    image_sample off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE
+; GFX10PLUS-NEXT:    s_endpgm
+;
+; GFX12-LABEL: sample_cube_nortn:
+; GFX12:       ; %bb.0: ; %main_body
+; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX12-NEXT:    image_sample off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE
+; GFX12-NEXT:    s_endpgm
+main_body:
+  call void @llvm.amdgcn.image.sample.cube.nortn.f32(i32 15, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @sample_1darray_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %slice) {
+; GFX10PLUS-LABEL: sample_1darray_nortn:
+; GFX10PLUS:       ; %bb.0: ; %main_body
+; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10PLUS-NEXT:    image_sample off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY
+; GFX10PLUS-NEXT:    s_endpgm
+;
+; GFX12-LABEL: sample_1darray_nortn:
+; GFX12:       ; %bb.0: ; %main_body
+; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX12-NEXT:    image_sample off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY
+; GFX12-NEXT:    s_endpgm
+main_body:
+  call void @llvm.amdgcn.image.sample.1darray.nortn.f32(i32 15, float %s, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @sample_2darray_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) {
+; GFX10PLUS-LABEL: sample_2darray_nortn:
+; GFX10PLUS:       ; %bb.0: ; %main_body
+; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10PLUS-NEXT:    image_sample off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY
+; GFX10PLUS-NEXT:    s_endpgm
+;
+; GFX12-LABEL: sample_2darray_nortn:
+; GFX12:       ; %bb.0: ; %main_body
+; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX12-NEXT:    image_sample off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY
+; GFX12-NEXT:    s_endpgm
+main_body:
+  call void @llvm.amdgcn.image.sample.2darray.nortn.f32(i32 15, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @sample_b_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
+; GFX10PLUS-LABEL: sample_b_1d_nortn:
+; GFX10PLUS:       ; %bb.0: ; %main_body
+; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10PLUS-NEXT:    image_sample_b off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10PLUS-NEXT:    s_endpgm
+;
+; GFX12-LABEL: sample_b_1d_nortn:
+; GFX12:       ; %bb.0: ; %main_body
+; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX12-NEXT:    image_sample_b off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT:    s_endpgm
+main_body:
+  call void @llvm.amdgcn.image.sample.b.1d.nortn.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @sample_b_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
+; GFX10PLUS-LABEL: sample_b_2d_nortn:
+; GFX10PLUS:       ; %bb.0: ; %main_body
+; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10PLUS-NEXT:    image_sample_b off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10PLUS-NEXT:    s_endpgm
+;
+; GFX12-LABEL: sample_b_2d_nortn:
+; GFX12:       ; %bb.0: ; %main_body
+; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX12-NEXT:    image_sample_b off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX12-NEXT:    s_endpgm
+main_body:
+  call void @llvm.amdgcn.image.sample.b.2d.nortn.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @sample_c_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
+; GFX10PLUS-LABEL: sample_c_1d_nortn:
+; GFX10PLUS:       ; %bb.0: ; %main_body
+; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10PLUS-NEXT:    image_sample_c off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10PLUS-NEXT:    s_endpgm
+;
+; GFX12-LABEL: sample_c_1d_nortn:
+; GFX12:       ; %bb.0: ; %main_body
+; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX12-NEXT:    image_sample_c off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT:    s_endpgm
+main_body:
+  call void @llvm.amdgcn.image.sample.c.1d.nortn.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @sample_c_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
+; GFX10PLUS-LABEL: sample_c_2d_nortn:
+; GFX10PLUS:       ; %bb.0: ; %main_body
+; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10PLUS-NEXT:    image_sample_c off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10PLUS-NEXT:    s_endpgm
+;
+; GFX12-LABEL: sample_c_2d_nortn:
+; GFX12:       ; %bb.0: ; %main_body
+; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX12-NEXT:    image_sample_c off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX12-NEXT:    s_endpgm
+main_body:
+  call void @llvm.amdgcn.image.sample.c.2d.nortn.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @sample_d_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) {
+; GFX10PLUS-LABEL: sample_d_1d_nortn:
+; GFX10PLUS:       ; %bb.0: ; %main_body
+; GFX10PLUS-NEXT:    image_sample_d off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10PLUS-NEXT:    s_endpgm
+;
+; GFX12-LABEL: sample_d_1d_nortn:
+; GFX12:       ; %bb.0: ; %main_body
+; GFX12-NEXT:    image_sample_d off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT:    s_endpgm
+main_body:
+  call void @llvm.amdgcn.image.sample.d.1d.nortn.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @sample_d_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
+; GFX10PLUS-LABEL: sample_d_2d_nortn:
+; GFX10PLUS:       ; %bb.0: ; %main_body
+; GFX10PLUS-NEXT:    image_sample_d off, v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10PLUS-NEXT:    s_endpgm
+;
+; GFX12-LABEL: sample_d_2d_nortn:
+; GFX12:       ; %bb.0: ; %main_body
+; GFX12-NEXT:    image_sample_d off, [v0, v1, v2, v[3:5]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX12-NEXT:    s_endpgm
+main_body:
+  call void @llvm.amdgcn.image.sample.d.2d.nortn.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @sample_l_1d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) {
+; GFX10PLUS-LABEL: sample_l_1d_nortn:
+; GFX10PLUS:       ; %bb.0: ; %main_body
+; GFX10PLUS-NEXT:    image_sample_l off, v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10PLUS-NEXT:    s_endpgm
+;
+; GFX12-LABEL: sample_l_1d_nortn:
+; GFX12:       ; %bb.0: ; %main_body
+; GFX12-NEXT:    image_sample_l off, [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT:    s_endpgm
+main_body:
+  call void @llvm.amdgcn.image.sample.l.1d.nortn.f32(i32 15, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @sample_l_2d_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
+; GFX10PLUS-LABEL: sample_l_2d_nortn:
+; GFX10PLUS:       ; %bb.0: ; %main_body
+; GFX10PLUS-NEXT:    image_sample_l off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10PLUS-NEXT:    s_endpgm
+;
+; GFX12-LABEL: sample_l_2d_nortn:
+; GFX12:       ; %bb.0: ; %main_body
+; GFX12-NEXT:    image_sample_l off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX12-NEXT:    s_endpgm
+main_body:
+  call void @llvm.amdgcn.image.sample.l.2d.nortn.f32(i32 15, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps <4 x float> @sample_nortn_mix_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; GFX10PLUS-LABEL: sample_nortn_mix_1:
+; GFX10PLUS:       ; %bb.0: ; %main_body
+; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10PLUS-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10PLUS-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
+; GFX10PLUS-NEXT:    ; return to shader part epilog
+;
+; GFX12-LABEL: sample_nortn_mix_1:
+; GFX12:       ; %bb.0: ; %main_body
+; GFX12-NEXT:    s_mov_b32 s12, exec_lo
+; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX12-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    ; return to shader part epilog
+main_body:
+  call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_nortn_mix_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; GFX10PLUS-LABEL: sample_nortn_mix_2:
+; GFX10PLUS:       ; %bb.0: ; %main_body
+; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10PLUS-NEXT:    v_mov_b32_e32 v4, v0
+; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10PLUS-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10PLUS-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
+; GFX10PLUS-NEXT:    ; return to shader part epilog
+;
+; GFX12-LABEL: sample_nortn_mix_2:
+; GFX12:       ; %bb.0: ; %main_body
+; GFX12-NEXT:    s_mov_b32 s12, exec_lo
+; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX12-NEXT:    v_mov_b32_e32 v4, v0
+; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX12-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_nortn_mix_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; GFX10PLUS-SDAG-LABEL: sample_nortn_mix_3:
+; GFX10PLUS-SDAG:       ; %bb.0: ; %main_body
+; GFX10PLUS-SDAG-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10PLUS-SDAG-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10PLUS-SDAG-NEXT:    image_sample v1, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX10PLUS-SDAG-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10PLUS-SDAG-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10PLUS-SDAG-NEXT:    s_waitcnt vmcnt(1)
+; GFX10PLUS-SDAG-NEXT:    image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10PLUS-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10PLUS-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX10PLUS-GISEL-LABEL: sample_nortn_mix_3:
+; GFX10PLUS-GISEL:       ; %bb.0: ; %main_body
+; GFX10PLUS-GISEL-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10PLUS-GISEL-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10PLUS-GISEL-NEXT:    image_sample v[1:4], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10PLUS-GISEL-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10PLUS-GISEL-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10PLUS-GISEL-NEXT:    s_waitcnt vmcnt(1)
+; GFX10PLUS-GISEL-NEXT:    image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10PLUS-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10PLUS-GISEL-NEXT:    ; return to shader part epilog
+;
+; GFX12-SDAG-LABEL: sample_nortn_mix_3:
+; GFX12-SDAG:       ; %bb.0: ; %main_body
+; GFX12-SDAG-NEXT:    s_mov_b32 s12, exec_lo
+; GFX12-SDAG-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX12-SDAG-NEXT:    image_sample v1, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-SDAG-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX12-SDAG-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x1
+; GFX12-SDAG-NEXT:    image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX12-GISEL-LABEL: sample_nortn_mix_3:
+; GFX12-GISEL:       ; %bb.0: ; %main_body
+; GFX12-GISEL-NEXT:    s_mov_b32 s12, exec_lo
+; GFX12-GISEL-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX12-GISEL-NEXT:    image_sample v[1:4], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-GISEL-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX12-GISEL-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x1
+; GFX12-GISEL-NEXT:    image_sample v[0:3], v1, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  %v.0 = extractelement <4 x float> %v, i32 0
+  call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  %u = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %v.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %u
+}
+
+define amdgpu_ps <4 x float> @sample_nortn_mix_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; GFX10PLUS-SDAG-LABEL: sample_nortn_mix_4:
+; GFX10PLUS-SDAG:       ; %bb.0: ; %main_body
+; GFX10PLUS-SDAG-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10PLUS-SDAG-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10PLUS-SDAG-NEXT:    image_sample v4, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX10PLUS-SDAG-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10PLUS-SDAG-NEXT:    s_waitcnt vmcnt(1)
+; GFX10PLUS-SDAG-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10PLUS-SDAG-NEXT:    image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10PLUS-SDAG-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10PLUS-SDAG-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10PLUS-SDAG-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10PLUS-SDAG-NEXT:    s_waitcnt vmcnt(2)
+; GFX10PLUS-SDAG-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10PLUS-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX10PLUS-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX10PLUS-GISEL-LABEL: sample_nortn_mix_4:
+; GFX10PLUS-GISEL:       ; %bb.0: ; %main_body
+; GFX10PLUS-GISEL-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10PLUS-GISEL-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10PLUS-GISEL-NEXT:    image_sample v[4:7], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10PLUS-GISEL-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10PLUS-GISEL-NEXT:    s_waitcnt vmcnt(1)
+; GFX10PLUS-GISEL-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10PLUS-GISEL-NEXT:    image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10PLUS-GISEL-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10PLUS-GISEL-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10PLUS-GISEL-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10PLUS-GISEL-NEXT:    s_waitcnt vmcnt(2)
+; GFX10PLUS-GISEL-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10PLUS-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10PLUS-GISEL-NEXT:    ; return to shader part epilog
+;
+; GFX12-SDAG-LABEL: sample_nortn_mix_4:
+; GFX12-SDAG:       ; %bb.0: ; %main_body
+; GFX12-SDAG-NEXT:    s_mov_b32 s12, exec_lo
+; GFX12-SDAG-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX12-SDAG-NEXT:    image_sample v4, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-SDAG-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x1
+; GFX12-SDAG-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-SDAG-NEXT:    image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-SDAG-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX12-SDAG-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-SDAG-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x2
+; GFX12-SDAG-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
+; GFX12-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX12-GISEL-LABEL: sample_nortn_mix_4:
+; GFX12-GISEL:       ; %bb.0: ; %main_body
+; GFX12-GISEL-NEXT:    s_mov_b32 s12, exec_lo
+; GFX12-GISEL-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX12-GISEL-NEXT:    image_sample v[4:7], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-GISEL-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x1
+; GFX12-GISEL-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-GISEL-NEXT:    image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-GISEL-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX12-GISEL-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-GISEL-NEXT:    image_sample off, v4, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x2
+; GFX12-GISEL-NEXT:    image_sample off, v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
+; GFX12-GISEL-NEXT:    ; return to shader part epilog
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  %v.0 = extractelement <4 x float> %v, i32 0
+  %v.1 = extractelement <4 x float> %v, i32 0
+  %v.2 = extractelement <4 x float> %v, i32 0
+  %v.3 = extractelement <4 x float> %v, i32 0
+  call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %v.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  %u = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %v.1, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  %u.0 = extractelement <4 x float> %u, i32 0
+  call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %v.2, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %v.3, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  call void @llvm.amdgcn.image.sample.1d.nortn.f32(i32 15, float %u.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %u
+}
+
+define amdgpu_ps void @sample_d_1d_g16_nortn(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
+; GFX10PLUS-LABEL: sample_d_1d_g16_nortn:
+; GFX10PLUS:       ; %bb.0: ; %main_body
+; GFX10PLUS-NEXT:    image_sample_d_g16 off, v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10PLUS-NEXT:    s_endpgm
+;
+; GFX12-LABEL: sample_d_1d_g16_nortn:
+; GFX12:       ; %bb.0: ; %main_body
+; GFX12-NEXT:    image_sample_d_g16 off, [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT:    s_endpgm
+main_body:
+  call void @llvm.amdgcn.image.sample.d.1d.nortn.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret void
+}
+
+declare void @llvm.amdgcn.image.sample.1d.nortn.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
+declare void @llvm.amdgcn.image.sample.2d.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
+declare void @llvm.amdgcn.image.sample.3d.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
+declare void @llvm.amdgcn.image.sample.cube.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
+declare void @llvm.amdgcn.image.sample.1darray.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
+declare void @llvm.amdgcn.image.sample.2darray.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
+
+declare void @llvm.amdgcn.image.sample.b.1d.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
+declare void @llvm.amdgcn.image.sample.b.2d.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
+
+declare void @llvm.amdgcn.image.sample.c.1d.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
+declare void @llvm.amdgcn.image.sample.c.2d.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
+
+declare void @llvm.amdgcn.image.sample.d.1d.nortn.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
+declare void @llvm.amdgcn.image.sample.d.2d.nortn.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
+
+declare void @llvm.amdgcn.image.sample.l.1d.nortn.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
+declare void @llvm.amdgcn.image.sample.l.2d.nortn.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
+
+declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare void @llvm.amdgcn.image.sample.d.1d.nortn.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX10: {{.*}}
+; GFX10-GISEL: {{.*}}
+; GFX10-SDAG: {{.*}}
+; GFX11: {{.*}}
+; GFX11-GISEL: {{.*}}
+; GFX11-SDAG: {{.*}}
diff --git a/llvm/test/CodeGen/LoongArch/andn-icmp.ll b/llvm/test/CodeGen/LoongArch/andn-icmp.ll
index 6d07e7a947297..447f3ac5c34fd 100644
--- a/llvm/test/CodeGen/LoongArch/andn-icmp.ll
+++ b/llvm/test/CodeGen/LoongArch/andn-icmp.ll
@@ -6,12 +6,14 @@ define i1 @andn_icmp_eq_i8(i8 signext %a, i8 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_eq_i8:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    andn $a0, $a1, $a0
+; LA32-NEXT:    andi $a0, $a0, 255
 ; LA32-NEXT:    sltui $a0, $a0, 1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_eq_i8:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    andn $a0, $a1, $a0
+; LA64-NEXT:    andi $a0, $a0, 255
 ; LA64-NEXT:    sltui $a0, $a0, 1
 ; LA64-NEXT:    ret
   %and = and i8 %a, %b
@@ -23,12 +25,14 @@ define i1 @andn_icmp_eq_i16(i16 signext %a, i16 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_eq_i16:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    andn $a0, $a1, $a0
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
 ; LA32-NEXT:    sltui $a0, $a0, 1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_eq_i16:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    andn $a0, $a1, $a0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
 ; LA64-NEXT:    sltui $a0, $a0, 1
 ; LA64-NEXT:    ret
   %and = and i16 %a, %b
@@ -76,12 +80,14 @@ define i1 @andn_icmp_ne_i8(i8 signext %a, i8 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_ne_i8:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    andn $a0, $a1, $a0
+; LA32-NEXT:    andi $a0, $a0, 255
 ; LA32-NEXT:    sltu $a0, $zero, $a0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_ne_i8:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    andn $a0, $a1, $a0
+; LA64-NEXT:    andi $a0, $a0, 255
 ; LA64-NEXT:    sltu $a0, $zero, $a0
 ; LA64-NEXT:    ret
   %and = and i8 %a, %b
@@ -93,12 +99,14 @@ define i1 @andn_icmp_ne_i16(i16 signext %a, i16 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_ne_i16:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    andn $a0, $a1, $a0
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
 ; LA32-NEXT:    sltu $a0, $zero, $a0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_ne_i16:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    andn $a0, $a1, $a0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
 ; LA64-NEXT:    sltu $a0, $zero, $a0
 ; LA64-NEXT:    ret
   %and = and i16 %a, %b
@@ -145,13 +153,15 @@ define i1 @andn_icmp_ne_i64(i64 %a, i64 %b) nounwind {
 define i1 @andn_icmp_ult_i8(i8 signext %a, i8 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_ult_i8:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    and $a0, $a0, $a1
+; LA32-NEXT:    andi $a1, $a1, 255
+; LA32-NEXT:    and $a0, $a1, $a0
 ; LA32-NEXT:    sltu $a0, $a0, $a1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_ult_i8:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    and $a0, $a0, $a1
+; LA64-NEXT:    andi $a1, $a1, 255
+; LA64-NEXT:    and $a0, $a1, $a0
 ; LA64-NEXT:    sltu $a0, $a0, $a1
 ; LA64-NEXT:    ret
   %and = and i8 %a, %b
@@ -162,13 +172,15 @@ define i1 @andn_icmp_ult_i8(i8 signext %a, i8 signext %b) nounwind {
 define i1 @andn_icmp_ult_i16(i16 signext %a, i16 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_ult_i16:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    and $a0, $a0, $a1
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    and $a0, $a1, $a0
 ; LA32-NEXT:    sltu $a0, $a0, $a1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_ult_i16:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    and $a0, $a0, $a1
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    and $a0, $a1, $a0
 ; LA64-NEXT:    sltu $a0, $a0, $a1
 ; LA64-NEXT:    ret
   %and = and i16 %a, %b
@@ -179,14 +191,16 @@ define i1 @andn_icmp_ult_i16(i16 signext %a, i16 signext %b) nounwind {
 define i1 @andn_icmp_uge_i8(i8 signext %a, i8 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_uge_i8:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    and $a0, $a0, $a1
+; LA32-NEXT:    andi $a1, $a1, 255
+; LA32-NEXT:    and $a0, $a1, $a0
 ; LA32-NEXT:    sltu $a0, $a0, $a1
 ; LA32-NEXT:    xori $a0, $a0, 1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_uge_i8:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    and $a0, $a0, $a1
+; LA64-NEXT:    andi $a1, $a1, 255
+; LA64-NEXT:    and $a0, $a1, $a0
 ; LA64-NEXT:    sltu $a0, $a0, $a1
 ; LA64-NEXT:    xori $a0, $a0, 1
 ; LA64-NEXT:    ret
@@ -198,14 +212,16 @@ define i1 @andn_icmp_uge_i8(i8 signext %a, i8 signext %b) nounwind {
 define i1 @andn_icmp_uge_i16(i16 signext %a, i16 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_uge_i16:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    and $a0, $a0, $a1
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    and $a0, $a1, $a0
 ; LA32-NEXT:    sltu $a0, $a0, $a1
 ; LA32-NEXT:    xori $a0, $a0, 1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_uge_i16:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    and $a0, $a0, $a1
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    and $a0, $a1, $a0
 ; LA64-NEXT:    sltu $a0, $a0, $a1
 ; LA64-NEXT:    xori $a0, $a0, 1
 ; LA64-NEXT:    ret
@@ -217,13 +233,15 @@ define i1 @andn_icmp_uge_i16(i16 signext %a, i16 signext %b) nounwind {
 define i1 @andn_icmp_ugt_i8(i8 signext %a, i8 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_ugt_i8:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    and $a0, $a0, $a1
+; LA32-NEXT:    andi $a1, $a1, 255
+; LA32-NEXT:    and $a0, $a1, $a0
 ; LA32-NEXT:    sltu $a0, $a1, $a0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_ugt_i8:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    and $a0, $a0, $a1
+; LA64-NEXT:    andi $a1, $a1, 255
+; LA64-NEXT:    and $a0, $a1, $a0
 ; LA64-NEXT:    sltu $a0, $a1, $a0
 ; LA64-NEXT:    ret
   %and = and i8 %a, %b
@@ -234,13 +252,15 @@ define i1 @andn_icmp_ugt_i8(i8 signext %a, i8 signext %b) nounwind {
 define i1 @andn_icmp_ugt_i16(i16 signext %a, i16 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_ugt_i16:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    and $a0, $a0, $a1
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    and $a0, $a1, $a0
 ; LA32-NEXT:    sltu $a0, $a1, $a0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_ugt_i16:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    and $a0, $a0, $a1
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    and $a0, $a1, $a0
 ; LA64-NEXT:    sltu $a0, $a1, $a0
 ; LA64-NEXT:    ret
   %and = and i16 %a, %b
@@ -251,14 +271,16 @@ define i1 @andn_icmp_ugt_i16(i16 signext %a, i16 signext %b) nounwind {
 define i1 @andn_icmp_ule_i8(i8 signext %a, i8 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_ule_i8:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    and $a0, $a0, $a1
+; LA32-NEXT:    andi $a1, $a1, 255
+; LA32-NEXT:    and $a0, $a1, $a0
 ; LA32-NEXT:    sltu $a0, $a1, $a0
 ; LA32-NEXT:    xori $a0, $a0, 1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_ule_i8:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    and $a0, $a0, $a1
+; LA64-NEXT:    andi $a1, $a1, 255
+; LA64-NEXT:    and $a0, $a1, $a0
 ; LA64-NEXT:    sltu $a0, $a1, $a0
 ; LA64-NEXT:    xori $a0, $a0, 1
 ; LA64-NEXT:    ret
@@ -270,14 +292,16 @@ define i1 @andn_icmp_ule_i8(i8 signext %a, i8 signext %b) nounwind {
 define i1 @andn_icmp_ule_i16(i16 signext %a, i16 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_ule_i16:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    and $a0, $a0, $a1
+; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
+; LA32-NEXT:    and $a0, $a1, $a0
 ; LA32-NEXT:    sltu $a0, $a1, $a0
 ; LA32-NEXT:    xori $a0, $a0, 1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_ule_i16:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    and $a0, $a0, $a1
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    and $a0, $a1, $a0
 ; LA64-NEXT:    sltu $a0, $a1, $a0
 ; LA64-NEXT:    xori $a0, $a0, 1
 ; LA64-NEXT:    ret
@@ -577,3 +601,25 @@ define i1 @andn_icmp_ult_i8_nn(i8 %a, i8 %b) nounwind {
   %cmp = icmp ult i8 %and, %b
   ret i1 %cmp
 }
+
+define i1 @andn_icmp_eq_i8_i32(i8 signext %a, i8 signext %b) nounwind {
+; LA32-LABEL: andn_icmp_eq_i8_i32:
+; LA32:       # %bb.0:
+; LA32-NEXT:    andn $a0, $a1, $a0
+; LA32-NEXT:    andi $a0, $a0, 255
+; LA32-NEXT:    sltui $a0, $a0, 1
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: andn_icmp_eq_i8_i32:
+; LA64:       # %bb.0:
+; LA64-NEXT:    andn $a0, $a1, $a0
+; LA64-NEXT:    andi $a0, $a0, 255
+; LA64-NEXT:    sltui $a0, $a0, 1
+; LA64-NEXT:    ret
+  %x = zext i8 %a to i32
+  %y = zext i8 %b to i32
+  %not = xor i32 %x, -1
+  %and = and i32 %not, %y
+  %cmp = icmp eq i32 %and, 0
+  ret i1 %cmp
+}
diff --git a/llvm/test/CodeGen/NVPTX/addr-mode.ll b/llvm/test/CodeGen/NVPTX/addr-mode.ll
new file mode 100644
index 0000000000000..a6a085c0e2e33
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/addr-mode.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -march=nvptx64 | FileCheck %s
+
+target triple = "nvptx64-nvidia-cuda"
+
+define i32 @test_addr_mode_i64(ptr %x) {
+; CHECK-LABEL: test_addr_mode_i64(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-NEXT:    .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u64 %rd1, [test_addr_mode_i64_param_0];
+; CHECK-NEXT:    ld.u32 %r1, [%rd1+-4];
+; CHECK-NEXT:    st.param.b32 [func_retval0+0], %r1;
+; CHECK-NEXT:    ret;
+  %addr = getelementptr i32, ptr %x, i64 -1
+  %res = load i32, ptr %addr
+  ret i32 %res
+}
+
+define i32 @test_addr_mode_i32(ptr %x) {
+; CHECK-LABEL: test_addr_mode_i32(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-NEXT:    .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u64 %rd1, [test_addr_mode_i32_param_0];
+; CHECK-NEXT:    ld.u32 %r1, [%rd1+-4];
+; CHECK-NEXT:    st.param.b32 [func_retval0+0], %r1;
+; CHECK-NEXT:    ret;
+  %addr = getelementptr i32, ptr %x, i32 -1
+  %res = load i32, ptr %addr
+  ret i32 %res
+}
+
+define i32 @test_addr_mode_i16(ptr %x) {
+; CHECK-LABEL: test_addr_mode_i16(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-NEXT:    .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u64 %rd1, [test_addr_mode_i16_param_0];
+; CHECK-NEXT:    ld.u32 %r1, [%rd1+-4];
+; CHECK-NEXT:    st.param.b32 [func_retval0+0], %r1;
+; CHECK-NEXT:    ret;
+  %addr = getelementptr i32, ptr %x, i16 -1
+  %res = load i32, ptr %addr
+  ret i32 %res
+}
+
+define i32 @test_addr_mode_i8(ptr %x) {
+; CHECK-LABEL: test_addr_mode_i8(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-NEXT:    .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u64 %rd1, [test_addr_mode_i8_param_0];
+; CHECK-NEXT:    ld.u32 %r1, [%rd1+-4];
+; CHECK-NEXT:    st.param.b32 [func_retval0+0], %r1;
+; CHECK-NEXT:    ret;
+  %addr = getelementptr i32, ptr %x, i8 -1
+  %res = load i32, ptr %addr
+  ret i32 %res
+}
+
+define i32 @test_addr_mode_i64_large(ptr %x) {
+; CHECK-LABEL: test_addr_mode_i64_large(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-NEXT:    .reg .b64 %rd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.u64 %rd1, [test_addr_mode_i64_large_param_0];
+; CHECK-NEXT:    add.s64 %rd2, %rd1, 17179869172;
+; CHECK-NEXT:    ld.u32 %r1, [%rd2];
+; CHECK-NEXT:    st.param.b32 [func_retval0+0], %r1;
+; CHECK-NEXT:    ret;
+  %addr = getelementptr i32, ptr %x, i64 4294967293
+  %res = load i32, ptr %addr
+  ret i32 %res
+}
diff --git a/llvm/test/Instrumentation/InstrProfiling/runtime-counter-relocation.ll b/llvm/test/Instrumentation/InstrProfiling/runtime-counter-relocation.ll
index 53b1e4918e8d1..e1da23e7be31c 100644
--- a/llvm/test/Instrumentation/InstrProfiling/runtime-counter-relocation.ll
+++ b/llvm/test/Instrumentation/InstrProfiling/runtime-counter-relocation.ll
@@ -1,9 +1,13 @@
 ; RUN: opt < %s -S -passes=instrprof | FileCheck %s
 ; RUN: opt < %s -S -passes=instrprof -runtime-counter-relocation | FileCheck -check-prefixes=RELOC %s
+; RUN: opt < %s -S -passes=instrprof,inline,gvn -runtime-counter-relocation | FileCheck -check-prefixes=RELOC,RELOCOPT %s
+; RUN: opt < %s -S -passes=instrprof            -runtime-counter-relocation -instrprof-atomic-counter-update-all | FileCheck -check-prefixes=ATOMIC %s
+; RUN: opt < %s -S -passes=instrprof,inline,gvn -runtime-counter-relocation -instrprof-atomic-counter-update-all | FileCheck -check-prefixes=ATOMIC,ATOMICOPT %s
 
 target triple = "x86_64-unknown-linux-gnu"
 
 @__profn_foo = private constant [3 x i8] c"foo"
+@__profn_bar = private constant [3 x i8] c"bar"
 ; RELOC: $__llvm_profile_counter_bias = comdat any
 ; RELOC: @__llvm_profile_counter_bias = linkonce_odr hidden global i64 0, comdat
 
@@ -12,14 +16,34 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK-NEXT: %[[PGOCOUNTINC:.+]] = add i64 %[[PGOCOUNT]], 1
 ; CHECK-NEXT: store i64 %[[PGOCOUNTINC]], ptr @__profc_foo
 ; RELOC-LABEL: define void @foo
-; RELOC-NEXT: %[[BIAS:.+]] = load i64, ptr @__llvm_profile_counter_bias
+; RELOC-NEXT: %[[BIAS:.+]] = load i64, ptr @__llvm_profile_counter_bias, align {{[0-9]+}}, !invariant.load !0
 ; RELOC-NEXT: %[[PROFC_BIAS:.+]] = add i64 ptrtoint (ptr @__profc_foo to i64), %[[BIAS]]
 ; RELOC-NEXT: %[[PROFC_ADDR:.+]] = inttoptr i64 %[[PROFC_BIAS]] to ptr
 ; RELOC-NEXT: %[[PGOCOUNT:.+]] = load i64, ptr %[[PROFC_ADDR]]
 ; RELOC-NEXT: %[[PGOCOUNTINC:.+]] = add i64 %[[PGOCOUNT]], 1
 ; RELOC-NEXT: store i64 %[[PGOCOUNTINC]], ptr %[[PROFC_ADDR]]
+; RELOCOPT-NEXT: %[[PROFC_BIAS1:.+]] = add i64 ptrtoint (ptr @__profc_bar to i64), %[[BIAS]]
+; RELOCOPT-NEXT: %[[PROFC_ADDR1:.+]] = inttoptr i64 %[[PROFC_BIAS1]] to ptr
+; RELOCOPT-NEXT: %[[PGOCOUNT1:.+]] = load i64, ptr %[[PROFC_ADDR1]]
+; RELOCOPT-NEXT: %[[PGOCOUNTINC1:.+]] = add i64 %[[PGOCOUNT1]], 1
+; RELOCOPT-NEXT: store i64 %[[PGOCOUNTINC1]], ptr %[[PROFC_ADDR1]]
+; ATOMIC-LABEL: define void @foo
+; ATOMIC-NEXT: %[[BIAS:.+]] = load i64, ptr @__llvm_profile_counter_bias, align {{[0-9]+}}, !invariant.load !0
+; ATOMIC-NEXT: %[[PROFC_BIAS:.+]] = add i64 ptrtoint (ptr @__profc_foo to i64), %[[BIAS]]
+; ATOMIC-NEXT: %[[PROFC_ADDR:.+]] = inttoptr i64 %[[PROFC_BIAS]] to ptr
+; ATOMIC-NEXT: %[[PGOCOUNTINC:.+]] = atomicrmw add ptr %[[PROFC_ADDR]], i64 1 monotonic
+; ATOMICOPT-NEXT: %[[PROFC_BIAS1:.+]] = add i64 ptrtoint (ptr @__profc_bar to i64), %[[BIAS]]
+; ATOMICOPT-NEXT: %[[PROFC_ADDR1:.+]] = inttoptr i64 %[[PROFC_BIAS1]] to ptr
+; ATOMICOPT-NEXT: %[[PGOCOUNTINC1:.+]] = atomicrmw add ptr %[[PROFC_ADDR1]], i64 1 monotonic
+
+define void @bar() {
+  call void @llvm.instrprof.increment(ptr @__profn_bar, i64 0, i32 1, i32 0)
+  ret void
+}
+
 define void @foo() {
   call void @llvm.instrprof.increment(ptr @__profn_foo, i64 0, i32 1, i32 0)
+  call void @bar()
   ret void
 }
 
diff --git a/llvm/test/MC/AsmParser/directive_abort.s b/llvm/test/MC/AsmParser/directive_abort.s
index 86e6267a7a1eb..f4dda229a017b 100644
--- a/llvm/test/MC/AsmParser/directive_abort.s
+++ b/llvm/test/MC/AsmParser/directive_abort.s
@@ -1,6 +1,9 @@
-# RUN: not llvm-mc -triple i386-unknown-unknown %s 2> %t
-# RUN: FileCheck -input-file %t %s
+// RUN: not llvm-mc -filetype=obj -triple x86_64 %s 2>&1 -o /dev/null | FileCheck %s
 
-# CHECK: error: .abort 'please stop assembing'
-TEST0:
-	.abort       please stop assembing
+.abort
+// CHECK:      [[#@LINE-1]]:1: error: .abort detected. Assembly stopping
+// CHECK-NEXT: abort
+
+.abort "abort message"
+// CHECK:      [[#@LINE-1]]:1: error: .abort '"abort message"' detected. Assembly stopping
+// CHECK-NEXT: abort
diff --git a/llvm/test/MC/X86/x86-32-coverage.s b/llvm/test/MC/X86/x86-32-coverage.s
index fbe2714aed263..5475946a9d216 100644
--- a/llvm/test/MC/X86/x86-32-coverage.s
+++ b/llvm/test/MC/X86/x86-32-coverage.s
@@ -10790,7 +10790,7 @@ btcl $4, (%eax)
           movdir64b 485498096, %ecx
 
 // CHECK: movdir64b 485498096, %cx
-// CHECK: # encoding: [0x67,0x66,0x0f,0x38,0xf8,0x0d,0xf0,0x1c,0xf0,0x1c]
+// CHECK: # encoding: [0x67,0x66,0x0f,0x38,0xf8,0x0e,0xf0,0x1c]
           movdir64b 485498096, %cx
 
 // CHECK: movdir64b (%edx), %eax
@@ -10877,6 +10877,10 @@ enqcmd  (%bx,%di), %di
 // CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xf8,0x81,0xc0,0x1f]
 enqcmd  8128(%bx,%di), %ax
 
+// CHECK: enqcmd 485498096, %cx
+// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xf8,0x0e,0xf0,0x1c]
+enqcmd 485498096, %cx
+
 // CHECK: enqcmds (%bx,%di), %di
 // CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xf8,0x39]
 enqcmds (%bx,%di), %di
@@ -10885,6 +10889,10 @@ enqcmds (%bx,%di), %di
 // CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xf8,0x81,0xc0,0x1f]
 enqcmds 8128(%bx,%di), %ax
 
+// CHECK: enqcmds 485498096, %cx
+// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xf8,0x0e,0xf0,0x1c]
+enqcmds 485498096, %cx
+
 // CHECK: serialize
 // CHECK: encoding: [0x0f,0x01,0xe8]
 serialize
diff --git a/llvm/test/Transforms/HotColdSplit/pr40056.ll b/llvm/test/Transforms/HotColdSplit/pr40056.ll
new file mode 100644
index 0000000000000..950b62c673fbf
--- /dev/null
+++ b/llvm/test/Transforms/HotColdSplit/pr40056.ll
@@ -0,0 +1,72 @@
+; RUN: opt -passes=hotcoldsplit -hotcoldsplit-threshold=-1 -S < %s | FileCheck %s
+; Hot cold splitting should not outline:
+; 1. Basic blocks with token type instructions
+; 2. Functions with scoped EH personality
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc19.0.0"
+
+; CHECK-LABEL: define {{.*}}@with_funclet
+; CHECK-NOT: with_funclet.cold
+define void @with_funclet() personality ptr @__CxxFrameHandler3 {
+entry:
+  invoke void @fYAXXZ()
+          to label %normal unwind label %exception
+
+normal:                                           ; preds = %entry
+  ret void
+
+exception:                                        ; preds = %entry
+  %0 = cleanuppad within none []
+  call void @terminateYAXXZ() [ "funclet"(token %0) ]
+  br label %continueexception
+
+continueexception:                                ; preds = %exception
+  ret void
+}
+
+; CHECK-LABEL: define {{.*}}@with_personality
+; CHECK-NOT: with_personality.cold
+define void @with_personality(i32 %cond) personality ptr @__CxxFrameHandler3 {
+entry:
+  %cond.addr = alloca i32
+  store i32 %cond, ptr %cond.addr
+  %0 = load i32, ptr %cond.addr
+  %tobool = icmp ne i32 %0, 0
+  br i1 %tobool, label %if.then, label %if.end2
+
+if.then:                                          ; preds = %entry
+  %1 = load i32, ptr %cond.addr
+  %cmp = icmp sgt i32 %1, 10
+  br i1 %cmp, label %if.then1, label %if.else
+
+if.then1:                                         ; preds = %if.then
+  call void @sideeffect(i32 0)
+  br label %if.end
+
+if.else:                                          ; preds = %if.then
+  call void @sideeffect(i32 1)
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then1
+  call void (...) @sink()
+  ret void
+
+if.end2:                                          ; preds = %entry
+  call void @sideeffect(i32 2)
+  ret void
+}
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare void @fYAXXZ()
+
+declare void @bar() #0
+
+declare void @terminateYAXXZ()
+
+declare void @sideeffect(i32)
+
+declare void @sink(...) #0
+
+attributes #0 = { cold }
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll
index 5116a85e4f2a1..a70eafb6078a0 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll
@@ -32,7 +32,6 @@ define i32 @pr70988(ptr %src, i32 %n) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
-; CHECK-NEXT:    [[TMP7:%.*]] = phi ptr [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_LOAD_IF]] ]
 ; CHECK-NEXT:    [[TMP8:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ]
 ; CHECK-NEXT:    br i1 [[ACTIVE_LANE_MASK2]], label [[PRED_LOAD_IF4:%.*]], label [[PRED_LOAD_CONTINUE5]]
 ; CHECK:       pred.load.if4:
@@ -42,7 +41,6 @@ define i32 @pr70988(ptr %src, i32 %n) {
 ; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE5]]
 ; CHECK:       pred.load.continue5:
-; CHECK-NEXT:    [[TMP13:%.*]] = phi ptr [ poison, [[PRED_LOAD_CONTINUE]] ], [ [[TMP11]], [[PRED_LOAD_IF4]] ]
 ; CHECK-NEXT:    [[TMP14:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF4]] ]
 ; CHECK-NEXT:    [[TMP15:%.*]] = tail call i32 @llvm.smax.i32(i32 [[TMP8]], i32 [[VEC_PHI]])
 ; CHECK-NEXT:    [[TMP16:%.*]] = tail call i32 @llvm.smax.i32(i32 [[TMP14]], i32 [[VEC_PHI3]])
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-force-tail-with-evl.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-force-tail-with-evl.ll
index 7f258d57e7018..0b8a2d2f02057 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-force-tail-with-evl.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-force-tail-with-evl.ll
@@ -40,8 +40,6 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
 ; CHECK-NEXT:    Successor(s): pred.store.continue
 ; CHECK-EMPTY:
 ; CHECK-NEXT:    pred.store.continue:
-; CHECK-NEXT:      PHI-PREDICATED-INSTRUCTION vp<[[P1:%.+]]> = ir<%0>
-; CHECK-NEXT:      PHI-PREDICATED-INSTRUCTION vp<[[P2:%.+]]> = ir<%1>
 ; CHECK-NEXT:    No successors
 ; CHECK-NEXT:  }
 ; CHECK-NEXT:  Successor(s): for.body.2
diff --git a/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll
index 42a9ab0ca270f..133510fbb2db8 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll
@@ -288,9 +288,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
 ; CHECK-NEXT:    br label %[[PRED_UREM_CONTINUE]]
 ; CHECK:       [[PRED_UREM_CONTINUE]]:
 ; CHECK-NEXT:    [[TMP15:%.*]] = phi <4 x i64> [ poison, %[[VECTOR_BODY]] ], [ [[TMP9]], %[[PRED_UREM_IF]] ]
-; CHECK-NEXT:    [[TMP16:%.*]] = phi i64 [ poison, %[[VECTOR_BODY]] ], [ [[TMP10]], %[[PRED_UREM_IF]] ]
 ; CHECK-NEXT:    [[TMP17:%.*]] = phi i64 [ poison, %[[VECTOR_BODY]] ], [ [[TMP11]], %[[PRED_UREM_IF]] ]
-; CHECK-NEXT:    [[TMP18:%.*]] = phi i64 [ poison, %[[VECTOR_BODY]] ], [ [[TMP12]], %[[PRED_UREM_IF]] ]
 ; CHECK-NEXT:    [[TMP19:%.*]] = phi i64 [ poison, %[[VECTOR_BODY]] ], [ [[TMP13]], %[[PRED_UREM_IF]] ]
 ; CHECK-NEXT:    [[TMP20:%.*]] = phi i64 [ poison, %[[VECTOR_BODY]] ], [ [[TMP14]], %[[PRED_UREM_IF]] ]
 ; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1
@@ -307,9 +305,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
 ; CHECK-NEXT:    br label %[[PRED_UREM_CONTINUE2]]
 ; CHECK:       [[PRED_UREM_CONTINUE2]]:
 ; CHECK-NEXT:    [[TMP30:%.*]] = phi <4 x i64> [ [[TMP15]], %[[PRED_UREM_CONTINUE]] ], [ [[TMP24]], %[[PRED_UREM_IF1]] ]
-; CHECK-NEXT:    [[TMP31:%.*]] = phi i64 [ poison, %[[PRED_UREM_CONTINUE]] ], [ [[TMP25]], %[[PRED_UREM_IF1]] ]
 ; CHECK-NEXT:    [[TMP32:%.*]] = phi i64 [ poison, %[[PRED_UREM_CONTINUE]] ], [ [[TMP26]], %[[PRED_UREM_IF1]] ]
-; CHECK-NEXT:    [[TMP33:%.*]] = phi i64 [ poison, %[[PRED_UREM_CONTINUE]] ], [ [[TMP27]], %[[PRED_UREM_IF1]] ]
 ; CHECK-NEXT:    [[TMP34:%.*]] = phi i64 [ poison, %[[PRED_UREM_CONTINUE]] ], [ [[TMP28]], %[[PRED_UREM_IF1]] ]
 ; CHECK-NEXT:    [[TMP35:%.*]] = phi i64 [ poison, %[[PRED_UREM_CONTINUE]] ], [ [[TMP29]], %[[PRED_UREM_IF1]] ]
 ; CHECK-NEXT:    [[TMP36:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2
@@ -326,9 +322,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
 ; CHECK-NEXT:    br label %[[PRED_UREM_CONTINUE4]]
 ; CHECK:       [[PRED_UREM_CONTINUE4]]:
 ; CHECK-NEXT:    [[TMP45:%.*]] = phi <4 x i64> [ [[TMP30]], %[[PRED_UREM_CONTINUE2]] ], [ [[TMP39]], %[[PRED_UREM_IF3]] ]
-; CHECK-NEXT:    [[TMP46:%.*]] = phi i64 [ poison, %[[PRED_UREM_CONTINUE2]] ], [ [[TMP40]], %[[PRED_UREM_IF3]] ]
 ; CHECK-NEXT:    [[TMP47:%.*]] = phi i64 [ poison, %[[PRED_UREM_CONTINUE2]] ], [ [[TMP41]], %[[PRED_UREM_IF3]] ]
-; CHECK-NEXT:    [[TMP48:%.*]] = phi i64 [ poison, %[[PRED_UREM_CONTINUE2]] ], [ [[TMP42]], %[[PRED_UREM_IF3]] ]
 ; CHECK-NEXT:    [[TMP49:%.*]] = phi i64 [ poison, %[[PRED_UREM_CONTINUE2]] ], [ [[TMP43]], %[[PRED_UREM_IF3]] ]
 ; CHECK-NEXT:    [[TMP50:%.*]] = phi i64 [ poison, %[[PRED_UREM_CONTINUE2]] ], [ [[TMP44]], %[[PRED_UREM_IF3]] ]
 ; CHECK-NEXT:    [[TMP51:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3
@@ -345,9 +339,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
 ; CHECK-NEXT:    br label %[[PRED_UREM_CONTINUE6]]
 ; CHECK:       [[PRED_UREM_CONTINUE6]]:
 ; CHECK-NEXT:    [[TMP60:%.*]] = phi <4 x i64> [ [[TMP45]], %[[PRED_UREM_CONTINUE4]] ], [ [[TMP54]], %[[PRED_UREM_IF5]] ]
-; CHECK-NEXT:    [[TMP61:%.*]] = phi i64 [ poison, %[[PRED_UREM_CONTINUE4]] ], [ [[TMP55]], %[[PRED_UREM_IF5]] ]
 ; CHECK-NEXT:    [[TMP62:%.*]] = phi i64 [ poison, %[[PRED_UREM_CONTINUE4]] ], [ [[TMP56]], %[[PRED_UREM_IF5]] ]
-; CHECK-NEXT:    [[TMP63:%.*]] = phi i64 [ poison, %[[PRED_UREM_CONTINUE4]] ], [ [[TMP57]], %[[PRED_UREM_IF5]] ]
 ; CHECK-NEXT:    [[TMP64:%.*]] = phi i64 [ poison, %[[PRED_UREM_CONTINUE4]] ], [ [[TMP58]], %[[PRED_UREM_IF5]] ]
 ; CHECK-NEXT:    [[TMP65:%.*]] = phi i64 [ poison, %[[PRED_UREM_CONTINUE4]] ], [ [[TMP59]], %[[PRED_UREM_IF5]] ]
 ; CHECK-NEXT:    [[TMP66:%.*]] = extractelement <4 x i64> [[TMP60]], i32 0
diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll
index feaa5fa2fc4d3..eee1b6f35d1b7 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll
@@ -238,7 +238,6 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i32 0
 ; CHECK-NEXT:    br label [[PRED_UDIV_CONTINUE]]
 ; CHECK:       pred.udiv.continue:
-; CHECK-NEXT:    [[TMP7:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_UDIV_IF]] ]
 ; CHECK-NEXT:    [[TMP8:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_UDIV_IF]] ]
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 1
 ; CHECK-NEXT:    br i1 [[TMP9]], label [[PRED_UDIV_IF1:%.*]], label [[PRED_UDIV_CONTINUE2]]
@@ -250,7 +249,6 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) {
 ; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP13]], i32 1
 ; CHECK-NEXT:    br label [[PRED_UDIV_CONTINUE2]]
 ; CHECK:       pred.udiv.continue2:
-; CHECK-NEXT:    [[TMP15:%.*]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE]] ], [ [[TMP12]], [[PRED_UDIV_IF1]] ]
 ; CHECK-NEXT:    [[TMP16:%.*]] = phi <2 x i32> [ [[TMP8]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP14]], [[PRED_UDIV_IF1]] ]
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP16]], <2 x i32> [[BROADCAST_SPLAT4]]
 ; CHECK-NEXT:    [[TMP18]] = add <2 x i32> [[VEC_PHI]], [[PREDPHI]]
@@ -314,7 +312,6 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) {
 ; SINK-GATHER-NEXT:    [[TMP6:%.*]] = insertelement <8 x i32> poison, i32 [[TMP5]], i32 0
 ; SINK-GATHER-NEXT:    br label [[PRED_UDIV_CONTINUE]]
 ; SINK-GATHER:       pred.udiv.continue:
-; SINK-GATHER-NEXT:    [[TMP7:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_UDIV_IF]] ]
 ; SINK-GATHER-NEXT:    [[TMP8:%.*]] = phi <8 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_UDIV_IF]] ]
 ; SINK-GATHER-NEXT:    [[TMP9:%.*]] = extractelement <8 x i1> [[BROADCAST_SPLAT]], i32 1
 ; SINK-GATHER-NEXT:    br i1 [[TMP9]], label [[PRED_UDIV_IF1:%.*]], label [[PRED_UDIV_CONTINUE2:%.*]]
@@ -326,7 +323,6 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) {
 ; SINK-GATHER-NEXT:    [[TMP14:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[TMP13]], i32 1
 ; SINK-GATHER-NEXT:    br label [[PRED_UDIV_CONTINUE2]]
 ; SINK-GATHER:       pred.udiv.continue2:
-; SINK-GATHER-NEXT:    [[TMP15:%.*]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE]] ], [ [[TMP12]], [[PRED_UDIV_IF1]] ]
 ; SINK-GATHER-NEXT:    [[TMP16:%.*]] = phi <8 x i32> [ [[TMP8]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP14]], [[PRED_UDIV_IF1]] ]
 ; SINK-GATHER-NEXT:    [[TMP17:%.*]] = extractelement <8 x i1> [[BROADCAST_SPLAT]], i32 2
 ; SINK-GATHER-NEXT:    br i1 [[TMP17]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE4:%.*]]
@@ -338,7 +334,6 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) {
 ; SINK-GATHER-NEXT:    [[TMP22:%.*]] = insertelement <8 x i32> [[TMP16]], i32 [[TMP21]], i32 2
 ; SINK-GATHER-NEXT:    br label [[PRED_UDIV_CONTINUE4]]
 ; SINK-GATHER:       pred.udiv.continue4:
-; SINK-GATHER-NEXT:    [[TMP23:%.*]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE2]] ], [ [[TMP20]], [[PRED_UDIV_IF3]] ]
 ; SINK-GATHER-NEXT:    [[TMP24:%.*]] = phi <8 x i32> [ [[TMP16]], [[PRED_UDIV_CONTINUE2]] ], [ [[TMP22]], [[PRED_UDIV_IF3]] ]
 ; SINK-GATHER-NEXT:    [[TMP25:%.*]] = extractelement <8 x i1> [[BROADCAST_SPLAT]], i32 3
 ; SINK-GATHER-NEXT:    br i1 [[TMP25]], label [[PRED_UDIV_IF5:%.*]], label [[PRED_UDIV_CONTINUE6:%.*]]
@@ -350,7 +345,6 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) {
 ; SINK-GATHER-NEXT:    [[TMP30:%.*]] = insertelement <8 x i32> [[TMP24]], i32 [[TMP29]], i32 3
 ; SINK-GATHER-NEXT:    br label [[PRED_UDIV_CONTINUE6]]
 ; SINK-GATHER:       pred.udiv.continue6:
-; SINK-GATHER-NEXT:    [[TMP31:%.*]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE4]] ], [ [[TMP28]], [[PRED_UDIV_IF5]] ]
 ; SINK-GATHER-NEXT:    [[TMP32:%.*]] = phi <8 x i32> [ [[TMP24]], [[PRED_UDIV_CONTINUE4]] ], [ [[TMP30]], [[PRED_UDIV_IF5]] ]
 ; SINK-GATHER-NEXT:    [[TMP33:%.*]] = extractelement <8 x i1> [[BROADCAST_SPLAT]], i32 4
 ; SINK-GATHER-NEXT:    br i1 [[TMP33]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8:%.*]]
@@ -362,7 +356,6 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) {
 ; SINK-GATHER-NEXT:    [[TMP38:%.*]] = insertelement <8 x i32> [[TMP32]], i32 [[TMP37]], i32 4
 ; SINK-GATHER-NEXT:    br label [[PRED_UDIV_CONTINUE8]]
 ; SINK-GATHER:       pred.udiv.continue8:
-; SINK-GATHER-NEXT:    [[TMP39:%.*]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE6]] ], [ [[TMP36]], [[PRED_UDIV_IF7]] ]
 ; SINK-GATHER-NEXT:    [[TMP40:%.*]] = phi <8 x i32> [ [[TMP32]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP38]], [[PRED_UDIV_IF7]] ]
 ; SINK-GATHER-NEXT:    [[TMP41:%.*]] = extractelement <8 x i1> [[BROADCAST_SPLAT]], i32 5
 ; SINK-GATHER-NEXT:    br i1 [[TMP41]], label [[PRED_UDIV_IF9:%.*]], label [[PRED_UDIV_CONTINUE10:%.*]]
@@ -374,7 +367,6 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) {
 ; SINK-GATHER-NEXT:    [[TMP46:%.*]] = insertelement <8 x i32> [[TMP40]], i32 [[TMP45]], i32 5
 ; SINK-GATHER-NEXT:    br label [[PRED_UDIV_CONTINUE10]]
 ; SINK-GATHER:       pred.udiv.continue10:
-; SINK-GATHER-NEXT:    [[TMP47:%.*]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE8]] ], [ [[TMP44]], [[PRED_UDIV_IF9]] ]
 ; SINK-GATHER-NEXT:    [[TMP48:%.*]] = phi <8 x i32> [ [[TMP40]], [[PRED_UDIV_CONTINUE8]] ], [ [[TMP46]], [[PRED_UDIV_IF9]] ]
 ; SINK-GATHER-NEXT:    [[TMP49:%.*]] = extractelement <8 x i1> [[BROADCAST_SPLAT]], i32 6
 ; SINK-GATHER-NEXT:    br i1 [[TMP49]], label [[PRED_UDIV_IF11:%.*]], label [[PRED_UDIV_CONTINUE12:%.*]]
@@ -386,7 +378,6 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) {
 ; SINK-GATHER-NEXT:    [[TMP54:%.*]] = insertelement <8 x i32> [[TMP48]], i32 [[TMP53]], i32 6
 ; SINK-GATHER-NEXT:    br label [[PRED_UDIV_CONTINUE12]]
 ; SINK-GATHER:       pred.udiv.continue12:
-; SINK-GATHER-NEXT:    [[TMP55:%.*]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE10]] ], [ [[TMP52]], [[PRED_UDIV_IF11]] ]
 ; SINK-GATHER-NEXT:    [[TMP56:%.*]] = phi <8 x i32> [ [[TMP48]], [[PRED_UDIV_CONTINUE10]] ], [ [[TMP54]], [[PRED_UDIV_IF11]] ]
 ; SINK-GATHER-NEXT:    [[TMP57:%.*]] = extractelement <8 x i1> [[BROADCAST_SPLAT]], i32 7
 ; SINK-GATHER-NEXT:    br i1 [[TMP57]], label [[PRED_UDIV_IF13:%.*]], label [[PRED_UDIV_CONTINUE14]]
@@ -398,7 +389,6 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) {
 ; SINK-GATHER-NEXT:    [[TMP62:%.*]] = insertelement <8 x i32> [[TMP56]], i32 [[TMP61]], i32 7
 ; SINK-GATHER-NEXT:    br label [[PRED_UDIV_CONTINUE14]]
 ; SINK-GATHER:       pred.udiv.continue14:
-; SINK-GATHER-NEXT:    [[TMP63:%.*]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE12]] ], [ [[TMP60]], [[PRED_UDIV_IF13]] ]
 ; SINK-GATHER-NEXT:    [[TMP64:%.*]] = phi <8 x i32> [ [[TMP56]], [[PRED_UDIV_CONTINUE12]] ], [ [[TMP62]], [[PRED_UDIV_IF13]] ]
 ; SINK-GATHER-NEXT:    [[PREDPHI:%.*]] = select <8 x i1> [[BROADCAST_SPLAT]], <8 x i32> [[TMP64]], <8 x i32> [[BROADCAST_SPLAT16]]
 ; SINK-GATHER-NEXT:    [[TMP66]] = add <8 x i32> [[VEC_PHI]], [[PREDPHI]]
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
index bff730f263ab2..048b670f0fec8 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
@@ -60,7 +60,6 @@ define void @sink_replicate_region_1(i32 %x, ptr %ptr, ptr noalias %dst) optsize
 ; CHECK-NEXT:   Successor(s): pred.store.continue
 ; CHECK-EMPTY:
 ; CHECK-NEXT:   pred.store.continue:
-; CHECK-NEXT:     PHI-PREDICATED-INSTRUCTION vp<[[PRED2:%.+]]> = ir<%rem>
 ; CHECK-NEXT:   No successors
 ; CHECK-NEXT: }
 ; CHECK-NEXT: Successor(s): loop.2
@@ -143,7 +142,6 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize {
 ; CHECK-NEXT:   Successor(s): pred.store.continue
 ; CHECK-EMPTY:
 ; CHECK-NEXT:   pred.store.continue:
-; CHECK-NEXT:     PHI-PREDICATED-INSTRUCTION vp<[[PRED:%.+]]> = ir<%rem>
 ; CHECK-NEXT:   No successors
 ; CHECK-NEXT: }
 ; CHECK-NEXT: Successor(s): loop.1
@@ -332,8 +330,6 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr
 ; CHECK-NEXT:   Successor(s): pred.store.continue
 ; CHECK-EMPTY:
 ; CHECK:        pred.store.continue:
-; CHECK-NEXT:     PHI-PREDICATED-INSTRUCTION vp<[[PRED1:%.+]]> = ir<%rem>
-; CHECK-NEXT:     PHI-PREDICATED-INSTRUCTION vp<[[PRED2:%.+]]> = ir<%lv.2>
 ; CHECK-NEXT:   No successors
 ; CHECK-NEXT: }
 ; CHECK-NEXT:   Successor(s): loop.3
@@ -426,8 +422,6 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias
 ; CHECK-NEXT:   Successor(s): pred.store.continue
 ; CHECK-EMPTY:
 ; CHECK-NEXT:   pred.store.continue:
-; CHECK-NEXT:     PHI-PREDICATED-INSTRUCTION vp<[[PRED:%.+]]> = ir<%rem>
-; CHECK-NEXT:     PHI-PREDICATED-INSTRUCTION vp<[[PRED2:%.+]]> = ir<%rem.div>
 ; CHECK-NEXT:   No successors
 ; CHECK-NEXT: }
 ; CHECK-NEXT: Successor(s): loop.3
@@ -510,7 +504,6 @@ define void @need_new_block_after_sinking_pr56146(i32 %x, ptr %src, ptr noalias
 ; CHECK-NEXT:     Successor(s): pred.store.continue
 ; CHECK-EMPTY:
 ; CHECK-NEXT:     pred.store.continue:
-; CHECK-NEXT:       PHI-PREDICATED-INSTRUCTION vp<[[P_VAL:%.+]]> = ir<%val>
 ; CHECK-NEXT:     No successors
 ; CHECK-NEXT:   }
 ; CHECK-NEXT:   Successor(s): loop.1
diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll
index eade22f3fe11f..ecb57c539a40e 100644
--- a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll
@@ -388,7 +388,6 @@ define void @test_scalar2scalar(ptr nocapture %asd, ptr nocapture %bsd) {
 ; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP12]], i32 0
 ; CHECK-NEXT:    br label [[PRED_SDIV_CONTINUE]]
 ; CHECK:       pred.sdiv.continue:
-; CHECK-NEXT:    [[TMP14:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP10]], [[PRED_SDIV_IF]] ]
 ; CHECK-NEXT:    [[TMP15:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP13]], [[PRED_SDIV_IF]] ]
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x i1> [[TMP6]], i32 1
 ; CHECK-NEXT:    br i1 [[TMP16]], label [[PRED_SDIV_IF3:%.*]], label [[PRED_SDIV_CONTINUE4]]
@@ -401,7 +400,6 @@ define void @test_scalar2scalar(ptr nocapture %asd, ptr nocapture %bsd) {
 ; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[TMP21]], i32 1
 ; CHECK-NEXT:    br label [[PRED_SDIV_CONTINUE4]]
 ; CHECK:       pred.sdiv.continue4:
-; CHECK-NEXT:    [[TMP23:%.*]] = phi i32 [ poison, [[PRED_SDIV_CONTINUE]] ], [ [[TMP19]], [[PRED_SDIV_IF3]] ]
 ; CHECK-NEXT:    [[TMP24:%.*]] = phi <2 x i32> [ [[TMP15]], [[PRED_SDIV_CONTINUE]] ], [ [[TMP22]], [[PRED_SDIV_IF3]] ]
 ; CHECK-NEXT:    [[TMP25:%.*]] = xor <2 x i1> [[TMP6]], <i1 true, i1 true>
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP25]], <2 x i32> [[TMP5]], <2 x i32> [[TMP24]]
@@ -466,7 +464,6 @@ define void @test_scalar2scalar(ptr nocapture %asd, ptr nocapture %bsd) {
 ; UNROLL-NO-VF-NEXT:    [[TMP15:%.*]] = sdiv i32 [[TMP8]], [[TMP14]]
 ; UNROLL-NO-VF-NEXT:    br label [[PRED_SDIV_CONTINUE]]
 ; UNROLL-NO-VF:       pred.sdiv.continue:
-; UNROLL-NO-VF-NEXT:    [[TMP16:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP14]], [[PRED_SDIV_IF]] ]
 ; UNROLL-NO-VF-NEXT:    [[TMP17:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP15]], [[PRED_SDIV_IF]] ]
 ; UNROLL-NO-VF-NEXT:    br i1 [[TMP13]], label [[PRED_SDIV_IF2:%.*]], label [[PRED_SDIV_CONTINUE3]]
 ; UNROLL-NO-VF:       pred.sdiv.if2:
@@ -474,7 +471,6 @@ define void @test_scalar2scalar(ptr nocapture %asd, ptr nocapture %bsd) {
 ; UNROLL-NO-VF-NEXT:    [[TMP19:%.*]] = sdiv i32 [[TMP9]], [[TMP18]]
 ; UNROLL-NO-VF-NEXT:    br label [[PRED_SDIV_CONTINUE3]]
 ; UNROLL-NO-VF:       pred.sdiv.continue3:
-; UNROLL-NO-VF-NEXT:    [[TMP20:%.*]] = phi i32 [ poison, [[PRED_SDIV_CONTINUE]] ], [ [[TMP18]], [[PRED_SDIV_IF2]] ]
 ; UNROLL-NO-VF-NEXT:    [[TMP21:%.*]] = phi i32 [ poison, [[PRED_SDIV_CONTINUE]] ], [ [[TMP19]], [[PRED_SDIV_IF2]] ]
 ; UNROLL-NO-VF-NEXT:    [[TMP22:%.*]] = xor i1 [[TMP12]], true
 ; UNROLL-NO-VF-NEXT:    [[TMP23:%.*]] = xor i1 [[TMP13]], true
@@ -577,7 +573,6 @@ define void @pr30172(ptr nocapture %asd, ptr nocapture %bsd) !dbg !5 {;
 ; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <2 x i32> poison, i32 [[TMP16]], i32 0
 ; CHECK-NEXT:    br label [[PRED_SDIV_CONTINUE]]
 ; CHECK:       pred.sdiv.continue:
-; CHECK-NEXT:    [[TMP18:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP14]], [[PRED_SDIV_IF]] ]
 ; CHECK-NEXT:    [[TMP19:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP17]], [[PRED_SDIV_IF]] ]
 ; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1
 ; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_SDIV_IF3:%.*]], label [[PRED_SDIV_CONTINUE4]]
@@ -590,7 +585,6 @@ define void @pr30172(ptr nocapture %asd, ptr nocapture %bsd) !dbg !5 {;
 ; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <2 x i32> [[TMP19]], i32 [[TMP25]], i32 1
 ; CHECK-NEXT:    br label [[PRED_SDIV_CONTINUE4]]
 ; CHECK:       pred.sdiv.continue4:
-; CHECK-NEXT:    [[TMP27:%.*]] = phi i32 [ poison, [[PRED_SDIV_CONTINUE]] ], [ [[TMP23]], [[PRED_SDIV_IF3]] ]
 ; CHECK-NEXT:    [[TMP28:%.*]] = phi <2 x i32> [ [[TMP19]], [[PRED_SDIV_CONTINUE]] ], [ [[TMP26]], [[PRED_SDIV_IF3]] ]
 ; CHECK-NEXT:    [[TMP29:%.*]] = xor <2 x i1> [[TMP7]], <i1 true, i1 true>, !dbg [[DBG35]]
 ; CHECK-NEXT:    [[TMP30:%.*]] = select <2 x i1> [[TMP8]], <2 x i1> [[TMP29]], <2 x i1> zeroinitializer, !dbg [[DBG35]]
@@ -666,7 +660,6 @@ define void @pr30172(ptr nocapture %asd, ptr nocapture %bsd) !dbg !5 {;
 ; UNROLL-NO-VF-NEXT:    [[TMP23:%.*]] = sdiv i32 [[TMP8]], [[TMP22]]
 ; UNROLL-NO-VF-NEXT:    br label [[PRED_SDIV_CONTINUE]]
 ; UNROLL-NO-VF:       pred.sdiv.continue:
-; UNROLL-NO-VF-NEXT:    [[TMP24:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP22]], [[PRED_SDIV_IF]] ]
 ; UNROLL-NO-VF-NEXT:    [[TMP25:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP23]], [[PRED_SDIV_IF]] ]
 ; UNROLL-NO-VF-NEXT:    br i1 [[TMP21]], label [[PRED_SDIV_IF2:%.*]], label [[PRED_SDIV_CONTINUE3]]
 ; UNROLL-NO-VF:       pred.sdiv.if2:
@@ -674,7 +667,6 @@ define void @pr30172(ptr nocapture %asd, ptr nocapture %bsd) !dbg !5 {;
 ; UNROLL-NO-VF-NEXT:    [[TMP27:%.*]] = sdiv i32 [[TMP9]], [[TMP26]]
 ; UNROLL-NO-VF-NEXT:    br label [[PRED_SDIV_CONTINUE3]]
 ; UNROLL-NO-VF:       pred.sdiv.continue3:
-; UNROLL-NO-VF-NEXT:    [[TMP28:%.*]] = phi i32 [ poison, [[PRED_SDIV_CONTINUE]] ], [ [[TMP26]], [[PRED_SDIV_IF2]] ]
 ; UNROLL-NO-VF-NEXT:    [[TMP29:%.*]] = phi i32 [ poison, [[PRED_SDIV_CONTINUE]] ], [ [[TMP27]], [[PRED_SDIV_IF2]] ]
 ; UNROLL-NO-VF-NEXT:    [[TMP30:%.*]] = xor i1 [[TMP14]], true, !dbg [[DBG35]]
 ; UNROLL-NO-VF-NEXT:    [[TMP31:%.*]] = xor i1 [[TMP15]], true, !dbg [[DBG35]]
diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
index 7ed42edfc7753..2503520c0ff9d 100644
--- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
@@ -96,7 +96,6 @@ declare i32 @llvm.smin.i32(i32, i32)
 ; DBG-NEXT:     Successor(s): pred.store.continue
 ; DBG-EMPTY:
 ; DBG-NEXT:     pred.store.continue:
-; DBG-NEXT:       PHI-PREDICATED-INSTRUCTION vp<{{.+}}> = ir<%l>
 ; DBG-NEXT:     No successors
 ; DBG-NEXT:   }
 ; DBG-NEXT:   Successor(s): cond.false.1
@@ -137,7 +136,6 @@ define void @test_scalarize_with_branch_cond(ptr %src, ptr %dst) {
 ; CHECK-NEXT:    store i32 [[TMP4]], ptr [[TMP1]], align 4
 ; CHECK-NEXT:    br label %pred.store.continue
 ; CHECK:       pred.store.continue:
-; CHECK-NEXT:    [[TMP5:%.*]] = phi i32 [ poison, %vector.body ], [ [[TMP4]], %pred.store.if ]
 ; CHECK-NEXT:    br i1 [[INDUCTION3]], label %pred.store.if4, label %pred.store.continue5
 ; CHECK:       pred.store.if4:
 ; CHECK-NEXT:    [[INDUCTION5:%.*]] = add i64 [[INDEX]], 1
@@ -147,7 +145,6 @@ define void @test_scalarize_with_branch_cond(ptr %src, ptr %dst) {
 ; CHECK-NEXT:    store i32 [[TMP7]], ptr [[TMP2]], align 4
 ; CHECK-NEXT:    br label %pred.store.continue5
 ; CHECK:       pred.store.continue5:
-; CHECK-NEXT:    [[TMP8:%.*]] = phi i32 [ poison, %pred.store.continue ], [ [[TMP7]], %pred.store.if4 ]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
 ; CHECK-NEXT:    br i1 [[TMP9]], label %middle.block, label %vector.body
diff --git a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
index f05ec30619c5d..7c23b603b6e91 100644
--- a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
@@ -224,7 +224,6 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
 ; CHECK-NEXT:    store i64 [[TMP4]], ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:
-; CHECK-NEXT:    [[TMP5:%.*]] = phi i64 [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_STORE_IF]] ]
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
 ; CHECK:       pred.store.if1:
@@ -234,7 +233,6 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
 ; CHECK-NEXT:    store i64 [[TMP9]], ptr [[B]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE2]]
 ; CHECK:       pred.store.continue2:
-; CHECK-NEXT:    [[TMP10:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP9]], [[PRED_STORE_IF1]] ]
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
 ; CHECK-NEXT:    br i1 [[TMP11]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
 ; CHECK:       pred.store.if3:
@@ -244,7 +242,6 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
 ; CHECK-NEXT:    store i64 [[TMP14]], ptr [[B]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE4]]
 ; CHECK:       pred.store.continue4:
-; CHECK-NEXT:    [[TMP15:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE2]] ], [ [[TMP14]], [[PRED_STORE_IF3]] ]
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
 ; CHECK-NEXT:    br i1 [[TMP16]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
 ; CHECK:       pred.store.if5:
@@ -254,7 +251,6 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
 ; CHECK-NEXT:    store i64 [[TMP19]], ptr [[B]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
 ; CHECK:       pred.store.continue6:
-; CHECK-NEXT:    [[TMP20:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE4]] ], [ [[TMP19]], [[PRED_STORE_IF5]] ]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
 ; CHECK-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
@@ -295,7 +291,6 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
 ; VF2UF2-NEXT:    store i64 [[TMP5]], ptr [[B:%.*]], align 8
 ; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; VF2UF2:       pred.store.continue:
-; VF2UF2-NEXT:    [[TMP6:%.*]] = phi i64 [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_STORE_IF]] ]
 ; VF2UF2-NEXT:    [[TMP7:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
 ; VF2UF2-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]]
 ; VF2UF2:       pred.store.if2:
@@ -305,7 +300,6 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
 ; VF2UF2-NEXT:    store i64 [[TMP10]], ptr [[B]], align 8
 ; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE3]]
 ; VF2UF2:       pred.store.continue3:
-; VF2UF2-NEXT:    [[TMP11:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP10]], [[PRED_STORE_IF2]] ]
 ; VF2UF2-NEXT:    [[TMP12:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
 ; VF2UF2-NEXT:    br i1 [[TMP12]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
 ; VF2UF2:       pred.store.if4:
@@ -315,7 +309,6 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
 ; VF2UF2-NEXT:    store i64 [[TMP15]], ptr [[B]], align 8
 ; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE5]]
 ; VF2UF2:       pred.store.continue5:
-; VF2UF2-NEXT:    [[TMP16:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE3]] ], [ [[TMP15]], [[PRED_STORE_IF4]] ]
 ; VF2UF2-NEXT:    [[TMP17:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
 ; VF2UF2-NEXT:    br i1 [[TMP17]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]]
 ; VF2UF2:       pred.store.if6:
@@ -325,7 +318,6 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
 ; VF2UF2-NEXT:    store i64 [[TMP20]], ptr [[B]], align 8
 ; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE7]]
 ; VF2UF2:       pred.store.continue7:
-; VF2UF2-NEXT:    [[TMP21:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE5]] ], [ [[TMP20]], [[PRED_STORE_IF6]] ]
 ; VF2UF2-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; VF2UF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], <i64 2, i64 2>
 ; VF2UF2-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
@@ -368,7 +360,6 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
 ; VF1UF4-NEXT:    store i64 [[TMP9]], ptr [[B:%.*]], align 8
 ; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; VF1UF4:       pred.store.continue:
-; VF1UF4-NEXT:    [[TMP10:%.*]] = phi i64 [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_STORE_IF]] ]
 ; VF1UF4-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
 ; VF1UF4:       pred.store.if1:
 ; VF1UF4-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
@@ -376,7 +367,6 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
 ; VF1UF4-NEXT:    store i64 [[TMP12]], ptr [[B]], align 8
 ; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE2]]
 ; VF1UF4:       pred.store.continue2:
-; VF1UF4-NEXT:    [[TMP13:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP12]], [[PRED_STORE_IF1]] ]
 ; VF1UF4-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
 ; VF1UF4:       pred.store.if3:
 ; VF1UF4-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
@@ -384,7 +374,6 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
 ; VF1UF4-NEXT:    store i64 [[TMP15]], ptr [[B]], align 8
 ; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE4]]
 ; VF1UF4:       pred.store.continue4:
-; VF1UF4-NEXT:    [[TMP16:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE2]] ], [ [[TMP15]], [[PRED_STORE_IF3]] ]
 ; VF1UF4-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
 ; VF1UF4:       pred.store.if5:
 ; VF1UF4-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
@@ -392,7 +381,6 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
 ; VF1UF4-NEXT:    store i64 [[TMP18]], ptr [[B]], align 8
 ; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE6]]
 ; VF1UF4:       pred.store.continue6:
-; VF1UF4-NEXT:    [[TMP19:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE4]] ], [ [[TMP18]], [[PRED_STORE_IF5]] ]
 ; VF1UF4-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; VF1UF4-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
 ; VF1UF4-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll
index 8983c80bf3ef4..9eb90099214e1 100644
--- a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll
+++ b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll
@@ -510,7 +510,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF4-IC1-NEXT:    store i32 [[TMP11]], ptr [[TMP9]], align 4, !alias.scope [[META9]], !noalias [[META6]]
 ; CHECK-VF4-IC1-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK-VF4-IC1:       pred.store.continue:
-; CHECK-VF4-IC1-NEXT:    [[TMP12:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP10]], [[PRED_STORE_IF]] ]
 ; CHECK-VF4-IC1-NEXT:    [[TMP13:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1
 ; CHECK-VF4-IC1-NEXT:    br i1 [[TMP13]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
 ; CHECK-VF4-IC1:       pred.store.if3:
@@ -521,7 +520,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF4-IC1-NEXT:    store i32 [[TMP17]], ptr [[TMP15]], align 4, !alias.scope [[META9]], !noalias [[META6]]
 ; CHECK-VF4-IC1-NEXT:    br label [[PRED_STORE_CONTINUE4]]
 ; CHECK-VF4-IC1:       pred.store.continue4:
-; CHECK-VF4-IC1-NEXT:    [[TMP18:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP16]], [[PRED_STORE_IF3]] ]
 ; CHECK-VF4-IC1-NEXT:    [[TMP19:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2
 ; CHECK-VF4-IC1-NEXT:    br i1 [[TMP19]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
 ; CHECK-VF4-IC1:       pred.store.if5:
@@ -532,7 +530,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF4-IC1-NEXT:    store i32 [[TMP23]], ptr [[TMP21]], align 4, !alias.scope [[META9]], !noalias [[META6]]
 ; CHECK-VF4-IC1-NEXT:    br label [[PRED_STORE_CONTINUE6]]
 ; CHECK-VF4-IC1:       pred.store.continue6:
-; CHECK-VF4-IC1-NEXT:    [[TMP24:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE4]] ], [ [[TMP22]], [[PRED_STORE_IF5]] ]
 ; CHECK-VF4-IC1-NEXT:    [[TMP25:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
 ; CHECK-VF4-IC1-NEXT:    br i1 [[TMP25]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]]
 ; CHECK-VF4-IC1:       pred.store.if7:
@@ -543,7 +540,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF4-IC1-NEXT:    store i32 [[TMP29]], ptr [[TMP27]], align 4, !alias.scope [[META9]], !noalias [[META6]]
 ; CHECK-VF4-IC1-NEXT:    br label [[PRED_STORE_CONTINUE8]]
 ; CHECK-VF4-IC1:       pred.store.continue8:
-; CHECK-VF4-IC1-NEXT:    [[TMP30:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE6]] ], [ [[TMP28]], [[PRED_STORE_IF7]] ]
 ; CHECK-VF4-IC1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-VF4-IC1-NEXT:    [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF4-IC1-NEXT:    br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
@@ -636,7 +632,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    store i32 [[TMP18]], ptr [[TMP16]], align 4, !alias.scope [[META9]], !noalias [[META6]]
 ; CHECK-VF4-IC2-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK-VF4-IC2:       pred.store.continue:
-; CHECK-VF4-IC2-NEXT:    [[TMP19:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP17]], [[PRED_STORE_IF]] ]
 ; CHECK-VF4-IC2-NEXT:    [[TMP20:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1
 ; CHECK-VF4-IC2-NEXT:    br i1 [[TMP20]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]]
 ; CHECK-VF4-IC2:       pred.store.if6:
@@ -647,7 +642,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    store i32 [[TMP24]], ptr [[TMP22]], align 4, !alias.scope [[META9]], !noalias [[META6]]
 ; CHECK-VF4-IC2-NEXT:    br label [[PRED_STORE_CONTINUE7]]
 ; CHECK-VF4-IC2:       pred.store.continue7:
-; CHECK-VF4-IC2-NEXT:    [[TMP25:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP23]], [[PRED_STORE_IF6]] ]
 ; CHECK-VF4-IC2-NEXT:    [[TMP26:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2
 ; CHECK-VF4-IC2-NEXT:    br i1 [[TMP26]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]]
 ; CHECK-VF4-IC2:       pred.store.if8:
@@ -658,7 +652,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    store i32 [[TMP30]], ptr [[TMP28]], align 4, !alias.scope [[META9]], !noalias [[META6]]
 ; CHECK-VF4-IC2-NEXT:    br label [[PRED_STORE_CONTINUE9]]
 ; CHECK-VF4-IC2:       pred.store.continue9:
-; CHECK-VF4-IC2-NEXT:    [[TMP31:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE7]] ], [ [[TMP29]], [[PRED_STORE_IF8]] ]
 ; CHECK-VF4-IC2-NEXT:    [[TMP32:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3
 ; CHECK-VF4-IC2-NEXT:    br i1 [[TMP32]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]]
 ; CHECK-VF4-IC2:       pred.store.if10:
@@ -669,7 +662,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    store i32 [[TMP36]], ptr [[TMP34]], align 4, !alias.scope [[META9]], !noalias [[META6]]
 ; CHECK-VF4-IC2-NEXT:    br label [[PRED_STORE_CONTINUE11]]
 ; CHECK-VF4-IC2:       pred.store.continue11:
-; CHECK-VF4-IC2-NEXT:    [[TMP37:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE9]] ], [ [[TMP35]], [[PRED_STORE_IF10]] ]
 ; CHECK-VF4-IC2-NEXT:    [[TMP38:%.*]] = extractelement <4 x i1> [[TMP8]], i32 0
 ; CHECK-VF4-IC2-NEXT:    br i1 [[TMP38]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]]
 ; CHECK-VF4-IC2:       pred.store.if12:
@@ -679,7 +671,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    store i32 [[TMP41]], ptr [[TMP39]], align 4, !alias.scope [[META9]], !noalias [[META6]]
 ; CHECK-VF4-IC2-NEXT:    br label [[PRED_STORE_CONTINUE13]]
 ; CHECK-VF4-IC2:       pred.store.continue13:
-; CHECK-VF4-IC2-NEXT:    [[TMP42:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE11]] ], [ [[TMP40]], [[PRED_STORE_IF12]] ]
 ; CHECK-VF4-IC2-NEXT:    [[TMP43:%.*]] = extractelement <4 x i1> [[TMP8]], i32 1
 ; CHECK-VF4-IC2-NEXT:    br i1 [[TMP43]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15:%.*]]
 ; CHECK-VF4-IC2:       pred.store.if14:
@@ -690,7 +681,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    store i32 [[TMP47]], ptr [[TMP45]], align 4, !alias.scope [[META9]], !noalias [[META6]]
 ; CHECK-VF4-IC2-NEXT:    br label [[PRED_STORE_CONTINUE15]]
 ; CHECK-VF4-IC2:       pred.store.continue15:
-; CHECK-VF4-IC2-NEXT:    [[TMP48:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE13]] ], [ [[TMP46]], [[PRED_STORE_IF14]] ]
 ; CHECK-VF4-IC2-NEXT:    [[TMP49:%.*]] = extractelement <4 x i1> [[TMP8]], i32 2
 ; CHECK-VF4-IC2-NEXT:    br i1 [[TMP49]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17:%.*]]
 ; CHECK-VF4-IC2:       pred.store.if16:
@@ -701,7 +691,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    store i32 [[TMP53]], ptr [[TMP51]], align 4, !alias.scope [[META9]], !noalias [[META6]]
 ; CHECK-VF4-IC2-NEXT:    br label [[PRED_STORE_CONTINUE17]]
 ; CHECK-VF4-IC2:       pred.store.continue17:
-; CHECK-VF4-IC2-NEXT:    [[TMP54:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE15]] ], [ [[TMP52]], [[PRED_STORE_IF16]] ]
 ; CHECK-VF4-IC2-NEXT:    [[TMP55:%.*]] = extractelement <4 x i1> [[TMP8]], i32 3
 ; CHECK-VF4-IC2-NEXT:    br i1 [[TMP55]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19]]
 ; CHECK-VF4-IC2:       pred.store.if18:
@@ -712,7 +701,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    store i32 [[TMP59]], ptr [[TMP57]], align 4, !alias.scope [[META9]], !noalias [[META6]]
 ; CHECK-VF4-IC2-NEXT:    br label [[PRED_STORE_CONTINUE19]]
 ; CHECK-VF4-IC2:       pred.store.continue19:
-; CHECK-VF4-IC2-NEXT:    [[TMP60:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE17]] ], [ [[TMP58]], [[PRED_STORE_IF18]] ]
 ; CHECK-VF4-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; CHECK-VF4-IC2-NEXT:    [[TMP61:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF4-IC2-NEXT:    br i1 [[TMP61]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
@@ -804,7 +792,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF1-IC2-NEXT:    store i32 [[TMP17]], ptr [[TMP15]], align 4, !alias.scope [[META9]], !noalias [[META6]]
 ; CHECK-VF1-IC2-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK-VF1-IC2:       pred.store.continue:
-; CHECK-VF1-IC2-NEXT:    [[TMP18:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP16]], [[PRED_STORE_IF]] ]
 ; CHECK-VF1-IC2-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
 ; CHECK-VF1-IC2:       pred.store.if5:
 ; CHECK-VF1-IC2-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP2]]
@@ -813,7 +800,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF1-IC2-NEXT:    store i32 [[TMP21]], ptr [[TMP19]], align 4, !alias.scope [[META9]], !noalias [[META6]]
 ; CHECK-VF1-IC2-NEXT:    br label [[PRED_STORE_CONTINUE6]]
 ; CHECK-VF1-IC2:       pred.store.continue6:
-; CHECK-VF1-IC2-NEXT:    [[TMP22:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP20]], [[PRED_STORE_IF5]] ]
 ; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-VF1-IC2-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF1-IC2-NEXT:    br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
index 313be091f5f09..9c07281a9a8a9 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
@@ -46,7 +46,6 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; CHECK-NEXT:   Successor(s): pred.store.continue
 
 ; CHECK:      pred.store.continue:
-; CHECK-NEXT:   PHI-PREDICATED-INSTRUCTION vp<[[PRED:%.+]]> = ir<%lv.b>
 ; CHECK-NEXT:   No successors
 ; CHECK-NEXT: }
 
@@ -768,8 +767,6 @@ define void @update_2_uses_in_same_recipe_in_merged_block(i32 %k) {
 ; CHECK-NEXT:   Successor(s): pred.store.continue
 ; CHECK-EMPTY:
 ; CHECK-NEXT:   pred.store.continue:
-; CHECK-NEXT:     PHI-PREDICATED-INSTRUCTION vp<[[PRED1:%.+]]> = ir<%lv.a>
-; CHECK-NEXT:     PHI-PREDICATED-INSTRUCTION vp<[[PRED2:%.+]]> = ir<%div>
 ; CHECK-NEXT:   No successors
 ; CHECK-NEXT: }
 ; CHECK-NEXT: Successor(s): loop.2
@@ -854,7 +851,6 @@ define void @recipe_in_merge_candidate_used_by_first_order_recurrence(i32 %k) {
 ; CHECK-NEXT:   Successor(s): pred.store.continue
 ; CHECK-EMPTY:
 ; CHECK-NEXT:   pred.store.continue:
-; CHECK-NEXT:     PHI-PREDICATED-INSTRUCTION vp<[[PRED2:%.+]]> = ir<%div>
 ; CHECK-NEXT:   No successors
 ; CHECK-NEXT: }
 ; CHECK-NEXT: Successor(s): loop.2
@@ -914,7 +910,6 @@ define void @update_multiple_users(ptr noalias %src, ptr noalias %dst, i1 %c) {
 ; CHECK-NEXT:   Successor(s): pred.store.continue
 ; CHECK-EMPTY:
 ; CHECK-NEXT:   pred.store.continue:
-; CHECK-NEXT:     PHI-PREDICATED-INSTRUCTION vp<[[PRED:%.+]]> = ir<%l1>
 ; CHECK-NEXT:   No successors
 ; CHECK-NEXT: }
 ; CHECK-NEXT: Successor(s): loop.then.1
@@ -1053,7 +1048,6 @@ define void @merge_with_dead_gep_between_regions(i32 %n, ptr noalias %src, ptr n
 ; CHECK-NEXT:     Successor(s): pred.store.continue
 ; CHECK-EMPTY:
 ; CHECK-NEXT:     pred.store.continue:
-; CHECK-NEXT:       PHI-PREDICATED-INSTRUCTION vp<[[P_LOAD:%.+]]> = ir<%l>
 ; CHECK-NEXT:     No successors
 ; CHECK-NEXT:   }
 ; CHECK-NEXT:   Successor(s): loop.1
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
index c915b9a5e59ac..16e9e5eeb6143 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
@@ -397,3 +397,27 @@ entry:
   %ptr2 = getelementptr inbounds %struct0, ptr %ptr, i65 1, i32 3, i64 %idx, i32 1
   ret ptr %ptr2
 }
+
+; Do not extract large constant offset that cannot be folded in to PTX
+; addressing mode
+define void @large_offset(ptr %out, i32 %in) {
+; CHECK-LABEL: define void @large_offset(
+; CHECK-SAME: ptr [[OUT:%.*]], i32 [[IN:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[TMP0]], 536870912
+; CHECK-NEXT:    [[IDX:%.*]] = zext nneg i32 [[ADD]] to i64
+; CHECK-NEXT:    [[GETELEM:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 [[IDX]]
+; CHECK-NEXT:    store i32 [[IN]], ptr [[GETELEM]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+  %add = add nuw nsw i32 %0, 536870912
+  %idx = zext nneg i32 %add to i64
+  %getElem = getelementptr inbounds i32, ptr %out, i64 %idx
+  store i32 %in, ptr %getElem, align 4
+  ret void
+}
+
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
diff --git a/llvm/test/tools/llvm-profgen/Inputs/cmov_3.perfbin b/llvm/test/tools/llvm-profgen/Inputs/cmov_3.perfbin
new file mode 100755
index 0000000000000000000000000000000000000000..7a1543041f8055c4dbc60c23645f91f680ba621b
GIT binary patch
literal 27192
zcmeHQeQX at Zb)Wm#6D9GRjzxXNtAw&CDYrb5lFC|Nd;HSTrX)t9WhrsII^HeGtB!Z<
z-JU|FK_i)t6Gn!jq)wx>MqmR)8>2~(KLW=HoS24^NNVdu4$#183r7j!Le7W7B#NuL
zuCMRS%scKb_Z*={3kcW&Zr{A$dv8A8%+BuY42K^Z7#<7;0*peC?PkawYt%U;u8cjC
z<gCPcSRK>Zd)NjR0VE7RolB647}1x4LO`@e>V<%!T`NX7)g5#l=NKbOB}9sLi$RxB
zL3<Rz5 at n5oM>{T(Jd^b!`lKueQM(<_PZjAfRTmLScglhhwcDX at m?qTOZe2tq)umr1
zB_%s#(XLV2HA;P=S7m-g$)8w8{PoCs^n`TakQY=%27=13jJfTONV_9apD5`QCEp)<
z#Y+Eg@*9zMVVPi9`jO~PSw5na-}}Li`%}vQ5TRy1DD$h<4vhRSX+S32k<E;4-`bH)
zN3)q+X(l?;vpu?ft5(cwU0egpg?V6~+PnWg7Cgn~5tDA at ZGm!359Jt8M%f`wKa|~U
z*aC{p9?H9_;41-FJ at nhE=<lwAGw{b0R)0G)jESjy&L~>Rf at K&eND0AIGMmk(K#q`<
zPbD)sRxBiQX|{KGsJGAP(z>*5m3a47$S=q}6XMqZaBBdg_A9N)EJx4vN$l&N`Ag^q
zL3RLm3=4a7PO)#GKPlX>;nXH5Yt?nGvJ9@&76_;|ocbK%?KWHvEeQL;_kiyK-vhn}
zd=K~@@IByr!1ur{>4D!hy!(yB>@RB)XOvev7)yM7-U^nlBxb);bAi{VyzQp|mpA?l
zt_^qS5JP?vWv^Z>L)iE)$m6T`t5-PxL*((*`qg>hG5yY84b8pyU}Em4iP at hmjEoL+
z&Uao;T)e2ZKyvRo4(iv(wT3$$#xxn~Usz<%J)^@5cw!e`t(Ip{1B<x)IRwgA$Io48
zxTBx3MT2j1{nM{;GPjU;{<S at c=Wm1(f$t?=e9LO8q_9k;;I==tYZDZRl~&!Cn0w{g
zpF<74r>p`b at FAh;*8j!X#k|o!@%!V4<D-X0o;%eOVAo!Lt6WaZDL;nEyZ~Mhz>g}x
zvjTXb0{p!IzJtJKD9N=?A#i5>Ujs0FX4~UH;?LEBVED|&&jT~gL&bmB{tVU6Co-45
za^_zr6LbHNc>d?$f&YvKUd&up_cONXQtw><j_Z9F`*&Z5R2W2dUdQIU7QZRW8)$ih
zEN^hjWjAH{CR)BpmTz*)7ykPXmo~hk?_wC|rBK!||IRWrd$Dooboo(mal^+~KpSCy
z{b2lo_=9odg3}+J^G^Qs&PTfMVr*#c%11*(bC(9rgirO)g+Gxf&nI at FdrB|!1S<Z#
zPc|srBaIDTeW0!O^l)3PehO}eua36WUaJ$0b9<CG&VRfQ$*vnfzSRi1KoebuIFz80
zUSjUY!*j0>&%H4`_mgV|6utaH=@*^zP=Q8Sfe&4%+>@2N5G(Ho-vhn}d=K~@@IByr
z!1sXf0pA0 at 2Ye5_GY{Z;TVQQyM-L=}YiR2h%H at v&eE{ev&_{vdY=8JS<?`PE{kPwi
z%U6M}`(3$=^T*rZhavEhgDf!97+71otmYg{?Qs2W9SXqyPNe<I8WD at Z5A&e4v^EsM
z at BFXJWgRH1Zyc;|y{BR2higu;J!^Js?rwVz!dSj6{GI^Yhj8lHTi at 7Q-`ZQR#_QYT
z^|4vy(eT;Or-PwF0C|<mKm*sgG2bu2?>f|>8z=@`Uw#&TD%8h9AWrG^^7Q)a_1Vay
z%GvOzLyrW5Mtuyjfy_j*ah at z%`N8*q?*ZQfz6X2{_#W^*;CsOLfbRj{1GkI^^d_B`
zwNX->T48Wg+>?o+RuGs}={!~^`Nv=}7sG0Ph0AW8$JR<7=V2Hqof;`$C*aC{<lnzl
z&LcDv(z%4 at iS1#*;~W*kc`08ZSoQ%)alb2uRzcXiBqdVW3kvJU7=ru?tICqnd=~eI
zVpt^zTaft^J}-Hy7tU)jz-Cn*D1TTV<n=ix^<lw}^W<Nc&T{@CDVOH|Z-n-hyEEJ_
z+jmgX!;-r7_V)GNt+vAs=S^y7Cv5HQRJ&qbU9rxtKT_KdnrStWw4@;V$D5d4JEnDN
zF*cO5%xwF?O=>=8#z%(K{*k^dTRPyU_T{IhGg-5M33fxuF}0n?X0(`g6vDoJ{r#I5
zwl~i2vBU0-w*|VVJ7e3nGWLipFHseBFbF-RP|D>pxe0Y5m2wxHsa$bUH<`=j$1*uj
z-AaF#=}u=$6QG()PMLSB(h!rQekt<by!XKU>`w-u^F<taZ4P4pY;a&(V54>FxRJ^x
zi<6ACIAohs<HmF$Z<#48lh5HzME9f2XYjzm(V at Yktc+&2J8ZY`{n$s281X}g20-h;
z{zD*lm1h~2hwed<<_6y7U1{Z~GL|u3fLa^V`3$sa0qp2rkY$N9%Rf74a+G6(s9(WM
zW-}8x6jf`}W3o+&s*ZHp{5FmIZR+2ra8vNH$iGi<(8eLcAA2|ZV=s0DeqZ>{Q~w<a
zI|BsXA2$2%NaBT^soW{#mo~+J at 0AYmpQrv@#&K`>&r|LW|9R^8=0NjOqN=0M`18}f
zx8VHroc|pO`uMH+j)dRB at Y^6BZ{n^4wd?0(`{MVkMP2XjC~h17JC*<KQOqIyvHeyY
z+vzz?RE05s2R>f-ZR)qFzwWbmKm6Z+PsW8M$68*pvvLj at t>by=&@-H<mky)nyrf<_
zw1)k-T+4nt9n$sExn1V5_HFW_Jf4(!)xnq(=FYBx80@!(-+H#d5yoBzIwDu-`(f2E
zNDOX;z7u|dn$R~uXH4pFrf6p#wlkk2<~gwWTVgH(^Urpsp;-qAPG3UFke!*83=6M@
z%!6x#VHN4cKzIYv?*^2sQllG8w_pKp58~f at W5Dg!-3Z+g(jeL=;m{9&IV~BGHU&1;
zpFUCFqJ-)jSFNpY3N*voWKaXVIr915)uH;kSMGo}u+SX%ptGe35)5mMqsm>Ycdp(8
zE3=BmT-xltsK>TLR)Ij^zv}Cd0ECb6Ku4)q=*Z+!*;3ks-R2ofOM!$Htu!2HrA?v^
zENWJKzgnqXF{`YId{L>5lpM4U_?3VwK$kyRx~#O+99Gsujwnqvmm)_$qqLm8Dem4`
zWfcpCgCA2AB#|#Gfym0^3ak07vU$bpN^|6kp>Zt|TDwzejNGHF2c2*vAxt7Wh0Qpa
zbSdm5CA6aEvQqZ}<+g~Wtg#gJn~GASDNQ`xvJ&D{i^L);wIKQgR-0jQd at NanCH!}~
zM6X7pxFVcYqvNGq=1)r|XA8Mx7SC2Yo}4hFxLjTYdD<K+O at MejA05vZqVRe-o&lW+
z5Q(@MEheX?vu1R{%$bFxWvaZH)My?fYV<%CsGcxW&|td`9o{z#+ at zVw<x9orM6MJy
zXDl;Ugr#!GEM1zKPG<^e7nP0O0mqjmv*}Fncvputl`5EKYO-V6ER5f47IJ2`Z+e=^
zGtR_`W|>^ZVns7+Bnt(Qh}9<SB;FweJ*YLLFA;+GE9dmtg?J*`5l{VaWq*uQU#aYi
zapDn1`)8c^GUm1KBm^4?Xupb6e>og-r`sVO53v=M{V{f72v^?Z80f at nnd*txRmOX#
zem$e{)``PT301d4JRX9l2%RTr7lv39(>?W at EBlq4`Ynw1DLL`mnAg6U5bQRh^XQ!V
zFuT-sJH+E5*j*I!#9Kk#OTV()#F17Ac2!Lt^U|l?R}OXPRkZ8MjjwkCk_)kSFTrsv
z471<xeI<4w&Z8L&A at sZ^86IWfh4)IF-oZ(q at 8^Pa-1vzq`hQUc|9TbtN)>#e3cegG
z>`@;6 at U`8iD%wMcNqx5;^eXshmGt?^DjxZss-pjeD)`e?@b6c_e_jPISHW9 at P-!S9
zeIwycKtD*_?T2BByW5wa9OF^nPgc=?stW%7D)?(v at MhfT<dJVj6?{bE?)rXM;`A;`
zes~;kulwci01v{httVXL;ZqX7;=-SmIPKS@^nVCAJY6c|1~wChmpQ(498=(B6Z570
zpF*3lCcp!1>3A3e+^hWCxqd54=sHgV--j{mkobrT*Cl?$g%3#lm<zAww|lvM6YJTa
z^Ze`=uW}yc`b)>{F~D)w;eK9}xc<^{`wZaVoh9KP0arc7^QQoB1l&EIe+|<KE`6>&
zT_yd?G9CBx;zxk1OG?AoD}XlzSFs-HPg40U;<&?5)j0$#4~JlrUo-Rv`@{kT0g2n+
z=Mi7daf~^`-XPYul})<xU61;<@c~#KegFl)e2>X~ey7B at fdZ!E_FJ6iTgU8^2{ai5
z9P8`WKZyFlrSBo%;Q7kT>;w<~?a(xN{%@*ab!vI0$)#{3IhHYUu-_791|FDU!ZEiV
zxHg%$yl~X7Ceuo}Z05L`Jz3T8?G!YAI&W#Zs`1;%HFE}DYdxyLi_3)ucZ8LkSX9Gz
zVY^oZ at AQbkoHbt5^TIv6U^7_i0hz;++S!b%$XvCy893bm=2j+U6ik>a*z4=Wo%MFg
zob`0dmi7pz+`}JE1)jiCzc`ha_710<CjwBxviBgTj>}(8Ay2^RHzS=DPJ>Aso(+vo
zoX(h{D2v^wni}3&a5EOI5*((LVrjErPGpL(&)Bf0U~e?+9xuYd)9Ji1k<E`Kvqsv=
z7m7x*G{aJ`k34HyW*T;zdukZt(AYSIOP+*sTZNNMY-ENB;?zknv2g}=3x}%sN-$*<
zI~iQM at X`&JbkPFK!GrOA1IEDqemExE=s&tYzHg`xVAw5h3?!s9(SMK`dxsD7#)pjq
zgM)_#Mvc*U at 9+RvxX;nnbvi;Dj?FgAbka&PUU+f9_M#Y1*oLZtT%5uUjN`~|kQMXB
zBpk|}HJLGV0JPGXoKb?=zHK^-hk3iw^ln2)eymuOS at CJGVT|tUql%-?;2iwo$tf#2
z2GlADI!Q6UUmS)iKCr?xU4ywk%(^WY`ZyHt9g6a?24$1D4PHy1%t3mBS_P(|<b+wk
zsl9_SKnAyJ5)EW*ItzmvFOz1$_W-70^MfqU{i&IgvcV_QU`w2^5G^k(C=LlHr!w$3
zn7}a}E*j53BnEC_8g#%Z=vSg#W2H<sttBn1kQpmk&}lXP%>z2tI6P)VJUKR2Fi#K%
z-#<)>gU9U8s&^2_nIV6bmub*<3HpBw?inJzPg0ieg$VvF3_R|6=n3hP3SJeYa;5h|
z0=7fMZcpDi=>HI?q$nx9rx4r^k=NmGXDI&}ng0yD?_!|x!{eU^cm8nHAKw>D_UEPj
zd1-%6+Tq_rVBN`{{;uaRV5my=^nImW7L@)T2~>E%H0<F)VDN8&$ezB#JS6Su at 2sf&
zq(}5P*yG<85l`Q1&Pn^2%*<VWD#J%0f~sUs-*x6)_LRT7{trw0UYQ{Mebj=qPe>f?
z;VpWJ|IPx2KEd+wAm9JsrH=>qmkXJ!5W;m1es+6R+N;uDuV{*hgeiIUn9H8tKaWYp
zt+F3bGGtHuU%Bk*`_yACLZk9`IMTpf{wJh;vy{{OI8z1S)+e6mm!v(lAI)FF(tf8%
zrg9}a0-lBl`j71CyO^#DjW-3UT&evDn1 at I`{3>qRtpf)EKj}ko>l07(pFjuy&WY^l
zJ6p|7+P?(0*muaDzQ at u3O}UDdgFyX@@}+kC3CK`|?CHDRLI+ym+`}2jp6I`W*lrIK
z4`C2<5W&Ee?1=svFuOg?>tfQrTK<&a?`4`)ewz2_(!QM(C8c}`egh(yzf>ofk^QL(
zA|j}ZfoKRKIDS*0?~n5`!Amp%O9glT!*wG{i_$>)$$f&u^&ER}v20@%`!DPlR`=VD
zA?~*43s5c{WEZ6UT`mI`%l63p-TqrOA`<O!84zoybnn at VG4g)F&C5TYQ-K|YAGIGn
me^5+q57A0!{Gf;)p at CaUC|^=+f{2&><T0UfkITTtvi||LxjNba

literal 0
HcmV?d00001

diff --git a/llvm/test/tools/llvm-profgen/Inputs/cmov_3.perfscript b/llvm/test/tools/llvm-profgen/Inputs/cmov_3.perfscript
new file mode 100644
index 0000000000000..3d29d444d56bb
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/Inputs/cmov_3.perfscript
@@ -0,0 +1,39 @@
+  br_inst_retired.near_taken:upp:            401310 0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/21//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/3//- 
+  br_inst_retired.near_taken:upp:            401310 0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/28//-  0x401310/0x4012f0/P/-/-/21//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/24//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//- 
+  br_inst_retired.near_taken:upp:            401310 0x401310/0x4012f0/P/-/-/21//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/29//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/2//- 
+br_misp_retired.all_branches:upp:            4012fa 0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/24//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/4//- 
+  br_inst_retired.near_taken:upp:            401310 0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/21//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/22//- 
+  br_inst_retired.near_taken:upp:            401310 0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/2//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/21//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/21//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/3//- 
+  br_inst_retired.near_taken:upp:            401310 0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/28//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//- 
+br_misp_retired.all_branches:upp:            4012fa 0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/28//-  0x401310/0x4012f0/P/-/-/24//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/21//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//- 
+  br_inst_retired.near_taken:upp:            401310 0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/24//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/4//- 
+  br_inst_retired.near_taken:upp:            401310 0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/24//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/26//- 
+  br_inst_retired.near_taken:upp:            401310 0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/6//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/21//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/14//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/21//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/2//- 
+br_misp_retired.all_branches:upp:            4012fa 0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/28//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/28//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/26//- 
+  br_inst_retired.near_taken:upp:            401310 0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/21//-  0x4012fa/0x4012ff/M/-/-/5//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/24//-  0x401310/0x4012f0/P/-/-/2//- 
+  br_inst_retired.near_taken:upp:            401310 0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/28//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/28//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//- 
+  br_inst_retired.near_taken:upp:            401310 0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/28//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/28//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/4//- 
+br_misp_retired.all_branches:upp:            4012fa 0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/2//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/2//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/21//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/1//- 
+  br_inst_retired.near_taken:upp:            401310 0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/27//- 
+  br_inst_retired.near_taken:upp:            401310 0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/29//-  0x401310/0x4012f0/P/-/-/24//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/19//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/21//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/28//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/26//- 
+  br_inst_retired.near_taken:upp:            401310 0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/1//- 
+  br_inst_retired.near_taken:upp:            401310 0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/29//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/4//- 
+br_misp_retired.all_branches:upp:            4012fa 0x401310/0x4012f0/P/-/-/13//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//- 
+  br_inst_retired.near_taken:upp:            401310 0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/24//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/24//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//- 
+  br_inst_retired.near_taken:upp:            401310 0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/28//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/28//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//- 
+  br_inst_retired.near_taken:upp:            401310 0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/2//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/28//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/3//- 
+br_misp_retired.all_branches:upp:            4012fa 0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/20//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/34//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//- 
+  br_inst_retired.near_taken:upp:            4012fa 0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/21//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//- 
+  br_inst_retired.near_taken:upp:            401310 0x401310/0x4012f0/P/-/-/21//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/24//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/17//-  0x401310/0x4012f0/P/-/-/9//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/4//- 
+  br_inst_retired.near_taken:upp:            401310 0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/24//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//- 
+br_misp_retired.all_branches:upp:            4012fa 0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/2//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//- 
+  br_inst_retired.near_taken:upp:            401310 0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/29//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/6//-  0x401310/0x4012f0/P/-/-/24//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/28//- 
+  br_inst_retired.near_taken:upp:            401310 0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/15//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/9//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/1//- 
+  br_inst_retired.near_taken:upp:            401310 0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/21//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/28//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/17//-  0x4012fa/0x4012ff/M/-/-/1//- 
+br_misp_retired.all_branches:upp:            4012fa 0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/4//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/1//- 
+  br_inst_retired.near_taken:upp:            401310 0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/21//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/24//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/4//- 
+  br_inst_retired.near_taken:upp:            401310 0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/19//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/10//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/28//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/2//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/25//- 
+  br_inst_retired.near_taken:upp:            401310 0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/7//-  0x4012fa/0x4012ff/M/-/-/1//- 
+br_misp_retired.all_branches:upp:            4012fa 0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/21//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/24//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/23//- 
+  br_inst_retired.near_taken:upp:            401310 0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/3//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/2//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/5//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/1//-  0x401310/0x4012f0/P/-/-/1//- 
+  br_inst_retired.near_taken:upp:            401310 0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/3//-  0x401310/0x4012f0/P/-/-/23//-  0x4012fa/0x4012ff/M/-/-/4//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/2//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/27//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/25//-  0x401310/0x4012f0/P/-/-/4//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/3//-  0x401310/0x4012f0/P/-/-/1//-  0x4012fa/0x4012ff/P/-/-/1//-  0x401310/0x4012f0/P/-/-/28//-  0x401310/0x4012f0/P/-/-/22//-  0x4012fa/0x4012ff/M/-/-/2//-  0x401310/0x4012f0/P/-/-/26//-  0x401310/0x4012f0/P/-/-/28//- 
diff --git a/llvm/test/tools/llvm-profgen/Inputs/ip-duplication.perfscript b/llvm/test/tools/llvm-profgen/Inputs/ip-duplication.perfscript
new file mode 100644
index 0000000000000..f0d4efcbe668e
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/Inputs/ip-duplication.perfscript
@@ -0,0 +1,2 @@
+           4006b7 0x4006b7/0x40068b/P/-/-/1  0x4006c8/0x4006b0/P/-/-/1  0x400689/0x4006b9/P/-/-/1  0x40066d/0x400686/P/-/-/2  0x4007a6/0x400650/P/-/-/9  0x4007ca/0x400790/P/-/-/8  0x4007d7/0x4007bd/P/-/-/1  0x400792/0x4007d7/P/-/-/1  0x4007b8/0x400790/P/-/-/2  0x4006a2/0x4007a8/P/-/-/3
+           40065d 40065d/0x40068f/M/-/-/1
diff --git a/llvm/test/tools/llvm-profgen/Inputs/noprobe-skid.perfscript b/llvm/test/tools/llvm-profgen/Inputs/noprobe-skid.perfscript
new file mode 100644
index 0000000000000..7c70a18452e57
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/Inputs/noprobe-skid.perfscript
@@ -0,0 +1,5 @@
+// Invalid perf line
+           40062f 0x40062f/0x4005b0/P/-/-/9  0x400645/0x4005ff/P/-/-/1  0x400637/0x400645/P/-/-/1  0x4005e9/0x400634/P/-/-/1  0x4005d7/0x4005e5/P/-/-/6  0x40062f/0x4005b0/P/-/-/16  0x400645/0x4005ff/P/-/-/1  0x400637/0x400645/P/-/-/1  0x4005e9/0x400634/P/-/-/1  0x4005d7/0x4005e5/P/-/-/6  0x40062f/0x4005b0/P/-/-/6  0x400645/0x4005ff/P/-/-/1  0x400637/0x400645/P/-/-/1  0x4005e9/0x400634/P/-/-/1  0x4005c8/0x4005dc/P/-/-/8  0x40062f/0x4005b0/P/-/-/9  0x400645/0x4005ff/P/-/-/1  0x400637/0x400645/P/-/-/1  0x4005e9/0x400634/P/-/-/1  0x4005d7/0x4005e5/P/-/-/10  0x40062f/0x4005b0/P/-/-/14  0x400645/0x4005ff/P/-/-/1  0x400637/0x400645/P/-/-/1  0x4005e9/0x400634/P/-/-/1  0x4005d7/0x4005e5/P/-/-/7  0x40062f/0x4005b0/P/-/-/8  0x400645/0x4005ff/P/-/-/1  0x400637/0x400645/P/-/-/1  0x4005e9/0x400634/P/-/-/1  0x4005c8/0x4005dc/P/-/-/7  0x40062f/0x4005b0/P/-/-/15  0x400645/0x4005ff/P/-/-/1
+           4005d7 0x4005d7/0x4005e5/P/-/-/8  0x40062f/0x4005b0/P/-/-/6  0x400645/0x4005ff/P/-/-/1  0x400637/0x400645/P/-/-/1  0x4005e9/0x400634/P/-/-/2  0x4005c8/0x4005dc/P/-/-/7  0x40062f/0x4005b0/P/-/-/11  0x400645/0x4005ff/P/-/-/1  0x400637/0x400645/P/-/-/1  0x4005e9/0x400634/P/-/-/1  0x4005d7/0x4005e5/P/-/-/8  0x40062f/0x4005b0/P/-/-/9  0x400645/0x4005ff/P/-/-/1  0x400637/0x400645/P/-/-/1  0x4005e9/0x400634/P/-/-/1  0x4005d7/0x4005e5/P/-/-/5  0x40062f/0x4005b0/P/-/-/11  0x400645/0x4005ff/P/-/-/1  0x400637/0x400645/P/-/-/1  0x4005e9/0x400634/P/-/-/2  0x4005c8/0x4005dc/P/-/-/7  0x40062f/0x4005b0/P/-/-/10  0x400645/0x4005ff/P/-/-/1  0x400637/0x400645/P/-/-/1  0x4005e9/0x400634/P/-/-/1  0x4005d7/0x4005e5/P/-/-/8  0x40062f/0x4005b0/P/-/-/9  0x400645/0x4005ff/P/-/-/1  0x400637/0x400645/P/-/-/1  0x4005e9/0x400634/P/-/-/1  0x4005d7/0x4005e5/P/-/-/13  0x40062f/0x4005b0/P/-/-/9
+           4005c8 0x4005c8/0x4005dc/P/-/-/11  0x40062f/0x4005b0/P/-/-/8  0x400645/0x4005ff/P/-/-/1  0x400637/0x400645/P/-/-/1  0x4005e9/0x400634/P/-/-/1  0x4005d7/0x4005e5/P/-/-/5  0x40062f/0x4005b0/P/-/-/6  0x400645/0x4005ff/P/-/-/1  0x400637/0x400645/P/-/-/1  0x4005e9/0x400634/P/-/-/1  0x4005d7/0x4005e5/P/-/-/12  0x40062f/0x4005b0/P/-/-/6  0x400645/0x4005ff/P/-/-/1  0x400637/0x400645/P/-/-/1  0x4005e9/0x400634/P/-/-/2  0x4005c8/0x4005dc/P/-/-/7  0x40062f/0x4005b0/P/-/-/10  0x400645/0x4005ff/P/-/-/1  0x400637/0x400645/P/-/-/1  0x4005e9/0x400634/P/-/-/1  0x4005d7/0x4005e5/P/-/-/8  0x40062f/0x4005b0/P/-/-/9  0x400645/0x4005ff/P/-/-/1  0x400637/0x400645/P/-/-/1  0x4005e9/0x400634/P/-/-/1  0x4005d7/0x4005e5/P/-/-/12  0x40062f/0x4005b0/P/-/-/6  0x400645/0x4005ff/P/-/-/1  0x400637/0x400645/P/-/-/1  0x4005e9/0x400634/P/-/-/2  0x4005c8/0x4005dc/P/-/-/8  0x40062f/0x4005b0/P/-/-/8
+           4005c5 0x4005c8/0x4005dc/P/-/-/11  0x40062f/0x4005b0/P/-/-/8  0x400645/0x4005ff/P/-/-/1  0x400637/0x400645/P/-/-/1  0x4005e9/0x400634/P/-/-/1  0x4005d7/0x4005e5/P/-/-/5  0x40062f/0x4005b0/P/-/-/6  0x400645/0x4005ff/P/-/-/1  0x400637/0x400645/P/-/-/1  0x4005e9/0x400634/P/-/-/1  0x4005d7/0x4005e5/P/-/-/12  0x40062f/0x4005b0/P/-/-/6  0x400645/0x4005ff/P/-/-/1  0x400637/0x400645/P/-/-/1  0x4005e9/0x400634/P/-/-/2  0x4005c8/0x4005dc/P/-/-/7  0x40062f/0x4005b0/P/-/-/10  0x400645/0x4005ff/P/-/-/1  0x400637/0x400645/P/-/-/1  0x4005e9/0x400634/P/-/-/1  0x4005d7/0x4005e5/P/-/-/8  0x40062f/0x4005b0/P/-/-/9  0x400645/0x4005ff/P/-/-/1  0x400637/0x400645/P/-/-/1  0x4005e9/0x400634/P/-/-/1  0x4005d7/0x4005e5/P/-/-/12  0x40062f/0x4005b0/P/-/-/6  0x400645/0x4005ff/P/-/-/1  0x400637/0x400645/P/-/-/1  0x4005e9/0x400634/P/-/-/2  0x4005c8/0x4005dc/P/-/-/8  0x40062f/0x4005b0/P/-/-/8
diff --git a/llvm/test/tools/llvm-profgen/event-filtering.test b/llvm/test/tools/llvm-profgen/event-filtering.test
new file mode 100644
index 0000000000000..ea486a8fa2f7b
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/event-filtering.test
@@ -0,0 +1,78 @@
+// RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cmov_3.perfscript --binary=%S/Inputs/cmov_3.perfbin --output=%t --skip-symbolization --perf-event=br_inst_retired.near_taken:upp
+// RUN: FileCheck %s --input-file %t --check-prefix=CHECK-RAW-PROFILE
+// RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cmov_3.perfscript --binary=%S/Inputs/cmov_3.perfbin --output=%t --perf-event=br_inst_retired.near_taken:upp
+// RUN: FileCheck %s --input-file %t --check-prefix=CHECK
+
+// RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cmov_3.perfscript --binary=%S/Inputs/cmov_3.perfbin --output=%t --skip-symbolization --perf-event=br_misp_retired.all_branches:upp --leading-ip-only
+// RUN: FileCheck %s --input-file %t --check-prefix=UNPRED-RAW-PROFILE
+// RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cmov_3.perfscript --binary=%S/Inputs/cmov_3.perfbin --output=%t --perf-event=br_misp_retired.all_branches:upp --leading-ip-only
+// RUN: FileCheck %s --input-file %t --check-prefix=UNPRED
+
+// Check that we can use perf event filtering to generate multiple types of
+// source-level profiles from a single perf profile. In this case, we generate
+// a typical execution frequency profile using br_inst_retired.near_taken LBRs,
+// and a branch mispredict profile using br_misp_retired.all_branches sample
+// IPs.
+
+// The source example below is based on perfKernelCpp/cmov_3, except a
+// misleading builtin is used to persuade the compiler not to use cmov, which
+// induces branch mispredicts.
+
+// CHECK: sel_arr:20229:0
+// CHECK:  3.1: 627
+// CHECK:  3.2: 627
+// CHECK:  4: 615
+// CHECK:  5: 627
+
+// UNPRED: sel_arr:18:0
+// UNPRED:  3.1: 0
+// UNPRED:  3.2: 0
+// UNPRED:  4: 9
+// UNPRED:  5: 0
+
+// CHECK-RAW-PROFILE:      3
+// CHECK-RAW-PROFILE-NEXT: 2f0-2fa:303
+// CHECK-RAW-PROFILE-NEXT: 2f0-310:312
+// CHECK-RAW-PROFILE-NEXT: 2ff-310:315
+
+// UNPRED-RAW-PROFILE:      1
+// UNPRED-RAW-PROFILE-NEXT: 2fa-2fa:9
+
+// original code:
+// clang -O2 -gline-tables-only -fdebug-info-for-profiling lit.c
+#include <stdlib.h>
+
+#define N 20000
+#define ITERS 10000
+
+static int *m_s1, *m_s2, *m_s3, *m_dst;
+
+void init(void) {
+    m_s1 = malloc(sizeof(int)*N);
+    m_s2 = malloc(sizeof(int)*N);
+    m_s3 = malloc(sizeof(int)*N);
+    m_dst = malloc(sizeof(int)*N);
+    srand(42);
+
+    for (int i = 0; i < N; i++) {
+        m_s1[i] = rand() % N;
+        m_s2[i] = 0;
+        m_s3[i] = 1;
+    }
+}
+
+void __attribute__((noinline)) sel_arr(int *dst, int *s1, int *s2, int *s3) {
+#pragma nounroll
+#pragma clang loop vectorize(disable) interleave(disable)
+    for (int i = 0; i < N; i++) {
+        int *p = __builtin_expect((s1[i] < 10035), 0) ? &s2[i] : &s3[i];
+        dst[i] = *p;
+    }
+}
+
+int main(void) {
+  init();
+  for(int i=0; i<ITERS; ++i)
+    sel_arr(m_dst, m_s1, m_s2, m_s3);
+  return 0;
+}
diff --git a/llvm/test/tools/llvm-profgen/iponly-nodupfactor.test b/llvm/test/tools/llvm-profgen/iponly-nodupfactor.test
new file mode 100644
index 0000000000000..006b1c42f3234
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/iponly-nodupfactor.test
@@ -0,0 +1,22 @@
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/ip-duplication.perfscript --binary=%S/Inputs/inline-noprobe2.perfbin --output=%t --use-offset=0 --leading-ip-only
+; RUN: FileCheck %s --input-file %t --check-prefix=CHECK
+
+; Test that we don't over-count samples for duplicated source code when
+; building an IP-based profile.
+
+; The inline-noprobe2.perfbin binary is used for this test because one of the
+; partition_pivot_last+3.1 debug locations has a duplication factor of 2
+; encoded into its discriminator. In IP-sample mode, a hit in one instruction
+; in the duplicated code does not imply a hit to the other duplicates.
+
+; The perfscript input includes 1 sample at a location with duplication factor
+; of 2, and another sample at the same source location but with no duplication
+; factor. These should be summed without duplication factors. Ensure we record
+; a count of 1+1=2 (and not 2+1=3) for the 3.1 location.
+
+;CHECK-LABEL: partition_pivot_last
+;CHECK-NEXT:  1: 0
+;CHECK-NEXT:  2: 0
+;CHECK-NEXT:  3: 0
+;CHECK-NEXT:  3.1: 2
+
diff --git a/llvm/test/tools/llvm-profgen/iponly.test b/llvm/test/tools/llvm-profgen/iponly.test
new file mode 100644
index 0000000000000..2e81798d7e6fe
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/iponly.test
@@ -0,0 +1,58 @@
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noprobe-skid.perfscript --binary=%S/Inputs/noprobe.perfbin --output=%t --skip-symbolization --leading-ip-only
+; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-RAW-PROFILE
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noprobe-skid.perfscript --binary=%S/Inputs/noprobe.perfbin --output=%t --leading-ip-only
+; RUN: FileCheck %s --input-file %t --check-prefix=CHECK
+
+; Here we check the ability to ignore LBRs, which is useful for generating
+; profiles where only the precise PMU sample IP is of interest. In general the
+; IPs need not identify a branch. In this case there are exactly 4 samples, so
+; we see only these 4 locations as "hot" and none of the LBR history.
+; Compare with noinline-noprobe.test, which includes LBR history.
+
+; Note that there are two different IPs (5c5 and 5c8) contributing to line
+; offset 1 in bar. This tests that sample counts corresponding to the same
+; debug location are summed into that location in the profile rather than the
+; maximum being taken, as happens with basic block execution count profiles.
+
+;CHECK: bar:14:0
+;CHECK:  0: 0
+;CHECK:  1: 2
+;CHECK:  2: 1
+;CHECK:  4: 0
+;CHECK:  5: 0
+;CHECK: foo:5:0
+;CHECK:  0: 0
+;CHECK:  1: 0
+;CHECK:  2: 0
+;CHECK:  3: 1
+;CHECK:  4: 0
+;CHECK:  5: 0
+
+CHECK-RAW-PROFILE:      4
+CHECK-RAW-PROFILE-NEXT: 5c5-5c5:1
+CHECK-RAW-PROFILE-NEXT: 5c8-5c8:1
+CHECK-RAW-PROFILE-NEXT: 5d7-5d7:1
+CHECK-RAW-PROFILE-NEXT: 62f-62f:1
+
+; original code:
+; clang -O3 -g -fdebug-info-for-profiling test.c -fno-inline -o a.out
+#include <stdio.h>
+
+int bar(int x, int y) {
+  if (x % 3) {
+    return x - y;
+  }
+  return x + y;
+}
+
+void foo() {
+  int s, i = 0;
+  while (i++ < 4000 * 4000)
+    if (i % 91) s = bar(i, s); else s += 30;
+  printf("sum is %d\n", s);
+}
+
+int main() {
+  foo();
+  return 0;
+}
diff --git a/llvm/tools/llvm-dwp/llvm-dwp.cpp b/llvm/tools/llvm-dwp/llvm-dwp.cpp
index 18f4f1a0eb9fb..60a89cb13c57a 100644
--- a/llvm/tools/llvm-dwp/llvm-dwp.cpp
+++ b/llvm/tools/llvm-dwp/llvm-dwp.cpp
@@ -266,9 +266,8 @@ int llvm_dwp_main(int argc, char **argv, const llvm::ToolContext &) {
 
   std::unique_ptr<MCStreamer> MS(TheTarget->createMCObjectStreamer(
       *ErrOrTriple, MC, std::unique_ptr<MCAsmBackend>(MAB),
-      MAB->createObjectWriter(*OS), std::unique_ptr<MCCodeEmitter>(MCE), *MSTI,
-      MCOptions.MCRelaxAll, MCOptions.MCIncrementalLinkerCompatible,
-      /*DWARFMustBeAtTheEnd*/ false));
+      MAB->createObjectWriter(*OS), std::unique_ptr<MCCodeEmitter>(MCE),
+      *MSTI));
   if (!MS)
     return error("no object streamer for target " + TripleName, Context);
 
diff --git a/llvm/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp b/llvm/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp
index 6ec19a367d58b..ef9d0f37198ac 100644
--- a/llvm/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp
+++ b/llvm/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp
@@ -233,9 +233,8 @@ int AssembleOneInput(const uint8_t *Data, size_t Size) {
     MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions);
     Str.reset(TheTarget->createMCObjectStreamer(
         TheTriple, Ctx, std::unique_ptr<MCAsmBackend>(MAB),
-        MAB->createObjectWriter(*OS), std::unique_ptr<MCCodeEmitter>(CE), *STI,
-        MCOptions.MCRelaxAll, MCOptions.MCIncrementalLinkerCompatible,
-        /*DWARFMustBeAtTheEnd*/ false));
+        MAB->createObjectWriter(*OS), std::unique_ptr<MCCodeEmitter>(CE),
+        *STI));
   }
   const int Res = AssembleInput(ProgName, TheTarget, SrcMgr, Ctx, *Str, *MAI, *STI,
       *MCII, MCOptions);
diff --git a/llvm/tools/llvm-mc/llvm-mc.cpp b/llvm/tools/llvm-mc/llvm-mc.cpp
index de999a48d5753..0f1e330541441 100644
--- a/llvm/tools/llvm-mc/llvm-mc.cpp
+++ b/llvm/tools/llvm-mc/llvm-mc.cpp
@@ -356,6 +356,9 @@ int main(int argc, char **argv) {
   cl::ParseCommandLineOptions(argc, argv, "llvm machine code playground\n");
   MCTargetOptions MCOptions = mc::InitMCTargetOptionsFromFlags();
   MCOptions.CompressDebugSections = CompressDebugSections.getValue();
+  MCOptions.ShowMCInst = ShowInst;
+  MCOptions.AsmVerbose = true;
+  MCOptions.MCUseDwarfDirectory = MCTargetOptions::EnableDwarfDirectory;
 
   setDwarfDebugFlags(argc, argv);
   setDwarfDebugProducer();
@@ -555,9 +558,7 @@ int main(int argc, char **argv) {
         TheTriple, Ctx, std::unique_ptr<MCAsmBackend>(MAB),
         DwoOut ? MAB->createDwoObjectWriter(*OS, DwoOut->os())
                : MAB->createObjectWriter(*OS),
-        std::unique_ptr<MCCodeEmitter>(CE), *STI, MCOptions.MCRelaxAll,
-        MCOptions.MCIncrementalLinkerCompatible,
-        /*DWARFMustBeAtTheEnd*/ false));
+        std::unique_ptr<MCCodeEmitter>(CE), *STI));
     if (NoExecStack)
       Str->initSections(true, *STI);
   }
diff --git a/llvm/tools/llvm-ml/llvm-ml.cpp b/llvm/tools/llvm-ml/llvm-ml.cpp
index 24643bd4296be..bcfec97019c1c 100644
--- a/llvm/tools/llvm-ml/llvm-ml.cpp
+++ b/llvm/tools/llvm-ml/llvm-ml.cpp
@@ -264,6 +264,7 @@ int llvm_ml_main(int Argc, char **Argv, const llvm::ToolContext &) {
   MCOptions.AssemblyLanguage = "masm";
   MCOptions.MCFatalWarnings = InputArgs.hasArg(OPT_fatal_warnings);
   MCOptions.MCSaveTempLabels = InputArgs.hasArg(OPT_save_temp_labels);
+  MCOptions.AsmVerbose = true;
 
   Triple TheTriple = GetTriple(ProgName, InputArgs);
   std::string Error;
@@ -402,9 +403,8 @@ int llvm_ml_main(int Argc, char **Argv, const llvm::ToolContext &) {
     MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions);
     Str.reset(TheTarget->createMCObjectStreamer(
         TheTriple, Ctx, std::unique_ptr<MCAsmBackend>(MAB),
-        MAB->createObjectWriter(*OS), std::unique_ptr<MCCodeEmitter>(CE), *STI,
-        MCOptions.MCRelaxAll, MCOptions.MCIncrementalLinkerCompatible,
-        /*DWARFMustBeAtTheEnd*/ false));
+        MAB->createObjectWriter(*OS), std::unique_ptr<MCCodeEmitter>(CE),
+        *STI));
   } else {
     llvm_unreachable("Invalid file type!");
   }
diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index 111c546f5329f..b4e4911fb8912 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -41,6 +41,17 @@ static cl::opt<bool>
                                 "and produce context-insensitive profile."));
 cl::opt<bool> ShowDetailedWarning("show-detailed-warning",
                                   cl::desc("Show detailed warning message."));
+cl::opt<bool>
+    LeadingIPOnly("leading-ip-only",
+                  cl::desc("Form a profile based only on sample IPs"));
+
+static cl::list<std::string> PerfEventFilter(
+    "perf-event",
+    cl::desc("Ignore samples not matching the given event names"));
+static cl::alias
+    PerfEventFilterPlural("perf-events", cl::CommaSeparated,
+                          cl::desc("Comma-delimited version of -perf-event"),
+                          cl::aliasopt(PerfEventFilter));
 
 extern cl::opt<std::string> PerfTraceFilename;
 extern cl::opt<bool> ShowDisassemblyOnly;
@@ -404,13 +415,18 @@ PerfScriptReader::convertPerfDataToTrace(ProfiledBinary *Binary, bool SkipPID,
     }
   }
 
+  // If filtering by events was requested, additionally request the "event"
+  // field.
+  const std::string FieldList =
+      PerfEventFilter.empty() ? "ip,brstack" : "event,ip,brstack";
+
   // Run perf script again to retrieve events for PIDs collected above
   SmallVector<StringRef, 8> ScriptSampleArgs;
   ScriptSampleArgs.push_back(PerfPath);
   ScriptSampleArgs.push_back("script");
   ScriptSampleArgs.push_back("--show-mmap-events");
   ScriptSampleArgs.push_back("-F");
-  ScriptSampleArgs.push_back("ip,brstack");
+  ScriptSampleArgs.push_back(FieldList);
   ScriptSampleArgs.push_back("-i");
   ScriptSampleArgs.push_back(PerfData);
   if (!PIDs.empty()) {
@@ -575,14 +591,54 @@ bool PerfScriptReader::extractLBRStack(TraceStream &TraceIt,
 
   // Skip the leading instruction pointer.
   size_t Index = 0;
+
+  StringRef EventName;
+  // Skip a perf event name. This may or may not exist.
+  if (Records.size() > Index && Records[Index].ends_with(":")) {
+    EventName = Records[Index].ltrim().rtrim(':');
+    Index++;
+
+    if (PerfEventFilter.empty()) {
+      WithColor::warning() << "No --perf-event filter was specified, but an "
+                              "\"event\" field was found in line "
+                           << TraceIt.getLineNumber() << ": "
+                           << TraceIt.getCurrentLine() << "\n";
+    } else if (std::find(PerfEventFilter.begin(), PerfEventFilter.end(),
+                         EventName) == PerfEventFilter.end()) {
+      TraceIt.advance();
+      return false;
+    }
+
+  } else if (!PerfEventFilter.empty()) {
+    WithColor::warning() << "A --perf-event filter was specified, but no "
+                            "\"event\" field found in line "
+                         << TraceIt.getLineNumber() << ": "
+                         << TraceIt.getCurrentLine() << "\n";
+  }
+
   uint64_t LeadingAddr;
-  if (!Records.empty() && !Records[0].contains('/')) {
-    if (Records[0].getAsInteger(16, LeadingAddr)) {
+  if (Records.size() > Index && !Records[Index].contains('/')) {
+    if (Records[Index].getAsInteger(16, LeadingAddr)) {
       WarnInvalidLBR(TraceIt);
       TraceIt.advance();
       return false;
     }
-    Index = 1;
+    Index++;
+  }
+
+  // We assume that if we saw an event name we also saw a leading addr.
+  // In other words, LeadingAddr is set if Index is 1 or 2.
+  if (LeadingIPOnly && Index > 0) {
+    // Form a profile only from the sample IP. Do not assume an LBR stack
+    // follows, and ignore it if it does.
+    uint64_t SampleIP = Binary->canonicalizeVirtualAddress(LeadingAddr);
+    bool SampleIPIsInternal = Binary->addressIsCode(SampleIP);
+    if (SampleIPIsInternal) {
+      // Form a half LBR entry where the sample IP is the destination.
+      LBRStack.emplace_back(LBREntry(SampleIP, SampleIP));
+    }
+    TraceIt.advance();
+    return !LBRStack.empty();
   }
 
   // Now extract LBR samples - note that we do not reverse the
@@ -902,6 +958,20 @@ void PerfScriptReader::computeCounterFromLBR(const PerfSample *Sample,
                                              uint64_t Repeat) {
   SampleCounter &Counter = SampleCounters.begin()->second;
   uint64_t EndAddress = 0;
+
+  if (LeadingIPOnly) {
+    assert(Sample->LBRStack.size() == 1 &&
+           "Expected only half LBR entries for ip-only mode");
+    const LBREntry &LBR = *(Sample->LBRStack.begin());
+    uint64_t SourceAddress = LBR.Source;
+    uint64_t TargetAddress = LBR.Target;
+    if (SourceAddress == TargetAddress &&
+        Binary->addressIsCode(TargetAddress)) {
+      Counter.recordRangeCount(SourceAddress, TargetAddress, Repeat);
+    }
+    return;
+  }
+
   for (const LBREntry &LBR : Sample->LBRStack) {
     uint64_t SourceAddress = LBR.Source;
     uint64_t TargetAddress = LBR.Target;
@@ -1062,6 +1132,18 @@ bool PerfScriptReader::isLBRSample(StringRef Line) {
   Line.trim().split(Records, " ", 2, false);
   if (Records.size() < 2)
     return false;
+  // Check if there is an event name before the leading IP.
+  // If there is, it will be in Records[0]. To skip it, we'll re-split on
+  // Records[1], which should contain the rest of the line.
+  if (Records[0].contains(":")) {
+    // If so, consume the event name and continue processing the rest of the
+    // line.
+    StringRef IPAndLBR = Records[1].ltrim();
+    Records.clear();
+    IPAndLBR.split(Records, " ", 2, false);
+    if (Records.size() < 2)
+      return false;
+  }
   if (Records[1].starts_with("0x") && Records[1].contains('/'))
     return true;
   return false;
@@ -1152,6 +1234,18 @@ void PerfScriptReader::warnInvalidRange() {
     const PerfSample *Sample = Item.first.getPtr();
     uint64_t Count = Item.second;
     uint64_t EndAddress = 0;
+
+    if (LeadingIPOnly) {
+      assert(Sample->LBRStack.size() == 1 &&
+             "Expected only half LBR entries for ip-only mode");
+      const LBREntry &LBR = *(Sample->LBRStack.begin());
+      if (LBR.Source == LBR.Target && LBR.Source != ExternalAddr) {
+        // This is a leading-addr-only profile.
+        Ranges[{LBR.Source, LBR.Source}] += Count;
+      }
+      continue;
+    }
+
     for (const LBREntry &LBR : Sample->LBRStack) {
       uint64_t SourceAddress = LBR.Source;
       uint64_t StartAddress = LBR.Target;
@@ -1199,11 +1293,15 @@ void PerfScriptReader::warnInvalidRange() {
         !Binary->addressIsCode(EndAddress))
       continue;
 
-    if (!Binary->addressIsCode(StartAddress) ||
-        !Binary->addressIsTransfer(EndAddress)) {
-      InstNotBoundary += I.second;
-      WarnInvalidRange(StartAddress, EndAddress, EndNotBoundaryMsg);
-    }
+    // IP samples can indicate activity on individual instructions rather than
+    // basic blocks/edges. In this mode, don't warn if sampled IPs aren't
+    // branches.
+    if (!LeadingIPOnly)
+      if (!Binary->addressIsCode(StartAddress) ||
+          !Binary->addressIsTransfer(EndAddress)) {
+        InstNotBoundary += I.second;
+        WarnInvalidRange(StartAddress, EndAddress, EndNotBoundaryMsg);
+      }
 
     auto *FRange = Binary->findFuncRange(StartAddress);
     if (!FRange) {
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 53a25b279b432..175556c2220e6 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -104,6 +104,8 @@ cl::opt<bool> InferMissingFrames(
         "Infer missing call frames due to compiler tail call elimination."),
     llvm::cl::Optional);
 
+extern cl::opt<bool> LeadingIPOnly;
+
 using namespace llvm;
 using namespace sampleprof;
 
@@ -388,18 +390,25 @@ void ProfileGeneratorBase::updateBodySamplesforFunctionProfile(
   // Use the maximum count of samples with same line location
   uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator);
 
-  // Use duplication factor to compensated for loop unroll/vectorization.
-  // Note that this is only needed when we're taking MAX of the counts at
-  // the location instead of SUM.
-  Count *= getDuplicationFactor(LeafLoc.Location.Discriminator);
-
-  ErrorOr<uint64_t> R =
-      FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator);
-
-  uint64_t PreviousCount = R ? R.get() : 0;
-  if (PreviousCount <= Count) {
+  if (LeadingIPOnly) {
+    // When computing an IP-based profile we take the SUM of counts at the
+    // location instead of applying duplication factors and taking the MAX.
     FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator,
-                                   Count - PreviousCount);
+                                   Count);
+  } else {
+    // Otherwise, use duplication factor to compensate for loop
+    // unroll/vectorization. Note that this is only needed when we're taking
+    // MAX of the counts at the location instead of SUM.
+    Count *= getDuplicationFactor(LeafLoc.Location.Discriminator);
+
+    ErrorOr<uint64_t> R = FunctionProfile.findSamplesAt(
+        LeafLoc.Location.LineOffset, Discriminator);
+
+    uint64_t PreviousCount = R ? R.get() : 0;
+    if (PreviousCount <= Count) {
+      FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator,
+                                     Count - PreviousCount);
+    }
   }
 }
 
diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFExpressionCopyBytesTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFExpressionCopyBytesTest.cpp
index 43fdf5d3d6f31..ec9c0dddcbc0c 100644
--- a/llvm/unittests/DebugInfo/DWARF/DWARFExpressionCopyBytesTest.cpp
+++ b/llvm/unittests/DebugInfo/DWARF/DWARFExpressionCopyBytesTest.cpp
@@ -112,10 +112,7 @@ DWARFExpressionCopyBytesTest::createStreamer(raw_pwrite_stream &OS) {
   std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(OS);
   Res.Streamer.reset(TheTarget->createMCObjectStreamer(
       Triple(TripleName), *Res.Ctx, std::unique_ptr<MCAsmBackend>(MAB),
-      std::move(OW), std::unique_ptr<MCCodeEmitter>(MCE), *STI,
-      /* RelaxAll */ false,
-      /* IncrementalLinkerCompatible */ false,
-      /* DWARFMustBeAtTheEnd */ false));
+      std::move(OW), std::unique_ptr<MCCodeEmitter>(MCE), *STI));
   return Res;
 }
 
diff --git a/llvm/unittests/DebugInfo/DWARF/DwarfGenerator.cpp b/llvm/unittests/DebugInfo/DWARF/DwarfGenerator.cpp
index ad5e51b7efb83..2cbd4cc55df95 100644
--- a/llvm/unittests/DebugInfo/DWARF/DwarfGenerator.cpp
+++ b/llvm/unittests/DebugInfo/DWARF/DwarfGenerator.cpp
@@ -503,8 +503,7 @@ llvm::Error dwarfgen::Generator::init(Triple TheTriple, uint16_t V) {
   MS = TheTarget->createMCObjectStreamer(
       TheTriple, *MC, std::unique_ptr<MCAsmBackend>(MAB),
       MAB->createObjectWriter(*Stream), std::unique_ptr<MCCodeEmitter>(MCE),
-      *MSTI, MCOptions.MCRelaxAll, MCOptions.MCIncrementalLinkerCompatible,
-      /*DWARFMustBeAtTheEnd*/ false);
+      *MSTI);
   if (!MS)
     return make_error<StringError>("no object streamer for target " +
                                        TripleName,
diff --git a/llvm/unittests/MC/DwarfLineTableHeaders.cpp b/llvm/unittests/MC/DwarfLineTableHeaders.cpp
index d8a657ed5048e..1fad1ba6ce638 100644
--- a/llvm/unittests/MC/DwarfLineTableHeaders.cpp
+++ b/llvm/unittests/MC/DwarfLineTableHeaders.cpp
@@ -83,10 +83,7 @@ class DwarfLineTableHeaders : public ::testing::Test {
     std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(OS);
     Res.Streamer.reset(TheTarget->createMCObjectStreamer(
         Triple(TripleName), *Res.Ctx, std::unique_ptr<MCAsmBackend>(MAB),
-        std::move(OW), std::unique_ptr<MCCodeEmitter>(MCE), *STI,
-        /* RelaxAll */ false,
-        /* IncrementalLinkerCompatible */ false,
-        /* DWARFMustBeAtTheEnd */ false));
+        std::move(OW), std::unique_ptr<MCCodeEmitter>(MCE), *STI));
     return Res;
   }
 
diff --git a/llvm/unittests/SandboxIR/SandboxIRTest.cpp b/llvm/unittests/SandboxIR/SandboxIRTest.cpp
index 04beb429502bc..054a81e9cf308 100644
--- a/llvm/unittests/SandboxIR/SandboxIRTest.cpp
+++ b/llvm/unittests/SandboxIR/SandboxIRTest.cpp
@@ -591,3 +591,36 @@ define void @foo(ptr %arg0, ptr %arg1) {
   EXPECT_EQ(NewLd->getAlign(), 8);
   EXPECT_EQ(NewLd->getName(), "NewLd");
 }
+
+TEST_F(SandboxIRTest, StoreInst) {
+  parseIR(C, R"IR(
+define void @foo(i8 %val, ptr %ptr) {
+  store i8 %val, ptr %ptr, align 64
+  ret void
+}
+)IR");
+  llvm::Function *LLVMF = &*M->getFunction("foo");
+  sandboxir::Context Ctx(C);
+  sandboxir::Function *F = Ctx.createFunction(LLVMF);
+  auto *Val = F->getArg(0);
+  auto *Ptr = F->getArg(1);
+  auto *BB = &*F->begin();
+  auto It = BB->begin();
+  auto *St = cast<sandboxir::StoreInst>(&*It++);
+  auto *Ret = &*It++;
+
+  // Check that the StoreInst has been created correctly.
+  // Check getValueOperand() and getPointerOperand()
+  EXPECT_EQ(St->getValueOperand(), Val);
+  EXPECT_EQ(St->getPointerOperand(), Ptr);
+  // Check getAlign()
+  EXPECT_EQ(St->getAlign(), 64);
+  // Check create(InsertBefore)
+  sandboxir::StoreInst *NewSt =
+      sandboxir::StoreInst::create(Val, Ptr, Align(8),
+                                   /*InsertBefore=*/Ret, Ctx);
+  EXPECT_EQ(NewSt->getType(), St->getType());
+  EXPECT_EQ(NewSt->getValueOperand(), Val);
+  EXPECT_EQ(NewSt->getPointerOperand(), Ptr);
+  EXPECT_EQ(NewSt->getAlign(), 8);
+}
diff --git a/llvm/utils/lit/lit/cl_arguments.py b/llvm/utils/lit/lit/cl_arguments.py
index b9122d07afd8a..ed78256ee414b 100644
--- a/llvm/utils/lit/lit/cl_arguments.py
+++ b/llvm/utils/lit/lit/cl_arguments.py
@@ -154,11 +154,6 @@ def parse_args():
         action="append",
         default=[],
     )
-    execution_group.add_argument(
-        "--time-tests",
-        help="Track elapsed wall time for each test",
-        action="store_true",
-    )
     execution_group.add_argument(
         "--no-execute",
         dest="noExecute",
@@ -209,6 +204,17 @@ def parse_args():
         action="store_true",
         help="Exit with status zero even if some tests fail",
     )
+    execution_test_time_group = execution_group.add_mutually_exclusive_group()
+    execution_test_time_group.add_argument(
+        "--skip-test-time-recording",
+        help="Do not track elapsed wall time for each test",
+        action="store_true",
+    )
+    execution_test_time_group.add_argument(
+        "--time-tests",
+        help="Track elapsed wall time for each test printed in a histogram",
+        action="store_true",
+    )
 
     selection_group = parser.add_argument_group("Test Selection")
     selection_group.add_argument(
diff --git a/llvm/utils/lit/lit/main.py b/llvm/utils/lit/lit/main.py
index db9f24f748d9e..24ba804f0c363 100755
--- a/llvm/utils/lit/lit/main.py
+++ b/llvm/utils/lit/lit/main.py
@@ -124,7 +124,8 @@ def main(builtin_params={}):
     run_tests(selected_tests, lit_config, opts, len(discovered_tests))
     elapsed = time.time() - start
 
-    record_test_times(selected_tests, lit_config)
+    if not opts.skip_test_time_recording:
+        record_test_times(selected_tests, lit_config)
 
     selected_tests, discovered_tests = GoogleTest.post_process_shard_results(
         selected_tests, discovered_tests
diff --git a/llvm/utils/lit/tests/Inputs/time-tests/a.txt b/llvm/utils/lit/tests/Inputs/time-tests/a.txt
new file mode 100644
index 0000000000000..b80b60b7a2794
--- /dev/null
+++ b/llvm/utils/lit/tests/Inputs/time-tests/a.txt
@@ -0,0 +1 @@
+# RUN: true
diff --git a/llvm/utils/lit/tests/Inputs/time-tests/lit.cfg b/llvm/utils/lit/tests/Inputs/time-tests/lit.cfg
new file mode 100644
index 0000000000000..e6ae41833874a
--- /dev/null
+++ b/llvm/utils/lit/tests/Inputs/time-tests/lit.cfg
@@ -0,0 +1,7 @@
+import lit.formats
+
+config.name = "time-tests"
+config.suffixes = [".txt"]
+config.test_format = lit.formats.ShTest()
+config.test_source_root = None
+config.test_exec_root = None
diff --git a/llvm/utils/lit/tests/time-tests.py b/llvm/utils/lit/tests/time-tests.py
new file mode 100644
index 0000000000000..20b83a64330f0
--- /dev/null
+++ b/llvm/utils/lit/tests/time-tests.py
@@ -0,0 +1,15 @@
+## Check that --skip-test-time-recording skips .lit_test_times.txt recording.
+
+# RUN: %{lit-no-order-opt} --skip-test-time-recording %{inputs}/time-tests
+# RUN: not ls %{inputs}/time-tests/.lit_test_times.txt
+
+## Check that --time-tests generates a printed histogram.
+
+# RUN: %{lit-no-order-opt} --time-tests %{inputs}/time-tests > %t.out
+# RUN: FileCheck < %t.out %s
+# RUN: rm %{inputs}/time-tests/.lit_test_times.txt
+
+# CHECK:      Tests Times:
+# CHECK-NEXT: --------------------------------------------------------------------------
+# CHECK-NEXT: [    Range    ] :: [               Percentage               ] :: [Count]
+# CHECK-NEXT: --------------------------------------------------------------------------
diff --git a/mlir/lib/Bytecode/Writer/BytecodeWriter.cpp b/mlir/lib/Bytecode/Writer/BytecodeWriter.cpp
index 449d7549eb724..0e96aa97abeba 100644
--- a/mlir/lib/Bytecode/Writer/BytecodeWriter.cpp
+++ b/mlir/lib/Bytecode/Writer/BytecodeWriter.cpp
@@ -18,6 +18,7 @@
 #include "llvm/ADT/CachedHashString.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/raw_ostream.h"
 #include <optional>
@@ -145,7 +146,9 @@ class EncodingEmitter {
   //===--------------------------------------------------------------------===//
 
   /// Backpatch a byte in the result buffer at the given offset.
-  void patchByte(uint64_t offset, uint8_t value) {
+  void patchByte(uint64_t offset, uint8_t value, StringLiteral desc) {
+    LLVM_DEBUG(llvm::dbgs() << "patchByte(" << offset << ',' << uint64_t(value)
+                            << ")\t" << desc << '\n');
     assert(offset < size() && offset >= prevResultSize &&
            "cannot patch previously emitted data");
     currentResult[offset - prevResultSize] = value;
@@ -153,7 +156,9 @@ class EncodingEmitter {
 
   /// Emit the provided blob of data, which is owned by the caller and is
   /// guaranteed to not die before the end of the bytecode process.
-  void emitOwnedBlob(ArrayRef<uint8_t> data) {
+  void emitOwnedBlob(ArrayRef<uint8_t> data, StringLiteral desc) {
+    LLVM_DEBUG(llvm::dbgs()
+               << "emitOwnedBlob(" << data.size() << "b)\t" << desc << '\n');
     // Push the current buffer before adding the provided data.
     appendResult(std::move(currentResult));
     appendOwnedResult(data);
@@ -163,17 +168,19 @@ class EncodingEmitter {
   /// owned by the caller and is guaranteed to not die before the end of the
   /// bytecode process. The alignment value is also encoded, making it available
   /// on load.
-  void emitOwnedBlobAndAlignment(ArrayRef<uint8_t> data, uint32_t alignment) {
-    emitVarInt(alignment);
-    emitVarInt(data.size());
+  void emitOwnedBlobAndAlignment(ArrayRef<uint8_t> data, uint32_t alignment,
+                                 StringLiteral desc) {
+    emitVarInt(alignment, desc);
+    emitVarInt(data.size(), desc);
 
     alignTo(alignment);
-    emitOwnedBlob(data);
+    emitOwnedBlob(data, desc);
   }
-  void emitOwnedBlobAndAlignment(ArrayRef<char> data, uint32_t alignment) {
+  void emitOwnedBlobAndAlignment(ArrayRef<char> data, uint32_t alignment,
+                                 StringLiteral desc) {
     ArrayRef<uint8_t> castedData(reinterpret_cast<const uint8_t *>(data.data()),
                                  data.size());
-    emitOwnedBlobAndAlignment(castedData, alignment);
+    emitOwnedBlobAndAlignment(castedData, alignment, desc);
   }
 
   /// Align the emitter to the given alignment.
@@ -187,7 +194,7 @@ class EncodingEmitter {
     size_t curOffset = size();
     size_t paddingSize = llvm::alignTo(curOffset, alignment) - curOffset;
     while (paddingSize--)
-      emitByte(bytecode::kAlignmentByte);
+      emitByte(bytecode::kAlignmentByte, "alignment byte");
 
     // Keep track of the maximum required alignment.
     requiredAlignment = std::max(requiredAlignment, alignment);
@@ -198,12 +205,16 @@ class EncodingEmitter {
 
   /// Emit a single byte.
   template <typename T>
-  void emitByte(T byte) {
+  void emitByte(T byte, StringLiteral desc) {
+    LLVM_DEBUG(llvm::dbgs()
+               << "emitByte(" << uint64_t(byte) << ")\t" << desc << '\n');
     currentResult.push_back(static_cast<uint8_t>(byte));
   }
 
   /// Emit a range of bytes.
-  void emitBytes(ArrayRef<uint8_t> bytes) {
+  void emitBytes(ArrayRef<uint8_t> bytes, StringLiteral desc) {
+    LLVM_DEBUG(llvm::dbgs()
+               << "emitBytes(" << bytes.size() << "b)\t" << desc << '\n');
     llvm::append_range(currentResult, bytes);
   }
 
@@ -214,40 +225,43 @@ class EncodingEmitter {
   /// All remaining bits in the first byte, along with all of the bits in
   /// additional bytes, provide the value of the integer encoded in
   /// little-endian order.
-  void emitVarInt(uint64_t value) {
+  void emitVarInt(uint64_t value, StringLiteral desc) {
+    LLVM_DEBUG(llvm::dbgs() << "emitVarInt(" << value << ")\t" << desc << '\n');
+
     // In the most common case, the value can be represented in a single byte.
     // Given how hot this case is, explicitly handle that here.
     if ((value >> 7) == 0)
-      return emitByte((value << 1) | 0x1);
-    emitMultiByteVarInt(value);
+      return emitByte((value << 1) | 0x1, desc);
+    emitMultiByteVarInt(value, desc);
   }
 
   /// Emit a signed variable length integer. Signed varints are encoded using
   /// a varint with zigzag encoding, meaning that we use the low bit of the
   /// value to indicate the sign of the value. This allows for more efficient
   /// encoding of negative values by limiting the number of active bits
-  void emitSignedVarInt(uint64_t value) {
-    emitVarInt((value << 1) ^ (uint64_t)((int64_t)value >> 63));
+  void emitSignedVarInt(uint64_t value, StringLiteral desc) {
+    emitVarInt((value << 1) ^ (uint64_t)((int64_t)value >> 63), desc);
   }
 
   /// Emit a variable length integer whose low bit is used to encode the
   /// provided flag, i.e. encoded as: (value << 1) | (flag ? 1 : 0).
-  void emitVarIntWithFlag(uint64_t value, bool flag) {
-    emitVarInt((value << 1) | (flag ? 1 : 0));
+  void emitVarIntWithFlag(uint64_t value, bool flag, StringLiteral desc) {
+    emitVarInt((value << 1) | (flag ? 1 : 0), desc);
   }
 
   //===--------------------------------------------------------------------===//
   // String Emission
 
   /// Emit the given string as a nul terminated string.
-  void emitNulTerminatedString(StringRef str) {
-    emitString(str);
-    emitByte(0);
+  void emitNulTerminatedString(StringRef str, StringLiteral desc) {
+    emitString(str, desc);
+    emitByte(0, "null terminator");
   }
 
   /// Emit the given string without a nul terminator.
-  void emitString(StringRef str) {
-    emitBytes({reinterpret_cast<const uint8_t *>(str.data()), str.size()});
+  void emitString(StringRef str, StringLiteral desc) {
+    emitBytes({reinterpret_cast<const uint8_t *>(str.data()), str.size()},
+              desc);
   }
 
   //===--------------------------------------------------------------------===//
@@ -260,14 +274,14 @@ class EncodingEmitter {
     // indicate whether the section alignment is present, so save an offset to
     // it.
     uint64_t codeOffset = currentResult.size();
-    emitByte(code);
-    emitVarInt(emitter.size());
+    emitByte(code, "section code");
+    emitVarInt(emitter.size(), "section size");
 
     // Integrate the alignment of the section into this emitter if necessary.
     unsigned emitterAlign = emitter.requiredAlignment;
     if (emitterAlign > 1) {
       if (size() & (emitterAlign - 1)) {
-        emitVarInt(emitterAlign);
+        emitVarInt(emitterAlign, "section alignment");
         alignTo(emitterAlign);
 
         // Indicate that we needed to align the section, the high bit of the
@@ -295,7 +309,8 @@ class EncodingEmitter {
   /// fallback when the number of bytes needed to encode the value is greater
   /// than 1. We mark it noinline here so that the single byte hot path isn't
   /// pessimized.
-  LLVM_ATTRIBUTE_NOINLINE void emitMultiByteVarInt(uint64_t value);
+  LLVM_ATTRIBUTE_NOINLINE void emitMultiByteVarInt(uint64_t value,
+                                                   StringLiteral desc);
 
   /// Append a new result buffer to the current contents.
   void appendResult(std::vector<uint8_t> &&result) {
@@ -345,15 +360,15 @@ class StringSectionBuilder {
 
   /// Write the current set of strings to the given emitter.
   void write(EncodingEmitter &emitter) {
-    emitter.emitVarInt(strings.size());
+    emitter.emitVarInt(strings.size(), "string section size");
 
     // Emit the sizes in reverse order, so that we don't need to backpatch an
     // offset to the string data or have a separate section.
     for (const auto &it : llvm::reverse(strings))
-      emitter.emitVarInt(it.first.size() + 1);
+      emitter.emitVarInt(it.first.size() + 1, "string size");
     // Emit the string data itself.
     for (const auto &it : strings)
-      emitter.emitNulTerminatedString(it.first.val());
+      emitter.emitNulTerminatedString(it.first.val(), "string");
   }
 
 private:
@@ -380,32 +395,35 @@ class DialectWriter : public DialectBytecodeWriter {
   //===--------------------------------------------------------------------===//
 
   void writeAttribute(Attribute attr) override {
-    emitter.emitVarInt(numberingState.getNumber(attr));
+    emitter.emitVarInt(numberingState.getNumber(attr), "dialect attr");
   }
   void writeOptionalAttribute(Attribute attr) override {
     if (!attr) {
-      emitter.emitVarInt(0);
+      emitter.emitVarInt(0, "dialect optional attr none");
       return;
     }
-    emitter.emitVarIntWithFlag(numberingState.getNumber(attr), true);
+    emitter.emitVarIntWithFlag(numberingState.getNumber(attr), true,
+                               "dialect optional attr");
   }
 
   void writeType(Type type) override {
-    emitter.emitVarInt(numberingState.getNumber(type));
+    emitter.emitVarInt(numberingState.getNumber(type), "dialect type");
   }
 
   void writeResourceHandle(const AsmDialectResourceHandle &resource) override {
-    emitter.emitVarInt(numberingState.getNumber(resource));
+    emitter.emitVarInt(numberingState.getNumber(resource), "dialect resource");
   }
 
   //===--------------------------------------------------------------------===//
   // Primitives
   //===--------------------------------------------------------------------===//
 
-  void writeVarInt(uint64_t value) override { emitter.emitVarInt(value); }
+  void writeVarInt(uint64_t value) override {
+    emitter.emitVarInt(value, "dialect writer");
+  }
 
   void writeSignedVarInt(int64_t value) override {
-    emitter.emitSignedVarInt(value);
+    emitter.emitSignedVarInt(value, "dialect writer");
   }
 
   void writeAPIntWithKnownWidth(const APInt &value) override {
@@ -414,21 +432,21 @@ class DialectWriter : public DialectBytecodeWriter {
     // If the value is a single byte, just emit it directly without going
     // through a varint.
     if (bitWidth <= 8)
-      return emitter.emitByte(value.getLimitedValue());
+      return emitter.emitByte(value.getLimitedValue(), "dialect APInt");
 
     // If the value fits within a single varint, emit it directly.
     if (bitWidth <= 64)
-      return emitter.emitSignedVarInt(value.getLimitedValue());
+      return emitter.emitSignedVarInt(value.getLimitedValue(), "dialect APInt");
 
     // Otherwise, we need to encode a variable number of active words. We use
     // active words instead of the number of total words under the observation
     // that smaller values will be more common.
     unsigned numActiveWords = value.getActiveWords();
-    emitter.emitVarInt(numActiveWords);
+    emitter.emitVarInt(numActiveWords, "dialect APInt word count");
 
     const uint64_t *rawValueData = value.getRawData();
     for (unsigned i = 0; i < numActiveWords; ++i)
-      emitter.emitSignedVarInt(rawValueData[i]);
+      emitter.emitSignedVarInt(rawValueData[i], "dialect APInt word");
   }
 
   void writeAPFloatWithKnownSemantics(const APFloat &value) override {
@@ -436,16 +454,20 @@ class DialectWriter : public DialectBytecodeWriter {
   }
 
   void writeOwnedString(StringRef str) override {
-    emitter.emitVarInt(stringSection.insert(str));
+    emitter.emitVarInt(stringSection.insert(str), "dialect string");
   }
 
   void writeOwnedBlob(ArrayRef<char> blob) override {
-    emitter.emitVarInt(blob.size());
-    emitter.emitOwnedBlob(ArrayRef<uint8_t>(
-        reinterpret_cast<const uint8_t *>(blob.data()), blob.size()));
+    emitter.emitVarInt(blob.size(), "dialect blob");
+    emitter.emitOwnedBlob(
+        ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(blob.data()),
+                          blob.size()),
+        "dialect blob");
   }
 
-  void writeOwnedBool(bool value) override { emitter.emitByte(value); }
+  void writeOwnedBool(bool value) override {
+    emitter.emitByte(value, "dialect bool");
+  }
 
   int64_t getBytecodeVersion() const override { return bytecodeVersion; }
 
@@ -486,7 +508,7 @@ class PropertiesSectionBuilder {
       if (!prop)
         return std::nullopt;
       EncodingEmitter sizeEmitter;
-      sizeEmitter.emitVarInt(numberingState.getNumber(prop));
+      sizeEmitter.emitVarInt(numberingState.getNumber(prop), "properties size");
       scratch.clear();
       llvm::raw_svector_ostream os(scratch);
       sizeEmitter.writeTo(os);
@@ -507,16 +529,17 @@ class PropertiesSectionBuilder {
 
   /// Write the current set of properties to the given emitter.
   void write(EncodingEmitter &emitter) {
-    emitter.emitVarInt(propertiesStorage.size());
+    emitter.emitVarInt(propertiesStorage.size(), "properties size");
     if (propertiesStorage.empty())
       return;
     for (const auto &storage : propertiesStorage) {
       if (storage.empty()) {
-        emitter.emitBytes(ArrayRef<uint8_t>());
+        emitter.emitBytes(ArrayRef<uint8_t>(), "empty properties");
         continue;
       }
       emitter.emitBytes(ArrayRef(reinterpret_cast<const uint8_t *>(&storage[0]),
-                                 storage.size()));
+                                 storage.size()),
+                        "property");
     }
   }
 
@@ -532,7 +555,7 @@ class PropertiesSectionBuilder {
     SmallVector<char> sizeScratch;
     {
       EncodingEmitter sizeEmitter;
-      sizeEmitter.emitVarInt(rawProperties.size());
+      sizeEmitter.emitVarInt(rawProperties.size(), "properties");
       llvm::raw_svector_ostream os(sizeScratch);
       sizeEmitter.writeTo(os);
     }
@@ -576,7 +599,8 @@ class RawEmitterOstream : public raw_ostream {
 
 private:
   void write_impl(const char *ptr, size_t size) override {
-    emitter.emitBytes({reinterpret_cast<const uint8_t *>(ptr), size});
+    emitter.emitBytes({reinterpret_cast<const uint8_t *>(ptr), size},
+                      "raw emitter");
   }
   uint64_t current_pos() const override { return emitter.size(); }
 
@@ -591,7 +615,7 @@ void EncodingEmitter::writeTo(raw_ostream &os) const {
   os.write((const char *)currentResult.data(), currentResult.size());
 }
 
-void EncodingEmitter::emitMultiByteVarInt(uint64_t value) {
+void EncodingEmitter::emitMultiByteVarInt(uint64_t value, StringLiteral desc) {
   // Compute the number of bytes needed to encode the value. Each byte can hold
   // up to 7-bits of data. We only check up to the number of bits we can encode
   // in the first byte (8).
@@ -601,16 +625,16 @@ void EncodingEmitter::emitMultiByteVarInt(uint64_t value) {
       uint64_t encodedValue = (value << 1) | 0x1;
       encodedValue <<= (numBytes - 1);
       llvm::support::ulittle64_t encodedValueLE(encodedValue);
-      emitBytes({reinterpret_cast<uint8_t *>(&encodedValueLE), numBytes});
+      emitBytes({reinterpret_cast<uint8_t *>(&encodedValueLE), numBytes}, desc);
       return;
     }
   }
 
   // If the value is too large to encode in a single byte, emit a special all
   // zero marker byte and splat the value directly.
-  emitByte(0);
+  emitByte(0, desc);
   llvm::support::ulittle64_t valueLE(value);
-  emitBytes({reinterpret_cast<uint8_t *>(&valueLE), sizeof(valueLE)});
+  emitBytes({reinterpret_cast<uint8_t *>(&valueLE), sizeof(valueLE)}, desc);
 }
 
 //===----------------------------------------------------------------------===//
@@ -696,7 +720,7 @@ LogicalResult BytecodeWriter::write(Operation *rootOp, raw_ostream &os) {
 
   // Emit the bytecode file header. This is how we identify the output as a
   // bytecode file.
-  emitter.emitString("ML\xefR");
+  emitter.emitString("ML\xefR", "bytecode header");
 
   // Emit the bytecode version.
   if (config.bytecodeVersion < bytecode::kMinSupportedVersion ||
@@ -706,10 +730,10 @@ LogicalResult BytecodeWriter::write(Operation *rootOp, raw_ostream &os) {
            << ", must be in range ["
            << static_cast<int64_t>(bytecode::kMinSupportedVersion) << ", "
            << static_cast<int64_t>(bytecode::kVersion) << ']';
-  emitter.emitVarInt(config.bytecodeVersion);
+  emitter.emitVarInt(config.bytecodeVersion, "bytecode version");
 
   // Emit the producer.
-  emitter.emitNulTerminatedString(config.producer);
+  emitter.emitNulTerminatedString(config.producer, "bytecode producer");
 
   // Emit the dialect section.
   writeDialectSection(emitter);
@@ -760,8 +784,8 @@ static void writeDialectGrouping(EncodingEmitter &emitter, EntriesT &&entries,
     });
 
     // Emit the dialect and number of elements.
-    emitter.emitVarInt(currentDialect->number);
-    emitter.emitVarInt(std::distance(groupStart, it));
+    emitter.emitVarInt(currentDialect->number, "dialect number");
+    emitter.emitVarInt(std::distance(groupStart, it), "dialect offset");
 
     // Emit the entries within the group.
     for (auto &entry : llvm::make_range(groupStart, it))
@@ -774,13 +798,13 @@ void BytecodeWriter::writeDialectSection(EncodingEmitter &emitter) {
 
   // Emit the referenced dialects.
   auto dialects = numberingState.getDialects();
-  dialectEmitter.emitVarInt(llvm::size(dialects));
+  dialectEmitter.emitVarInt(llvm::size(dialects), "dialects count");
   for (DialectNumbering &dialect : dialects) {
     // Write the string section and get the ID.
     size_t nameID = stringSection.insert(dialect.name);
 
     if (config.bytecodeVersion < bytecode::kDialectVersioning) {
-      dialectEmitter.emitVarInt(nameID);
+      dialectEmitter.emitVarInt(nameID, "dialect name ID");
       continue;
     }
 
@@ -798,22 +822,25 @@ void BytecodeWriter::writeDialectSection(EncodingEmitter &emitter) {
     // this in the dialect ID, so if there is no version, we don't write the
     // section.
     size_t versionAvailable = versionEmitter.size() > 0;
-    dialectEmitter.emitVarIntWithFlag(nameID, versionAvailable);
+    dialectEmitter.emitVarIntWithFlag(nameID, versionAvailable,
+                                      "dialect version");
     if (versionAvailable)
       dialectEmitter.emitSection(bytecode::Section::kDialectVersions,
                                  std::move(versionEmitter));
   }
 
   if (config.bytecodeVersion >= bytecode::kElideUnknownBlockArgLocation)
-    dialectEmitter.emitVarInt(size(numberingState.getOpNames()));
+    dialectEmitter.emitVarInt(size(numberingState.getOpNames()),
+                              "op names count");
 
   // Emit the referenced operation names grouped by dialect.
   auto emitOpName = [&](OpNameNumbering &name) {
     size_t stringId = stringSection.insert(name.name.stripDialect());
     if (config.bytecodeVersion < bytecode::kNativePropertiesEncoding)
-      dialectEmitter.emitVarInt(stringId);
+      dialectEmitter.emitVarInt(stringId, "dialect op name");
     else
-      dialectEmitter.emitVarIntWithFlag(stringId, name.name.isRegistered());
+      dialectEmitter.emitVarIntWithFlag(stringId, name.name.isRegistered(),
+                                        "dialect op name");
   };
   writeDialectGrouping(dialectEmitter, numberingState.getOpNames(), emitOpName);
 
@@ -826,8 +853,10 @@ void BytecodeWriter::writeDialectSection(EncodingEmitter &emitter) {
 void BytecodeWriter::writeAttrTypeSection(EncodingEmitter &emitter) {
   EncodingEmitter attrTypeEmitter;
   EncodingEmitter offsetEmitter;
-  offsetEmitter.emitVarInt(llvm::size(numberingState.getAttributes()));
-  offsetEmitter.emitVarInt(llvm::size(numberingState.getTypes()));
+  offsetEmitter.emitVarInt(llvm::size(numberingState.getAttributes()),
+                           "attributes count");
+  offsetEmitter.emitVarInt(llvm::size(numberingState.getTypes()),
+                           "types count");
 
   // A functor used to emit an attribute or type entry.
   uint64_t prevOffset = 0;
@@ -836,7 +865,7 @@ void BytecodeWriter::writeAttrTypeSection(EncodingEmitter &emitter) {
 
     auto emitAttrOrTypeRawImpl = [&]() -> void {
       RawEmitterOstream(attrTypeEmitter) << entryValue;
-      attrTypeEmitter.emitByte(0);
+      attrTypeEmitter.emitByte(0, "attr/type separator");
     };
     auto emitAttrOrTypeImpl = [&]() -> bool {
       // TODO: We don't currently support custom encoded mutable types and
@@ -882,7 +911,8 @@ void BytecodeWriter::writeAttrTypeSection(EncodingEmitter &emitter) {
 
     // Record the offset of this entry.
     uint64_t curOffset = attrTypeEmitter.size();
-    offsetEmitter.emitVarIntWithFlag(curOffset - prevOffset, hasCustomEncoding);
+    offsetEmitter.emitVarIntWithFlag(curOffset - prevOffset, hasCustomEncoding,
+                                     "attr/type offset");
     prevOffset = curOffset;
   };
 
@@ -910,30 +940,33 @@ LogicalResult BytecodeWriter::writeBlock(EncodingEmitter &emitter,
   // use the low bit of the operation count to indicate if the block has
   // arguments.
   unsigned numOps = numberingState.getOperationCount(block);
-  emitter.emitVarIntWithFlag(numOps, hasArgs);
+  emitter.emitVarIntWithFlag(numOps, hasArgs, "block num ops");
 
   // Emit the arguments of the block.
   if (hasArgs) {
-    emitter.emitVarInt(args.size());
+    emitter.emitVarInt(args.size(), "block args count");
     for (BlockArgument arg : args) {
       Location argLoc = arg.getLoc();
       if (config.bytecodeVersion >= bytecode::kElideUnknownBlockArgLocation) {
         emitter.emitVarIntWithFlag(numberingState.getNumber(arg.getType()),
-                                   !isa<UnknownLoc>(argLoc));
+                                   !isa<UnknownLoc>(argLoc), "block arg type");
         if (!isa<UnknownLoc>(argLoc))
-          emitter.emitVarInt(numberingState.getNumber(argLoc));
+          emitter.emitVarInt(numberingState.getNumber(argLoc),
+                             "block arg location");
       } else {
-        emitter.emitVarInt(numberingState.getNumber(arg.getType()));
-        emitter.emitVarInt(numberingState.getNumber(argLoc));
+        emitter.emitVarInt(numberingState.getNumber(arg.getType()),
+                           "block arg type");
+        emitter.emitVarInt(numberingState.getNumber(argLoc),
+                           "block arg location");
       }
     }
     if (config.bytecodeVersion >= bytecode::kUseListOrdering) {
       uint64_t maskOffset = emitter.size();
       uint8_t encodingMask = 0;
-      emitter.emitByte(0);
+      emitter.emitByte(0, "use-list separator");
       writeUseListOrders(emitter, encodingMask, args);
       if (encodingMask)
-        emitter.patchByte(maskOffset, encodingMask);
+        emitter.patchByte(maskOffset, encodingMask, "block patch encoding");
     }
   }
 
@@ -945,17 +978,17 @@ LogicalResult BytecodeWriter::writeBlock(EncodingEmitter &emitter,
 }
 
 LogicalResult BytecodeWriter::writeOp(EncodingEmitter &emitter, Operation *op) {
-  emitter.emitVarInt(numberingState.getNumber(op->getName()));
+  emitter.emitVarInt(numberingState.getNumber(op->getName()), "op name ID");
 
   // Emit a mask for the operation components. We need to fill this in later
   // (when we actually know what needs to be emitted), so emit a placeholder for
   // now.
   uint64_t maskOffset = emitter.size();
   uint8_t opEncodingMask = 0;
-  emitter.emitByte(0);
+  emitter.emitByte(0, "op separator");
 
   // Emit the location for this operation.
-  emitter.emitVarInt(numberingState.getNumber(op->getLoc()));
+  emitter.emitVarInt(numberingState.getNumber(op->getLoc()), "op location");
 
   // Emit the attributes of this operation.
   DictionaryAttr attrs = op->getDiscardableAttrDictionary();
@@ -969,7 +1002,7 @@ LogicalResult BytecodeWriter::writeOp(EncodingEmitter &emitter, Operation *op) {
   }
   if (!attrs.empty()) {
     opEncodingMask |= bytecode::OpEncodingMask::kHasAttrs;
-    emitter.emitVarInt(numberingState.getNumber(attrs));
+    emitter.emitVarInt(numberingState.getNumber(attrs), "op attrs count");
   }
 
   // Emit the properties of this operation, for now we still support deployment
@@ -978,32 +1011,32 @@ LogicalResult BytecodeWriter::writeOp(EncodingEmitter &emitter, Operation *op) {
     std::optional<ssize_t> propertiesId = propertiesSection.emit(op);
     if (propertiesId.has_value()) {
       opEncodingMask |= bytecode::OpEncodingMask::kHasProperties;
-      emitter.emitVarInt(*propertiesId);
+      emitter.emitVarInt(*propertiesId, "op properties ID");
     }
   }
 
   // Emit the result types of the operation.
   if (unsigned numResults = op->getNumResults()) {
     opEncodingMask |= bytecode::OpEncodingMask::kHasResults;
-    emitter.emitVarInt(numResults);
+    emitter.emitVarInt(numResults, "op results count");
     for (Type type : op->getResultTypes())
-      emitter.emitVarInt(numberingState.getNumber(type));
+      emitter.emitVarInt(numberingState.getNumber(type), "op result type");
   }
 
   // Emit the operands of the operation.
   if (unsigned numOperands = op->getNumOperands()) {
     opEncodingMask |= bytecode::OpEncodingMask::kHasOperands;
-    emitter.emitVarInt(numOperands);
+    emitter.emitVarInt(numOperands, "op operands count");
     for (Value operand : op->getOperands())
-      emitter.emitVarInt(numberingState.getNumber(operand));
+      emitter.emitVarInt(numberingState.getNumber(operand), "op operand types");
   }
 
   // Emit the successors of the operation.
   if (unsigned numSuccessors = op->getNumSuccessors()) {
     opEncodingMask |= bytecode::OpEncodingMask::kHasSuccessors;
-    emitter.emitVarInt(numSuccessors);
+    emitter.emitVarInt(numSuccessors, "op successors count");
     for (Block *successor : op->getSuccessors())
-      emitter.emitVarInt(numberingState.getNumber(successor));
+      emitter.emitVarInt(numberingState.getNumber(successor), "op successor");
   }
 
   // Emit the use-list orders to bytecode, so we can reconstruct the same order
@@ -1017,7 +1050,7 @@ LogicalResult BytecodeWriter::writeOp(EncodingEmitter &emitter, Operation *op) {
     opEncodingMask |= bytecode::OpEncodingMask::kHasInlineRegions;
 
   // Update the mask for the operation.
-  emitter.patchByte(maskOffset, opEncodingMask);
+  emitter.patchByte(maskOffset, opEncodingMask, "op encoding mask");
 
   // With the mask emitted, we can now emit the regions of the operation. We do
   // this after mask emission to avoid offset complications that may arise by
@@ -1025,7 +1058,8 @@ LogicalResult BytecodeWriter::writeOp(EncodingEmitter &emitter, Operation *op) {
   // op encoding mask is more annoying).
   if (numRegions) {
     bool isIsolatedFromAbove = numberingState.isIsolatedFromAbove(op);
-    emitter.emitVarIntWithFlag(numRegions, isIsolatedFromAbove);
+    emitter.emitVarIntWithFlag(numRegions, isIsolatedFromAbove,
+                               "op regions count");
 
     // If the region is not isolated from above, or we are emitting bytecode
     // targeting version <kLazyLoading, we don't use a section.
@@ -1096,8 +1130,9 @@ void BytecodeWriter::writeUseListOrders(EncodingEmitter &emitter,
   opEncodingMask |= bytecode::OpEncodingMask::kHasUseListOrders;
   // Emit the number of results that have a custom use-list order if the number
   // of results is greater than one.
-  if (range.size() != 1)
-    emitter.emitVarInt(map.size());
+  if (range.size() != 1) {
+    emitter.emitVarInt(map.size(), "custom use-list size");
+  }
 
   for (const auto &item : map) {
     auto resultIdx = item.getFirst();
@@ -1113,20 +1148,22 @@ void BytecodeWriter::writeUseListOrders(EncodingEmitter &emitter,
 
     // For single result, we don't need to store the result index.
     if (range.size() != 1)
-      emitter.emitVarInt(resultIdx);
+      emitter.emitVarInt(resultIdx, "use-list result index");
 
     if (indexPairEncoding) {
-      emitter.emitVarIntWithFlag(shuffledElements * 2, indexPairEncoding);
+      emitter.emitVarIntWithFlag(shuffledElements * 2, indexPairEncoding,
+                                 "use-list index pair size");
       for (auto pair : llvm::enumerate(useListOrder)) {
         if (pair.index() != pair.value()) {
-          emitter.emitVarInt(pair.value());
-          emitter.emitVarInt(pair.index());
+          emitter.emitVarInt(pair.value(), "use-list index pair first");
+          emitter.emitVarInt(pair.index(), "use-list index pair second");
         }
       }
     } else {
-      emitter.emitVarIntWithFlag(useListOrder.size(), indexPairEncoding);
+      emitter.emitVarIntWithFlag(useListOrder.size(), indexPairEncoding,
+                                 "use-list size");
       for (const auto &index : useListOrder)
-        emitter.emitVarInt(index);
+        emitter.emitVarInt(index, "use-list order");
     }
   }
 }
@@ -1136,15 +1173,15 @@ LogicalResult BytecodeWriter::writeRegion(EncodingEmitter &emitter,
   // If the region is empty, we only need to emit the number of blocks (which is
   // zero).
   if (region->empty()) {
-    emitter.emitVarInt(/*numBlocks*/ 0);
+    emitter.emitVarInt(/*numBlocks*/ 0, "region block count empty");
     return success();
   }
 
   // Emit the number of blocks and values within the region.
   unsigned numBlocks, numValues;
   std::tie(numBlocks, numValues) = numberingState.getBlockValueCount(region);
-  emitter.emitVarInt(numBlocks);
-  emitter.emitVarInt(numValues);
+  emitter.emitVarInt(numBlocks, "region block count");
+  emitter.emitVarInt(numValues, "region value count");
 
   // Emit the blocks within the region.
   for (Block &block : *region)
@@ -1160,7 +1197,7 @@ LogicalResult BytecodeWriter::writeIRSection(EncodingEmitter &emitter,
   // Write the IR section the same way as a block with no arguments. Note that
   // the low-bit of the operation count for a block is used to indicate if the
   // block has arguments, which in this case is always false.
-  irEmitter.emitVarIntWithFlag(/*numOps*/ 1, /*hasArgs*/ false);
+  irEmitter.emitVarIntWithFlag(/*numOps*/ 1, /*hasArgs*/ false, "ir section");
 
   // Emit the operations.
   if (failed(writeOp(irEmitter, op)))
@@ -1189,17 +1226,17 @@ class ResourceBuilder : public AsmResourceBuilder {
   void buildBlob(StringRef key, ArrayRef<char> data,
                  uint32_t dataAlignment) final {
     if (!shouldElideData)
-      emitter.emitOwnedBlobAndAlignment(data, dataAlignment);
+      emitter.emitOwnedBlobAndAlignment(data, dataAlignment, "resource blob");
     postProcessFn(key, AsmResourceEntryKind::Blob);
   }
   void buildBool(StringRef key, bool data) final {
     if (!shouldElideData)
-      emitter.emitByte(data);
+      emitter.emitByte(data, "resource bool");
     postProcessFn(key, AsmResourceEntryKind::Bool);
   }
   void buildString(StringRef key, StringRef data) final {
     if (!shouldElideData)
-      emitter.emitVarInt(stringSection.insert(data));
+      emitter.emitVarInt(stringSection.insert(data), "resource string");
     postProcessFn(key, AsmResourceEntryKind::String);
   }
 
@@ -1229,12 +1266,14 @@ void BytecodeWriter::writeResourceSection(Operation *op,
 
   // Functor used to emit a resource group defined by 'key'.
   auto emitResourceGroup = [&](uint64_t key) {
-    resourceOffsetEmitter.emitVarInt(key);
-    resourceOffsetEmitter.emitVarInt(curResourceEntries.size());
+    resourceOffsetEmitter.emitVarInt(key, "resource group key");
+    resourceOffsetEmitter.emitVarInt(curResourceEntries.size(),
+                                     "resource group size");
     for (auto [key, kind, size] : curResourceEntries) {
-      resourceOffsetEmitter.emitVarInt(stringSection.insert(key));
-      resourceOffsetEmitter.emitVarInt(size);
-      resourceOffsetEmitter.emitByte(kind);
+      resourceOffsetEmitter.emitVarInt(stringSection.insert(key),
+                                       "resource key");
+      resourceOffsetEmitter.emitVarInt(size, "resource size");
+      resourceOffsetEmitter.emitByte(kind, "resource kind");
     }
   };
 
@@ -1244,7 +1283,8 @@ void BytecodeWriter::writeResourceSection(Operation *op,
                                config.shouldElideResourceData);
 
   // Emit the external resource entries.
-  resourceOffsetEmitter.emitVarInt(config.externalResourcePrinters.size());
+  resourceOffsetEmitter.emitVarInt(config.externalResourcePrinters.size(),
+                                   "external resource printer count");
   for (const auto &printer : config.externalResourcePrinters) {
     curResourceEntries.clear();
     printer->buildResources(op, entryBuilder);
diff --git a/mlir/lib/Target/LLVM/ROCDL/Target.cpp b/mlir/lib/Target/LLVM/ROCDL/Target.cpp
index 70d6bcd76285a..4d23f987eb05e 100644
--- a/mlir/lib/Target/LLVM/ROCDL/Target.cpp
+++ b/mlir/lib/Target/LLVM/ROCDL/Target.cpp
@@ -324,8 +324,7 @@ SerializeGPUModuleBase::assembleIsa(StringRef isa) {
   mcStreamer.reset(target->createMCObjectStreamer(
       triple, ctx, std::unique_ptr<llvm::MCAsmBackend>(mab),
       mab->createObjectWriter(os), std::unique_ptr<llvm::MCCodeEmitter>(ce),
-      *sti, mcOptions.MCRelaxAll, mcOptions.MCIncrementalLinkerCompatible,
-      /*DWARFMustBeAtTheEnd*/ false));
+      *sti));
 
   std::unique_ptr<llvm::MCAsmParser> parser(
       createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai));
diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp
index 1e0afee2373a9..0b552a7e1ca3b 100644
--- a/mlir/lib/Transforms/Utils/DialectConversion.cpp
+++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp
@@ -432,34 +432,14 @@ class MoveBlockRewrite : public BlockRewrite {
   Block *insertBeforeBlock;
 };
 
-/// This structure contains the information pertaining to an argument that has
-/// been converted.
-struct ConvertedArgInfo {
-  ConvertedArgInfo(unsigned newArgIdx, unsigned newArgSize,
-                   Value castValue = nullptr)
-      : newArgIdx(newArgIdx), newArgSize(newArgSize), castValue(castValue) {}
-
-  /// The start index of in the new argument list that contains arguments that
-  /// replace the original.
-  unsigned newArgIdx;
-
-  /// The number of arguments that replaced the original argument.
-  unsigned newArgSize;
-
-  /// The cast value that was created to cast from the new arguments to the
-  /// old. This only used if 'newArgSize' > 1.
-  Value castValue;
-};
-
 /// Block type conversion. This rewrite is partially reflected in the IR.
 class BlockTypeConversionRewrite : public BlockRewrite {
 public:
-  BlockTypeConversionRewrite(
-      ConversionPatternRewriterImpl &rewriterImpl, Block *block,
-      Block *origBlock, SmallVector<std::optional<ConvertedArgInfo>, 1> argInfo,
-      const TypeConverter *converter)
+  BlockTypeConversionRewrite(ConversionPatternRewriterImpl &rewriterImpl,
+                             Block *block, Block *origBlock,
+                             const TypeConverter *converter)
       : BlockRewrite(Kind::BlockTypeConversion, rewriterImpl, block),
-        origBlock(origBlock), argInfo(argInfo), converter(converter) {}
+        origBlock(origBlock), converter(converter) {}
 
   static bool classof(const IRRewrite *rewrite) {
     return rewrite->getKind() == Kind::BlockTypeConversion;
@@ -479,10 +459,6 @@ class BlockTypeConversionRewrite : public BlockRewrite {
   /// The original block that was requested to have its signature converted.
   Block *origBlock;
 
-  /// The conversion information for each of the arguments. The information is
-  /// std::nullopt if the argument was dropped during conversion.
-  SmallVector<std::optional<ConvertedArgInfo>, 1> argInfo;
-
   /// The type converter used to convert the arguments.
   const TypeConverter *converter;
 };
@@ -691,12 +667,16 @@ class CreateOperationRewrite : public OperationRewrite {
 /// The type of materialization.
 enum MaterializationKind {
   /// This materialization materializes a conversion for an illegal block
-  /// argument type, to a legal one.
+  /// argument type, to the original one.
   Argument,
 
   /// This materialization materializes a conversion from an illegal type to a
   /// legal one.
-  Target
+  Target,
+
+  /// This materialization materializes a conversion from a legal type back to
+  /// an illegal one.
+  Source
 };
 
 /// An unresolved materialization, i.e., a "builtin.unrealized_conversion_cast"
@@ -736,7 +716,7 @@ class UnresolvedMaterializationRewrite : public OperationRewrite {
 private:
   /// The corresponding type converter to use when resolving this
   /// materialization, and the kind of this materialization.
-  llvm::PointerIntPair<const TypeConverter *, 1, MaterializationKind>
+  llvm::PointerIntPair<const TypeConverter *, 2, MaterializationKind>
       converterAndKind;
 };
 } // namespace
@@ -855,11 +835,6 @@ struct ConversionPatternRewriterImpl : public RewriterBase::Listener {
                                        ValueRange inputs, Type outputType,
                                        const TypeConverter *converter);
 
-  Value buildUnresolvedArgumentMaterialization(Block *block, Location loc,
-                                               ValueRange inputs,
-                                               Type outputType,
-                                               const TypeConverter *converter);
-
   Value buildUnresolvedTargetMaterialization(Location loc, Value input,
                                              Type outputType,
                                              const TypeConverter *converter);
@@ -989,28 +964,6 @@ void BlockTypeConversionRewrite::commit(RewriterBase &rewriter) {
           dyn_cast_or_null<RewriterBase::Listener>(rewriter.getListener()))
     for (Operation *op : block->getUsers())
       listener->notifyOperationModified(op);
-
-  // Process the remapping for each of the original arguments.
-  for (auto [origArg, info] :
-       llvm::zip_equal(origBlock->getArguments(), argInfo)) {
-    // Handle the case of a 1->0 value mapping.
-    if (!info) {
-      if (Value newArg =
-              rewriterImpl.mapping.lookupOrNull(origArg, origArg.getType()))
-        rewriter.replaceAllUsesWith(origArg, newArg);
-      continue;
-    }
-
-    // Otherwise this is a 1->1+ value mapping.
-    Value castValue = info->castValue;
-    assert(info->newArgSize >= 1 && castValue && "expected 1->1+ mapping");
-
-    // If the argument is still used, replace it with the generated cast.
-    if (!origArg.use_empty()) {
-      rewriter.replaceAllUsesWith(origArg, rewriterImpl.mapping.lookupOrDefault(
-                                               castValue, origArg.getType()));
-    }
-  }
 }
 
 void BlockTypeConversionRewrite::rollback() {
@@ -1035,14 +988,12 @@ LogicalResult BlockTypeConversionRewrite::materializeLiveConversions(
       continue;
 
     Value replacementValue = rewriterImpl.mapping.lookupOrDefault(origArg);
-    bool isDroppedArg = replacementValue == origArg;
-    if (!isDroppedArg)
-      builder.setInsertionPointAfterValue(replacementValue);
+    assert(replacementValue && "replacement value not found");
     Value newArg;
     if (converter) {
+      builder.setInsertionPointAfterValue(replacementValue);
       newArg = converter->materializeSourceConversion(
-          builder, origArg.getLoc(), origArg.getType(),
-          isDroppedArg ? ValueRange() : ValueRange(replacementValue));
+          builder, origArg.getLoc(), origArg.getType(), replacementValue);
       assert((!newArg || newArg.getType() == origArg.getType()) &&
              "materialization hook did not provide a value of the expected "
              "type");
@@ -1053,8 +1004,6 @@ LogicalResult BlockTypeConversionRewrite::materializeLiveConversions(
           << "failed to materialize conversion for block argument #"
           << it.index() << " that remained live after conversion, type was "
           << origArg.getType();
-      if (!isDroppedArg)
-        diag << ", with target type " << replacementValue.getType();
       diag.attachNote(liveUser->getLoc())
           << "see existing live user here: " << *liveUser;
       return failure();
@@ -1340,73 +1289,64 @@ Block *ConversionPatternRewriterImpl::applySignatureConversion(
   // Replace all uses of the old block with the new block.
   block->replaceAllUsesWith(newBlock);
 
-  // Remap each of the original arguments as determined by the signature
-  // conversion.
-  SmallVector<std::optional<ConvertedArgInfo>, 1> argInfo;
-  argInfo.resize(origArgCount);
-
   for (unsigned i = 0; i != origArgCount; ++i) {
-    auto inputMap = signatureConversion.getInputMapping(i);
-    if (!inputMap)
-      continue;
     BlockArgument origArg = block->getArgument(i);
+    Type origArgType = origArg.getType();
+
+    std::optional<TypeConverter::SignatureConversion::InputMapping> inputMap =
+        signatureConversion.getInputMapping(i);
+    if (!inputMap) {
+      // This block argument was dropped and no replacement value was provided.
+      // Materialize a replacement value "out of thin air".
+      Value repl = buildUnresolvedMaterialization(
+          MaterializationKind::Source, newBlock, newBlock->begin(),
+          origArg.getLoc(), /*inputs=*/ValueRange(),
+          /*outputType=*/origArgType, converter);
+      mapping.map(origArg, repl);
+      appendRewrite<ReplaceBlockArgRewrite>(block, origArg);
+      continue;
+    }
 
-    // If inputMap->replacementValue is not nullptr, then the argument is
-    // dropped and a replacement value is provided to be the remappedValue.
-    if (inputMap->replacementValue) {
+    if (Value repl = inputMap->replacementValue) {
+      // This block argument was dropped and a replacement value was provided.
       assert(inputMap->size == 0 &&
              "invalid to provide a replacement value when the argument isn't "
              "dropped");
-      mapping.map(origArg, inputMap->replacementValue);
+      mapping.map(origArg, repl);
       appendRewrite<ReplaceBlockArgRewrite>(block, origArg);
       continue;
     }
 
-    // Otherwise, this is a 1->1+ mapping.
+    // This is a 1->1+ mapping. 1->N mappings are not fully supported in the
+    // dialect conversion. Therefore, we need an argument materialization to
+    // turn the replacement block arguments into a single SSA value that can be
+    // used as a replacement.
     auto replArgs =
         newBlock->getArguments().slice(inputMap->inputNo, inputMap->size);
-    Value newArg;
+    Value argMat = buildUnresolvedMaterialization(
+        MaterializationKind::Argument, newBlock, newBlock->begin(),
+        origArg.getLoc(), /*inputs=*/replArgs, origArgType, converter);
+    mapping.map(origArg, argMat);
+    appendRewrite<ReplaceBlockArgRewrite>(block, origArg);
 
-    // If this is a 1->1 mapping and the types of new and replacement arguments
-    // match (i.e. it's an identity map), then the argument is mapped to its
-    // original type.
     // FIXME: We simply pass through the replacement argument if there wasn't a
     // converter, which isn't great as it allows implicit type conversions to
     // appear. We should properly restructure this code to handle cases where a
     // converter isn't provided and also to properly handle the case where an
     // argument materialization is actually a temporary source materialization
     // (e.g. in the case of 1->N).
-    if (replArgs.size() == 1 &&
-        (!converter || replArgs[0].getType() == origArg.getType())) {
-      newArg = replArgs.front();
-      mapping.map(origArg, newArg);
-    } else {
-      // Build argument materialization: new block arguments -> old block
-      // argument type.
-      Value argMat = buildUnresolvedArgumentMaterialization(
-          newBlock, origArg.getLoc(), replArgs, origArg.getType(), converter);
-      mapping.map(origArg, argMat);
-
-      // Build target materialization: old block argument type -> legal type.
-      // Note: This function returns an "empty" type if no valid conversion to
-      // a legal type exists. In that case, we continue the conversion with the
-      // original block argument type.
-      Type legalOutputType = converter->convertType(origArg.getType());
-      if (legalOutputType && legalOutputType != origArg.getType()) {
-        newArg = buildUnresolvedTargetMaterialization(
-            origArg.getLoc(), argMat, legalOutputType, converter);
-        mapping.map(argMat, newArg);
-      } else {
-        newArg = argMat;
-      }
+    Type legalOutputType;
+    if (converter)
+      legalOutputType = converter->convertType(origArgType);
+    if (legalOutputType && legalOutputType != origArgType) {
+      Value targetMat = buildUnresolvedTargetMaterialization(
+          origArg.getLoc(), argMat, legalOutputType, converter);
+      mapping.map(argMat, targetMat);
     }
-
     appendRewrite<ReplaceBlockArgRewrite>(block, origArg);
-    argInfo[i] = ConvertedArgInfo(inputMap->inputNo, inputMap->size, newArg);
   }
 
-  appendRewrite<BlockTypeConversionRewrite>(newBlock, block, argInfo,
-                                            converter);
+  appendRewrite<BlockTypeConversionRewrite>(newBlock, block, converter);
 
   // Erase the old block. (It is just unlinked for now and will be erased during
   // cleanup.)
@@ -1437,13 +1377,6 @@ Value ConversionPatternRewriterImpl::buildUnresolvedMaterialization(
   appendRewrite<UnresolvedMaterializationRewrite>(convertOp, converter, kind);
   return convertOp.getResult(0);
 }
-Value ConversionPatternRewriterImpl::buildUnresolvedArgumentMaterialization(
-    Block *block, Location loc, ValueRange inputs, Type outputType,
-    const TypeConverter *converter) {
-  return buildUnresolvedMaterialization(MaterializationKind::Argument, block,
-                                        block->begin(), loc, inputs, outputType,
-                                        converter);
-}
 Value ConversionPatternRewriterImpl::buildUnresolvedTargetMaterialization(
     Location loc, Value input, Type outputType,
     const TypeConverter *converter) {
@@ -2862,6 +2795,10 @@ static LogicalResult legalizeUnresolvedMaterialization(
       newMaterialization = converter->materializeTargetConversion(
           rewriter, op->getLoc(), outputType, inputOperands);
       break;
+    case MaterializationKind::Source:
+      newMaterialization = converter->materializeSourceConversion(
+          rewriter, op->getLoc(), outputType, inputOperands);
+      break;
     }
     if (newMaterialization) {
       assert(newMaterialization.getType() == outputType &&
@@ -2874,8 +2811,8 @@ static LogicalResult legalizeUnresolvedMaterialization(
 
   InFlightDiagnostic diag = op->emitError()
                             << "failed to legalize unresolved materialization "
-                               "from "
-                            << inputOperands.getTypes() << " to " << outputType
+                               "from ("
+                            << inputOperands.getTypes() << ") to " << outputType
                             << " that remained live after conversion";
   if (Operation *liveUser = findLiveUser(op->getUsers())) {
     diag.attachNote(liveUser->getLoc())
diff --git a/mlir/test/Transforms/test-legalize-type-conversion.mlir b/mlir/test/Transforms/test-legalize-type-conversion.mlir
index b35cda8e724f6..8254be68912c8 100644
--- a/mlir/test/Transforms/test-legalize-type-conversion.mlir
+++ b/mlir/test/Transforms/test-legalize-type-conversion.mlir
@@ -2,9 +2,8 @@
 
 
 func.func @test_invalid_arg_materialization(
-  // expected-error at below {{failed to materialize conversion for block argument #0 that remained live after conversion, type was 'i16'}}
+  // expected-error at below {{failed to legalize unresolved materialization from () to 'i16' that remained live after conversion}}
   %arg0: i16) {
-  // expected-note at below {{see existing live user here}}
   "foo.return"(%arg0) : (i16) -> ()
 }
 
@@ -104,9 +103,8 @@ func.func @test_block_argument_not_converted() {
 // Make sure argument type changes aren't implicitly forwarded.
 func.func @test_signature_conversion_no_converter() {
   "test.signature_conversion_no_converter"() ({
-  // expected-error at below {{failed to materialize conversion for block argument #0 that remained live after conversion}}
+  // expected-error at below {{failed to legalize unresolved materialization from ('f64') to 'f32' that remained live after conversion}}
   ^bb0(%arg0: f32):
-    // expected-note at below {{see existing live user here}}
     "test.type_consumer"(%arg0) : (f32) -> ()
     "test.return"(%arg0) : (f32) -> ()
   }) : () -> ()
diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
index ae17746c72882..64d36c7b7f664 100644
--- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
@@ -947,14 +947,7 @@ cc_library(
     ]) + [
         # To avoid a dependency cycle.
         "include/llvm/Analysis/IVDescriptors.h",
-        "include/llvm/CodeGen/GenVT.inc",
-    ] + glob(
-        # To avoid a dependency cycle.
-        [
-            "include/llvm/CodeGen/**/*.h",
-            "include/llvm/CodeGenTypes/**/*.h",
-        ],
-    ),
+    ],
     hdrs = glob(
         [
             "include/llvm/*.h",

>From f6478e36a962843329c519ba35ad2a132ffd8c9e Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Mon, 22 Jul 2024 16:34:02 -0700
Subject: [PATCH 2/2] fix getOrCreateJumpTable

Created using spr 1.3.4
---
 bolt/lib/Core/BinaryContext.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index bdfd91417a696..874cdd26ce6ea 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -841,7 +841,7 @@ BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
     // Prevent associating a jump table to a specific fragment twice.
     // This simple check arises from the assumption: no more than 2 fragments.
     if (JT->Parents.size() == 1 && JT->Parents[0] != &Function) {
-      assert(JT->Parents[0]->isParentOrChildOf(Function) &&
+      assert(areRelatedFragments(JT->Parents[0], &Function) &&
              "cannot re-use jump table of a different function");
       // Duplicate the entry for the parent function for easy access
       JT->Parents.push_back(&Function);